added doc/ with latex documentation
This commit is contained in:
parent
d580067f87
commit
2282c9980d
BIN
doc/Description.pdf
Normal file
BIN
doc/Description.pdf
Normal file
Binary file not shown.
372
doc/Description.tex
Normal file
372
doc/Description.tex
Normal file
@ -0,0 +1,372 @@
|
||||
%% Time-stamp: "Last modified 2024-04-10 23:08:54 delucia"
|
||||
\documentclass[a4paper,10pt]{article}
|
||||
|
||||
\usepackage{listings}
|
||||
\usepackage{amsmath}
|
||||
\usepackage{xcolor}
|
||||
\lstset{basicstyle=\ttfamily\footnotesize}
|
||||
|
||||
|
||||
\usepackage{graphicx}
|
||||
\usepackage{subcaption}
|
||||
\usepackage[normalem]{ulem}
|
||||
\usepackage{fancyvrb}
|
||||
\usepackage{fullpage}
|
||||
\usepackage{hyperref}
|
||||
|
||||
\DeclareRobustCommand*\chem[1]
|
||||
{\ensuremath{%
|
||||
{\mathcode`\-="0200\mathcode`\=="003D% no space around "-" and "="
|
||||
\ifx\f@series\testbx\mathbf{#1}\else\mathrm{#1}\fi}}}
|
||||
|
||||
\title{\texttt{TUG} ``standalone'' benchmarks \\
|
||||
(and some hopefully useful metrics)}
|
||||
|
||||
% \author{\Large Marco \\ %
|
||||
% \vspace{0.25cm} \url{delucia@gfz-potsdam.de}}
|
||||
|
||||
% \date{\today}
|
||||
\date{}
|
||||
\sloppy
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
\section{Benchmark description}
|
||||
|
||||
All benchmarks are specified and can be modified (e.g., number of
|
||||
iterations) in the \url{eval/bench_defs.hpp.in} file.
|
||||
|
||||
\subsection{\texttt{barite\_200}}
|
||||
|
||||
At a glance:
|
||||
\begin{table}[!h]
|
||||
\centering
|
||||
\begin{tabular}{|c|c|}
|
||||
\hline
|
||||
Grid & 200x200 \\ \hline
|
||||
Size & 1x1~m$^2$ \\ \hline
|
||||
Timestep & 1000~s \\ \hline
|
||||
Iterations & 50 \\ \hline
|
||||
$\alpha_x, \alpha_y$ & heter., aniso. \\\hline
|
||||
Species \# & 7 \\ \hline
|
||||
Init & homog. \\ \hline
|
||||
\end{tabular}
|
||||
\caption{Summary of parameters for the \texttt{barite\_200} benchmark}
|
||||
\label{tab:b200}
|
||||
\end{table}
|
||||
|
||||
\noindent \textbf{Initial Conditions (IC):} all concentrations are
|
||||
initially homogeneous, refer to the \textbf{IC} values in table
|
||||
\ref{tab:b200val}. The actual numerical values read by the benchmark
|
||||
from file \url{eval/barite_200/barite_200_init.csv} have more
|
||||
significant digits.
|
||||
|
||||
\noindent \textbf{Boundary conditions (BC):} the top left corner
|
||||
(first 5 element boundaries on both the N and W sides) is set to a constant
|
||||
value of 0.1 molal \chem{BaCl_2}. All other boundaries are closed.
|
||||
|
||||
\begin{table*}[!h]
|
||||
\centering
|
||||
\begin{tabular}{|r|r|r|r|r|r|r|r|}\hline
|
||||
& H & O & Charge & Ba & Cl & S\_6\_ & Sr \\\hline
|
||||
\textbf{IC} & 110.0124 & 55.5086 & -1.2163e-09 & 4.4553e-07 & 2.0e-12 & 6.1516e-5 & 6.1472e-5 \\\hline
|
||||
\textbf{BC} & 111.0124 & 55.5062 & -3.3370e-08 & 0.1 & 0.2 & 0 & 0 \\\hline
|
||||
\end{tabular}
|
||||
\caption{Initial and boundary values of all transported variables in
|
||||
the \texttt{barite\_200} benchmark.}
|
||||
\label{tab:b200val}
|
||||
\end{table*}
|
||||
|
||||
Spatially heterogeneous values for $\alpha_x$ and $\alpha_y$ are read
|
||||
from \url{eval/barite_200/alpha_[xy].csv}. They result from a single
|
||||
geostatistical simulation $\mathcal{F}$ of a $\mathcal{N}$(0, 1)
|
||||
variable (anisotropic spherical variogram of correlation length 5 at
|
||||
$-30$\textdegree{} and 20 at 60\textdegree, and sill 1). This
|
||||
$\mathcal{F}$ field was scaled with an approximate order of magnitude
|
||||
for $\alpha_x$ and $\alpha_y$ respectively:
|
||||
|
||||
\begin{equation*}
|
||||
\begin{cases}
|
||||
\displaystyle \alpha_x & \displaystyle = 10^{-7} + 10^{-6} \frac{\mathcal{F}-\min{(\mathcal{F})}}{\max{(\mathcal{F})}}\\
|
||||
\alpha_y & \displaystyle = 10^{-7} + 10^{-7} \frac{\mathcal{F}-\min{(\mathcal{F})}}{\max{(\mathcal{F})}}
|
||||
\end{cases}
|
||||
\end{equation*}
|
||||
|
||||
Figure~\ref{fig:b200a} displays the picture for $\alpha_x$ and
|
||||
Figure~\ref{fig:b200b} the results for Ba after the 50 iterations in the
|
||||
benchmark.
|
||||
|
||||
\begin{figure}[!htb]
|
||||
\centering
|
||||
\begin{subfigure}{0.6\textwidth}
|
||||
\includegraphics[width=\textwidth]{images/barite_200_field_alphax_crop.png}
|
||||
\caption{$\alpha_x$ field\label{fig:b200a}}
|
||||
\end{subfigure}
|
||||
\begin{subfigure}{0.6\textwidth}
|
||||
\includegraphics[width=\textwidth]{images/barite_200_field_Ba_crop.png}
|
||||
\caption{\chem{log_{10}Ba} after 50 iterations\label{fig:b200b}}
|
||||
\end{subfigure}
|
||||
\caption{Diffusivity field and end result for Ba in the
|
||||
\texttt{barite\_200} benchmark\label{fig:b200}}
|
||||
\end{figure}
|
||||
|
||||
|
||||
This benchmark runs in $\sim$11~s on 8 CPUs on my desktop.
|
||||
|
||||
\clearpage
|
||||
|
||||
\subsection{\texttt{barite\_large}}
|
||||
|
||||
Larger grid version of the \texttt{barite\_200} benchmark, this time
|
||||
with heterogeneous initial conditions, closed boundaries everywhere
|
||||
and homogeneous diffusion coefficients. The purpose of this benchmark is
|
||||
to check for mass conservation.
|
||||
|
||||
At a glance:
|
||||
|
||||
\begin{table}[!h]
|
||||
\centering
|
||||
\begin{tabular}{|c|c|}
|
||||
\hline
|
||||
Grid & 1000x1000 \\ \hline
|
||||
Size & 10x10~m \\ \hline
|
||||
Timestep & 100~s \\ \hline
|
||||
Iterations & 50 \\ \hline
|
||||
$\alpha$ & homog. 1E-6 \\\hline
|
||||
Species \# & 7 \\ \hline
|
||||
Init & heter. \\ \hline
|
||||
\end{tabular}
|
||||
\caption{Summary of parameters for the \texttt{barite\_large} benchmark}
|
||||
\label{tab:blarge}
|
||||
\end{table}
|
||||
|
||||
\noindent \textbf{Boundary conditions (BC):} all boundaries are
|
||||
closed. As for initial conditions, background concentrations are set
|
||||
in the whole grid (\textbf{All} record in table~\ref{tab:blargeval}).
|
||||
1000 randomly selected grid cells (cf.\ figure~\ref{fig:blargea} for
|
||||
their position) are assigned initial values of the \textbf{Locations}
|
||||
record. The non-rounded values are read from file
|
||||
\url{eval/barite_large/barite_large_init.csv}.
|
||||
|
||||
|
||||
\begin{table*}[!h]
|
||||
\centering
|
||||
\begin{tabular}{|r|r|r|r|r|r|r|r|}\hline
|
||||
& H & O & Charge & Ba & Cl & S\_6\_ & Sr \\\hline
|
||||
\textbf{All} & 110.0124 & 55.50868 & -1.2163e-09 & 4.4553e-07 & 0 & 0.0006152 & 0.00061 \\\hline
|
||||
\textbf{Locations} & 111.0124 & 55.50622 & -3.0000e-07 & 1 & 2 & 0.01 & 0.001 \\\hline
|
||||
\end{tabular}
|
||||
\caption{\texttt{Barite\_large} benchmark, initial conditions: the
|
||||
whole grid has the values in the \textbf{All} record, while 1000
|
||||
cells as displayed in figure~\ref{fig:blargea} are assigned the
|
||||
values of \textbf{Locations} record. }
|
||||
\label{tab:blargeval}
|
||||
\end{table*}
|
||||
|
||||
|
||||
\begin{figure}[!htb]
|
||||
\centering
|
||||
\begin{subfigure}[T]{0.6\textwidth}
|
||||
\vskip 0pt
|
||||
\includegraphics[width=\textwidth]{images/barite_large_init_locs.pdf}
|
||||
\caption{Locations of ``heterogeneities'' in initial
|
||||
conditions\label{fig:blargea}}
|
||||
\end{subfigure}
|
||||
\begin{subfigure}[T]{0.6\textwidth}
|
||||
\vskip 0pt
|
||||
\includegraphics[width=\textwidth]{images/barite_large_field_Ba_crop.png}
|
||||
\caption{\chem{log_{10}Ba} after 5 iterations\label{fig:blargeb}}
|
||||
\end{subfigure}
|
||||
\caption{Initial heterogeneity locations and end result for Ba in the
|
||||
\texttt{barite\_large} benchmark\label{fig:blarge}}
|
||||
\end{figure}
|
||||
|
||||
This benchmark runs in $\sim$30~s on my desktop using 8 CPUs.
|
||||
|
||||
\clearpage
|
||||
|
||||
\subsection{\texttt{surfex}}
|
||||
|
||||
Homogeneous benchmark with values inspired from POET's
|
||||
\texttt{surfex}, transporting 21 species. Here we use actual
|
||||
physically true values for $\alpha$ (isotropic and homogeneous set to
|
||||
1.1E-12) and simulate a rectangular domain of 2x1 \chem{cm^2}. At a
|
||||
glance:
|
||||
|
||||
\begin{table}[!h]
|
||||
\centering
|
||||
\begin{tabular}{|c|c|}
|
||||
\hline
|
||||
Grid & 200x100 \\ \hline
|
||||
Size & 0.02x0.01~m \\ \hline
|
||||
Timestep & 3600~s (1~h) \\ \hline
|
||||
Iterations & 20 \\ \hline
|
||||
$\alpha$ & homog. 1.1E-12 \\ \hline
|
||||
Species \# & 21 \\ \hline
|
||||
Init & homog. \\ \hline
|
||||
\end{tabular}
|
||||
\caption{Summary of parameters for the \texttt{surfex} benchmark}
|
||||
\label{tab:bsurf}
|
||||
\end{table}
|
||||
|
||||
\noindent \textbf{Boundary conditions (BC):} \textbf{all} domain
|
||||
boundaries are set to constant \textbf{BC} values. \textbf{Initial
|
||||
conditions (IC):} homogeneous, cf.\ table~\ref{tab:bsurfval}.
|
||||
|
||||
\begin{table*}[!h]
|
||||
\centering
|
||||
\begin{tabular}{|l|r|r|}\hline
|
||||
& \textbf{IC} & \textbf{BC} \\ \hline
|
||||
H & 1.11e+02 & 120.0 \\ \hline
|
||||
O & 5.55e+01 & 55.1 \\ \hline
|
||||
Charge & -2.0e-13 & 8.0e-17 \\ \hline
|
||||
C & 2.0e-16 & 2.0e-15 \\ \hline
|
||||
CH4 & 2.0e-03 & 0.2 \\ \hline
|
||||
Ca & 2.0e-01 & 0.03 \\ \hline
|
||||
Cl & 3.0e-01 & 0.5 \\ \hline
|
||||
Fe2 & 1.4e-04 & 0.0002 \\ \hline
|
||||
Fe3 & 1.3e-09 & 2.0e-08 \\ \hline
|
||||
H0 & 6.0e-12 & 2.0e-11 \\ \hline
|
||||
K & 2.0e-03 & 1.0e-05 \\ \hline
|
||||
Mg & 1.0e-02 & 0.2 \\ \hline
|
||||
Na & 2.0e-01 & 0.3 \\ \hline
|
||||
HS2 & 5.9e-10 & 0 \\ \hline
|
||||
S2 & 8.3e-15 & 8.3e-12 \\ \hline
|
||||
S4 & 2.1e-14 & 5.1e-14 \\ \hline
|
||||
S6 & 1.6e-02 & 0.026 \\ \hline
|
||||
Sr & 4.5e-04 & 0.045 \\ \hline
|
||||
U4 & 2.5e-09 & 2.5e-08 \\ \hline
|
||||
U5 & 1.6e-10 & 1.6e-10 \\ \hline
|
||||
U6 & 2.3e-07 & 1.0e-05 \\ \hline
|
||||
\end{tabular}
|
||||
\caption{\texttt{surfex} benchmark, homogeneous initial conditions
|
||||
\textbf{IC} and boundary values \textbf{BC}}
|
||||
\label{tab:bsurfval}
|
||||
\end{table*}
|
||||
|
||||
|
||||
\begin{figure}[!htb]
|
||||
\centering
|
||||
\begin{subfigure}[T]{0.6\textwidth}
|
||||
\includegraphics[width=\textwidth]{images/surfex_field_U6.png}
|
||||
\caption{\chem{\log_{10}(U6)} after 20
|
||||
iterations\label{fig:bsurfa}}
|
||||
\end{subfigure}
|
||||
\begin{subfigure}[T]{0.6\textwidth}
|
||||
\includegraphics[width=\textwidth]{images/surfex_field_Na.png}
|
||||
\caption{Na (linear scale) after 5 iterations\label{fig:bsurfb}}
|
||||
\end{subfigure}
|
||||
\caption{Results for U6 and Na in the \texttt{surfex}
|
||||
benchmark\label{fig:bsurf}}
|
||||
\end{figure}
|
||||
|
||||
This benchmark runs in $\sim$7~s on my desktop using 8 CPUs.
|
||||
|
||||
\clearpage
|
||||
|
||||
\section{Some hopefully useful metrics}
|
||||
|
||||
The problem with the \emph{classical}, \emph{central} measures such as
|
||||
MAE (Mean Absolute Error) or RMSE (Root Mean Square Error), even when
|
||||
scaled using a constant (e.g., the range of true variable, its mean,
|
||||
or standard deviation) is that the measure does underestimate
|
||||
discrepancies for very small values and conversely is only sensitive to
|
||||
large values. In practice these measures only work if the distribution
|
||||
of the variable of interest is uniform on a small range or symmetrically
|
||||
distributed, e.g., Gaussian. However, many variables we deal with in
|
||||
geochemical models are rather \emph{lognormally} distributed
|
||||
or \emph{uniformly distributed on a logarithmic scale}. In any case,
|
||||
it is common that variables we try to regress or match with models
|
||||
span many orders of magnitude, and the error measures defined above
|
||||
are biased towards large values.
|
||||
|
||||
This problem can be partially solved by operating on the logarithms of the
|
||||
variables. Plugging the logarithms of the true and predicted variables
|
||||
$y$ and $\hat{y}$ in the usual MAE and RMSE formula we get,
|
||||
\textbf{assuming both variables strictly larger than 0}:
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:MAElog}
|
||||
\text{MAE}_{\text{log}} = \frac{1}{N}\sum \left| \log{y_i} - \log{\hat{y}_i}\right| = \frac{1}{N}\sum \left| \log{\frac{y_i}{\hat{y}_i}}\right|
|
||||
\end{equation}
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:RMSElog}
|
||||
\text{RMSE}_{\text{log}} = \sqrt{\frac{1}{N}\sum \left( \log{y_i} - \log{\hat{y}_i}\right)^2} = \sqrt{\frac{1}{N}\sum \left( \log{\frac{y_i}{\hat{y}_i}}\right)^2}
|
||||
\end{equation}
|
||||
|
||||
It is usual to define a slightly different variant of
|
||||
\chem{RMSE_{log}}, called RMSLE (Root Mean Square Logarithmic Error)
|
||||
by adding 1 to both the predicted and the true value to avoid taking the logarithm of 0 or dividing
|
||||
by 0:
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:RMSLE}
|
||||
\text{RMSLE} = \sqrt{\frac{1}{N}\sum \left[ \log{(y_i+1)} - \log{(\hat{y}_i+1)}\right]^2} = \sqrt{\frac{1}{N}\sum \left( \log{\frac{y_i+1}{\hat{y}_i+1}}\right)^2}
|
||||
\end{equation}
|
||||
|
||||
All these measures yield 0 if $y$ and $\hat{y}$ are identical. Note
|
||||
that the \chem{MAE_{log}} resembles a \emph{geometric mean of the
|
||||
absolute values of the quotients} of $y$ and $\hat{y}$ per
|
||||
observation (putting either $y$ or $\hat{y}$ in the denominator is
|
||||
equivalent):
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:GMAQ}
|
||||
\text{Geometric Mean of Absolute Quotients} = \left(\prod
|
||||
\left|\frac{\hat{y}_{i}}{y_i}\right|\right)^{\frac {1}{N}}
|
||||
\end{equation}
|
||||
|
||||
The geometric mean of the quotients would be 1 if the two variables
|
||||
are identical. The connection with the \chem{MAE_{log}} is easy to see
|
||||
since by definition a geometric mean is the $N$-th root of the product
|
||||
of the terms:
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:5}
|
||||
\exp \left[{\frac {1}{N}}\sum\log a_{i}\right]= \left(\prod
|
||||
a_{i}\right)^{\frac {1}{N}}
|
||||
\end{equation}
|
||||
|
||||
So the \chem{MAE_{log}} is the logarithm of the actual geometric mean
|
||||
of the quotients between the variables, each oriented so that it is $\geq 1$, i.e., of $\exp\left|\log(y_i/\hat{y}_i)\right|$.
|
||||
Instead of the simple quotient of the true and predicted values $y$
|
||||
and $\hat{y}$ we can define a relative (\textbf{per observation})
|
||||
error $\alpha_i$ as:
|
||||
\begin{equation}
|
||||
\label{eq:relalpha}
|
||||
\alpha_i =
|
||||
\begin{cases}
|
||||
\displaystyle \frac{y_i-\hat{y}_i}{y_i} & \text{if~} \hspace{0.1cm} y_i \neq 0 \\
|
||||
1 & \text{if~} \hspace{0.1cm} y_i=0 \text{\hspace{0.1cm} and \hspace{0.1cm}} \hat{y}_i \neq 0 \\
|
||||
0 & \text{if~} \hspace{0.1cm} y_i=0 \text{\hspace{0.1cm} and \hspace{0.1cm}} \hat{y}_i = 0
|
||||
\end{cases}
|
||||
\end{equation}
|
||||
|
||||
The same treatment of the case when a variable is 0 of course also
|
||||
applies to the above introduced relative measures. Without using
|
||||
logarithms we can then define some relative measures such as the Mean
|
||||
Absolute Percentage Error (\textbf{MAPE}) and Relative RMSE
|
||||
(\textbf{RRMSE}):
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:MAPE}
|
||||
\text{MAPE} = \frac{100\%}{N}\sum \left| \alpha_i \right|
|
||||
\end{equation}
|
||||
|
||||
\begin{equation}
|
||||
\label{eq:RRMSE}
|
||||
\text{RRMSE} = \sqrt{\frac{1}{N}\sum \left( \alpha_i\right)^2}
|
||||
\end{equation}
|
||||
|
||||
These relative measures account for discrepancies across all
|
||||
magnitudes of the $y$ and $\hat{y}$ variables and preserve the
|
||||
physical meaning of 0.
|
||||
|
||||
\end{document}
|
||||
|
||||
|
||||
%%% Local Variables:
|
||||
%%% mode: xelatex
|
||||
%%% TeX-master: t
|
||||
%%% End:
|
||||
BIN
doc/images/barite_200_field_Ba_crop.png
Normal file
BIN
doc/images/barite_200_field_Ba_crop.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 9.3 KiB |
BIN
doc/images/barite_200_field_alphax_crop.png
Normal file
BIN
doc/images/barite_200_field_alphax_crop.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 33 KiB |
BIN
doc/images/barite_large_field_Ba_crop.png
Normal file
BIN
doc/images/barite_large_field_Ba_crop.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 276 KiB |
BIN
doc/images/barite_large_init_locs.pdf
Normal file
BIN
doc/images/barite_large_init_locs.pdf
Normal file
Binary file not shown.
BIN
doc/images/surfex_field_Na.png
Normal file
BIN
doc/images/surfex_field_Na.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 9.0 KiB |
BIN
doc/images/surfex_field_U6.png
Normal file
BIN
doc/images/surfex_field_U6.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 7.2 KiB |
Loading…
x
Reference in New Issue
Block a user