%% Time-stamp: "Last modified 2024-04-10 23:33:46 delucia" \documentclass[a4paper,10pt]{article} \usepackage{listings} \usepackage{amsmath} \usepackage{xcolor} \lstset{basicstyle=\ttfamily\footnotesize} \usepackage{graphicx} \usepackage{subcaption} \usepackage[normalem]{ulem} \usepackage{fancyvrb} \usepackage{fullpage} \usepackage{hyperref} \DeclareRobustCommand*\chem[1] {\ensuremath{% {\mathcode`\-="0200\mathcode`\=="003D% no space around "-" and "=" \ifx\f@series\testbx\mathbf{#1}\else\mathrm{#1}\fi}}} \title{\texttt{TUG} ``standalone'' benchmarks \\ (and some hopefully useful metrics)} % \author{\Large Marco \\ % % \vspace{0.25cm} \url{delucia@gfz-potsdam.de}} % \date{\today} \date{} \sloppy \begin{document} \maketitle \section{Benchmark description} All benchmarks are specified and can be modified (e.g., number of iterations) in the \url{eval/bench_defs.hpp.in} file. \subsection{\texttt{barite\_200}} At a glance: \begin{table}[!h] \centering \begin{tabular}{|c|c|} \hline Grid & 200x200 \\ \hline Size & 1x1~m$^2$ \\ \hline Timestep & 1000~s \\ \hline Iterations & 50 \\ \hline $\alpha_x, \alpha_y$ & heter., aniso. \\\hline Species \# & 7 \\ \hline Init & homog. \\ \hline \end{tabular} \caption{Summary of parameters for the \texttt{barite\_200} benchmark} \label{tab:b200} \end{table} \noindent \textbf{Initial Conditions (IC):} all concentrations are initially homogeneous; refer to the \textbf{IC} values in table~\ref{tab:b200val}. The actual numerical values read by the benchmark from file \url{eval/barite_200/barite_200_init.csv} have more significant digits. \noindent \textbf{Boundary conditions (BC):} the top left corner (first 5 element boundaries on both the N and W sides) is set to a constant value of 0.1 molal \chem{BaCl_2}. All other boundaries are closed. 
\begin{table*}[!h] \centering \begin{tabular}{|r|r|r|r|r|r|r|r|}\hline & H & O & Charge & Ba & Cl & S\_6\_ & Sr \\\hline \textbf{IC} & 110.0124 & 55.5086 & -1.2163e-09 & 4.4553e-07 & 2.0e-12 & 6.1516e-5 & 6.1472e-5 \\\hline \textbf{BC} & 111.0124 & 55.5062 & -3.3370e-08 & 0.1 & 0.2 & 0 & 0 \\\hline \end{tabular} \caption{Initial and boundary values of all transported variables in the \texttt{barite\_200} benchmark.} \label{tab:b200val} \end{table*} Spatially heterogeneous values for $\alpha_x$ and $\alpha_y$ are read from \url{eval/barite_200/alpha_[xy].csv}. They result from a single geostatistical simulation $\mathcal{F}$ of a $\mathcal{N}(0, 1)$ variable (anisotropic spherical variogram of correlation length 5 at $-30$\textdegree{} and 20 at 60\textdegree, and sill 1). This $\mathcal{F}$ field was scaled with an approximate order of magnitude for $\alpha_x$ and $\alpha_y$ respectively: \begin{equation*} \begin{cases} \displaystyle \alpha_x & \displaystyle = 10^{-7} + 10^{-6} \frac{\mathcal{F}-\min{(\mathcal{F})}}{\max{(\mathcal{F})}}\\ \alpha_y & \displaystyle = 10^{-7} + 10^{-7} \frac{\mathcal{F}-\min{(\mathcal{F})}}{\max{(\mathcal{F})}} \end{cases} \end{equation*} Figure~\ref{fig:b200a} displays the picture for $\alpha_x$ and figure~\ref{fig:b200b} the results for Ba after the 50 iterations in the benchmark. \begin{figure}[!htb] \centering \begin{subfigure}{0.6\textwidth} \includegraphics[width=\textwidth]{images/barite_200_field_alphax_crop.png} \caption{$\alpha_x$ field\label{fig:b200a}} \end{subfigure} \begin{subfigure}{0.6\textwidth} \includegraphics[width=\textwidth]{images/barite_200_field_Ba_crop.png} \caption{\chem{log_{10}Ba} after 50 iterations\label{fig:b200b}} \end{subfigure} \caption{Diffusivity field and end result for Ba in the \texttt{barite\_200} benchmark\label{fig:b200}} \end{figure} This benchmark runs in $\sim$11~s on 8 CPUs on my desktop. 
\clearpage \subsection{\texttt{barite\_large}} Larger grid version of the \texttt{barite\_200} benchmark, this time with heterogeneous initial conditions, closed boundaries everywhere and homogeneous diffusion coefficients. The purpose of this benchmark is to check for mass conservation. At a glance: \begin{table}[!h] \centering \begin{tabular}{|c|c|} \hline Grid & 1000x1000 \\ \hline Size & 10x10~m \\ \hline Timestep & 100~s \\ \hline Iterations & 50 \\ \hline $\alpha$ & homog. 1E-6 \\\hline Species \# & 7 \\ \hline Init & heter. \\ \hline \end{tabular} \caption{Summary of parameters for the \texttt{barite\_large} benchmark} \label{tab:blarge} \end{table} \noindent \textbf{Boundary conditions (BC):} all boundaries are closed. As for initial conditions, background concentrations are set in the whole grid (\textbf{All} record in table~\ref{tab:blargeval}). 1000 randomly selected grid cells (cf.\ figure~\ref{fig:blargea} for their position) are assigned the initial values of the \textbf{Locations} record. The non-rounded values are read from file \url{eval/barite_large/barite_large_init.csv}. \begin{table*}[!h] \centering \begin{tabular}{|r|r|r|r|r|r|r|r|}\hline & H & O & Charge & Ba & Cl & S\_6\_ & Sr \\\hline \textbf{All} & 110.0124 & 55.50868 & -1.2163e-09 & 4.4553e-07 & 0 & 0.0006152 & 0.00061 \\\hline \textbf{Locations} & 111.0124 & 55.50622 & -3.0000e-07 & 1 & 2 & 0.01 & 0.001 \\\hline \end{tabular} \caption{\texttt{Barite\_large} benchmark, initial conditions: the whole grid has the values in the \textbf{All} record, while 1000 cells as displayed in figure~\ref{fig:blargea} are assigned the values of the \textbf{Locations} record. 
} \label{tab:blargeval} \end{table*} \begin{figure}[!htb] \centering \begin{subfigure}[T]{0.6\textwidth} \vskip 0pt \includegraphics[width=\textwidth]{images/barite_large_init_locs.pdf} \caption{Locations of ``heterogeneities'' in initial conditions\label{fig:blargea}} \end{subfigure} \begin{subfigure}[T]{0.6\textwidth} \vskip 0pt \includegraphics[width=\textwidth]{images/barite_large_field_Ba_crop.png} \caption{\chem{log_{10}Ba} after 5 iterations\label{fig:blargeb}} \end{subfigure} \caption{Locations of the initial heterogeneities and end result for Ba in the \texttt{barite\_large} benchmark\label{fig:blarge}} \end{figure} This benchmark runs in $\sim$30~s on my desktop using 8 CPUs. \clearpage \subsection{\texttt{surfex}} Homogeneous benchmark with values inspired from POET's \texttt{surfex}, transporting 21 species. Here we use physically realistic values for $\alpha$ (isotropic and homogeneous, set to 1.1E-12) and simulate a rectangular domain of 2x1 \chem{cm^2}. At a glance: \begin{table}[!h] \centering \begin{tabular}{|c|c|} \hline Grid & 200x100 \\ \hline Size & 0.02x0.01~m \\ \hline Timestep & 3600~s (1~h) \\ \hline Iterations & 20 \\ \hline $\alpha$ & homog. 1.1E-12 \\ \hline Species \# & 21 \\ \hline Init & homog. \\ \hline \end{tabular} \caption{Summary of parameters for the \texttt{surfex} benchmark} \label{tab:bsurf} \end{table} \noindent \textbf{Boundary conditions (BC):} \textbf{all} domain boundaries are set to constant \textbf{BC} values. \textbf{Initial conditions (IC):} homogeneous, cf.\ table~\ref{tab:bsurfval}. 
\begin{table*}[!h] \centering \begin{tabular}{|l|r|r|}\hline & \textbf{IC} & \textbf{BC} \\ \hline H & 1.11e+02 & 120.0 \\ \hline O & 5.55e+01 & 55.1 \\ \hline Charge & -2.0e-13 & 8.0e-17 \\ \hline C & 2.0e-16 & 2.0e-15 \\ \hline CH4 & 2.0e-03 & 0.2 \\ \hline Ca & 2.0e-01 & 0.03 \\ \hline Cl & 3.0e-01 & 0.5 \\ \hline Fe2 & 1.4e-04 & 0.0002 \\ \hline Fe3 & 1.3e-09 & 2.0e-08 \\ \hline H0 & 6.0e-12 & 2.0e-11 \\ \hline K & 2.0e-03 & 1.0e-05 \\ \hline Mg & 1.0e-02 & 0.2 \\ \hline Na & 2.0e-01 & 0.3 \\ \hline HS2 & 5.9e-10 & 0 \\ \hline S2 & 8.3e-15 & 8.3e-12 \\ \hline S4 & 2.1e-14 & 5.1e-14 \\ \hline S6 & 1.6e-02 & 0.026 \\ \hline Sr & 4.5e-04 & 0.045 \\ \hline U4 & 2.5e-09 & 2.5e-08 \\ \hline U5 & 1.6e-10 & 1.6e-10 \\ \hline U6 & 2.3e-07 & 1.0e-05 \\ \hline \end{tabular} \caption{\texttt{surfex} benchmark, homogeneous initial conditions \textbf{IC} and boundary values \textbf{BC}} \label{tab:bsurfval} \end{table*} \begin{figure}[!htb] \centering \begin{subfigure}[T]{0.6\textwidth} \includegraphics[width=\textwidth]{images/surfex_field_U6.png} \caption{\chem{\log_{10}(U6)} after 20 iterations\label{fig:bsurfa}} \end{subfigure} \begin{subfigure}[T]{0.6\textwidth} \includegraphics[width=\textwidth]{images/surfex_field_Na.png} \caption{Na (linear scale) after 5 iterations\label{fig:bsurfb}} \end{subfigure} \caption{Results for U6 and Na in the \texttt{surfex} benchmark\label{fig:bsurf}} \end{figure} This benchmark runs in $\sim$7~s on my desktop using 8 CPUs. \clearpage \section{Some hopefully useful metrics} The problem with the \emph{classical}, \emph{central} measures such as MAE (Mean Absolute Error) or RMSE (Root Mean Square Error), even when scaled using a constant (e.g., the range of the true variable, its mean, or standard deviation), is that they underestimate discrepancies for very small values and conversely are only sensitive to large values. 
In practice these measures only work if the distribution of the variable of interest is uniform on a small range or symmetrically distributed, e.g., Gaussian. However, many variables we deal with in geochemical models are rather \emph{lognormally} distributed or \emph{uniformly distributed on a logarithmic scale}. In any case, it is common that variables we try to regress or match with models span many orders of magnitude, and the error measures defined above are biased towards large values. This problem can be partially solved by operating on the logarithms of the variables. Plugging the logarithms of the true and predicted variables $y$ and $\hat{y}$ in the usual MAE and RMSE formulas we get, \textbf{assuming both variables strictly larger than 0}: \begin{equation} \label{eq:MAElog} \text{MAE}_{\text{log}} = \frac{1}{N}\sum \left| \log{y_i} - \log{\hat{y}_i}\right| = \frac{1}{N}\sum \left| \log{\frac{y_i}{\hat{y}_i}}\right| \end{equation} \begin{equation} \label{eq:RMSElog} \text{RMSE}_{\text{log}} = \sqrt{\frac{1}{N}\sum \left( \log{y_i} - \log{\hat{y}_i}\right)^2} = \sqrt{\frac{1}{N}\sum \left( \log{\frac{y_i}{\hat{y}_i}}\right)^2} \end{equation} It is usual to define a slightly different variant of \chem{RMSE_{log}}, called RMSLE (Root Mean Square Logarithmic Error), by adding 1 to both the predicted and the true value to avoid taking the logarithm of (or dividing by) 0: \begin{equation} \label{eq:RMSLE} \text{RMSLE} = \sqrt{\frac{1}{N}\sum \left[ \log{(y_i+1)} - \log{(\hat{y}_i+1)}\right]^2} = \sqrt{\frac{1}{N}\sum \left( \log{\frac{y_i+1}{\hat{y}_i+1}}\right)^2} \end{equation} All these measures yield 0 if $y$ and $\hat{y}$ are identical. 
Note that the \chem{MAE_{log}} resembles a \emph{geometric mean of the absolute values of the quotients} of $y$ and $\hat{y}$ per observation (putting either $y$ or $\hat{y}$ at the denominator is equivalent): \begin{equation} \label{eq:GMAQ} \text{Geometric Mean of Absolute Quotients} = \left(\prod \left|\frac{\hat{y}_{i}}{y_i}\right|\right)^{\frac {1}{N}} \end{equation} The geometric mean of the quotients would be 1 if the two variables are identical. The connection with the \chem{MAE_{log}} is easy to see since by definition a geometric mean is the $N$-th root of the product of the terms: \begin{equation} \label{eq:5} \exp \left[{\frac {1}{N}}\sum\log a_{i}\right]= \left(\prod a_{i}\right)^{\frac {1}{N}} \end{equation} So the \chem{MAE_{log}} is the logarithm of the geometric mean of the per-observation quotients between the variables, each taken with the larger of $y_i$ and $\hat{y}_i$ at the numerator, i.e., of the terms $a_i = \exp\left|\log{(y_i/\hat{y}_i)}\right| = \max{(y_i/\hat{y}_i,\, \hat{y}_i/y_i)}$. Instead of the simple quotient of the true and predicted values $y$ and $\hat{y}$ we can define a relative (\textbf{per observation}) error $\alpha_i$ as: \begin{equation} \label{eq:relalpha} \alpha_i = \begin{cases} \displaystyle \frac{ y_i-\hat{y}_i}{y_i} & \text{if~} \hspace{0.1cm} y_i \neq 0 \\ 1 & \text{if~} \hspace{0.1cm} y_i=0 \text{\hspace{0.1cm} and \hspace{0.1cm}} \hat{y}_i \neq 0 \\ 0 & \text{if~} \hspace{0.1cm} y_i=0 \text{\hspace{0.1cm} and \hspace{0.1cm}} \hat{y}_i = 0 \end{cases} \end{equation} The same treatment of the case when a variable is 0 of course also applies to the above introduced relative measures. 
Without using logarithms we can then define some relative measures such as the Mean Absolute Percentage Error (\textbf{MAPE}) and Relative RMSE (\textbf{RRMSE}): \begin{equation} \label{eq:MAPE} \text{MAPE} = \frac{100\%}{N}\sum \left| \alpha_i \right| \end{equation} \begin{equation} \label{eq:RRMSE} \text{RRMSE} = \sqrt{\frac{1}{N}\sum \left( \alpha_i\right)^2} \end{equation} These relative measures account for discrepancies across all magnitudes of the $y$ and $\hat{y}$ variables while preserving the physical meaning of 0. An implementation of all these metrics in R is given in the \texttt{Metrics.R} file in this same directory. \end{document} %%% Local Variables: %%% mode: xelatex %%% TeX-master: t %%% End: