fixing doc

This commit is contained in:
Marco De Lucia 2024-04-10 23:41:58 +02:00
parent 608926ea02
commit 36f6d1df1c
4 changed files with 205 additions and 15 deletions

View File

@ -1,17 +1,17 @@
# TUG Benchmark
**This Readme is under construction**
**Refer to the =Description.pdf= in the =doc= folder**
This repository contains input data from POET simulations used in the NAAICE
project and is simulated with the latest version of tug with heterogeneous
alphas.
This repository contains input data from POET simulations used in the
NAAICE project and is simulated with the latest version of tug with
heterogeneous alphas.
The alpha values are randomly generated from a uniform distribution between 1e-7
and 1e-6 and stored in according .csv files.
Benchmarks (grids, timestep, file paths, etc) are defined in the
header file `eval/bench_defs.hpp.in`. The data used by the three
benchmarks `barite_200`, `barite_large` and `surfex` are in the
corresponding subdirectories of `eval`, as `tar.gz`. Remember to
unpack them before running the executable!
You can find the three benchmarks in the `eval` directory. There is also a
header file, which describes the benchmarks with its grid shape etc.
To write the resulting fields to a file, you can set the `BENCH_OUTPUT`
variable, e.g. `BENCH_OUTPUT=1 ./bench`. The resulting fields are written to
according .csv files.
To write the results to file, set the `BENCH_OUTPUT` environmental
variable, e.g. `BENCH_OUTPUT=1 ./bench`. The resulting fields are
written to according .csv files *in the `build` directory*.

Binary file not shown.

View File

@ -1,4 +1,4 @@
%% Time-stamp: "Last modified 2024-04-10 23:08:54 delucia"
%% Time-stamp: "Last modified 2024-04-10 23:33:46 delucia"
\documentclass[a4paper,10pt]{article}
\usepackage{listings}
@ -360,8 +360,9 @@ Absolute Percentage Error (\textbf{MAPE}) and Relative RMSE
\end{equation}
These relative measures account for discrepancies across all
magnitudes of the $y$ and $\hat{y}$ variables and preserve the
physical meaning of 0.
magnitudes of the $y$ and $\hat{y}$ variables while preserving the
physical meaning of 0. An implementation of all these metrics in R is
given in the \texttt{Metrics.R} file in this same directory.
\end{document}

189
doc/Metrics.R Normal file
View File

@ -0,0 +1,189 @@
## R code to compute the different metrics for GeoML Benchmarks, GFZ
## implementation
##
## M. De Lucia, delucia@gfz-potsdam.de
##
## Time-stamp: "Last modified 2024-04-10 23:40:08 delucia"
## Hopefully self-evident naming of the functions
## Convenience function for output of messages to stdout prepending
## name of function which called it
msg_ <- function(...) {
## check if we are called from another function or interactively
fname <- sys.call(-1)
if (is.null(fname)) {
cat(paste(..., "\n"))
} else {
prefix <- paste0("::", as.list(fname)[[1]], "::")
cat(paste(prefix, ..., "\n"))
}
invisible()
}
MAElog <- function(actual, pred) {
pred[is.na(pred)] <- 0
ind <- which(actual == 0 | pred <= 0)
if (length(ind) > 0) {
msg_("Removed ", length(ind))
mean(abs(log(pred[-ind]/actual[-ind])), na.rm=TRUE)
} else {
mean(abs(log(pred/actual)), na.rm=TRUE)
}
}
RMSElog <- function(actual, pred, eps=FALSE) {
pred[is.na(pred)] <- 0
if (length(ind <- which(actual==0 | pred<=0)) > 0) {
msg_("Removed ", length(ind))
sqrt(mean(log(pred[-ind]/actual[-ind])^2, na.rm=TRUE))
} else {
sqrt(mean(log(pred/actual)^2, na.rm=TRUE))
}
}
GMAQabs <- function(actual, pred) {
pred[is.na(pred)] <- 0
ind <- which(actual==0 | pred<=0)
if (length(ind) > 0) {
msg_("Removed ", length(ind))
abs(1-exp(mean(log(abs(pred[-ind]/actual[-ind])), na.rm=TRUE)))
} else {
abs(1-exp(mean(log(abs(pred/actual)), na.rm=TRUE)))
}
}
## Workhorse function computing the "alphas" (cfr workflows/metrics)
## for relative error measures. We first compute the alphas, then take
## care of the +Inf, -Inf, NaN cases scanning the input vectors and
## substituting 0 and 1. NAs in the predictions are preserved
Alphas <- function(actual, pred) {
alphas <- 1-pred/actual
alphas[actual==0 & pred==0] <- 0
alphas[actual==0 & pred > 0] <- 1
alphas[actual==0 & pred < 0] <- -1
alphas
}
MAPE <- function(actual, pred) {
100*mean(abs(Alphas(actual, pred)), na.rm=TRUE)
}
RRMSE <- function(actual, pred) {
sqrt(mean(Alphas(actual, pred)^2, na.rm=TRUE))
}
RMSLE <- function(actual, pred) {
sqrt(mean((log((1+pred)/(1+actual))^2), na.rm=TRUE))
}
MAE <- function(actual, pred) {
mean(abs(actual-pred), na.rm=TRUE)
}
RMSE <- function(actual, pred) {
sqrt(mean((actual-pred)^2, na.rm=TRUE))
}
MSE <- function(actual, pred) {
mean((actual-pred)^2, na.rm=TRUE)
}
## Function is called "R2" but it actually computes "1-R2" (note that
## "1-rss/tss" is missing)
R2 <- function(actual, pred) {
rss <- sum((actual - pred)^2, na.rm=TRUE)
tss <- sum((pred - mean(pred, na.rm=TRUE))^2, na.rm=TRUE)
rss/tss
}
Range <- function(x) {
diff(range(x, na.rm=TRUE))
}
## 2024-03-08: fixed Range of actual instead of pred, thanks @Mary
Normsupnorm<- function(actual, pred) {
max(abs(actual - pred), na.rm=TRUE)/Range(actual)
}
## This function computes all the measures at once in the (hopefully)
## expected right order. You need to loop this function over all
## variables.
Metrics <- function(actual, pred) {
## MAE MSE RMSE 1-R2 NMAE NRMSE Normsupnorm MAElog RMSElog RMSLE GMAQabs MAPE RRMSE NNEG Range
alphas <- Alphas(actual, pred)
ran_pred <- Range(pred)
ran_actu <- Range(actual) ## 2024-03-08: same fix as in Normsupnorm
ret <- c(
MAE = MAE(actual, pred),
MSE = MSE(actual, pred),
RMSE = RMSE(actual, pred),
'1-R2' = R2(actual, pred),
NMAE = MAE(actual, pred)/ran_actu,
NRMSE = RMSE(actual, pred)/ran_actu,
Normsupnorm = Normsupnorm(actual, pred),
MAElog = MAElog(actual,pred),
RMSElog = RMSElog(actual,pred),
RMSLE = RMSLE(actual, pred),
GMAQabs = GMAQabs(actual, pred),
MAPE = MAPE(actual, pred),
RRMSE = RRMSE(actual, pred),
NNEG = NNEG(pred),
Range = ran_pred
)
ret
}
## Function applying Metrics() on all columns of two data.frames
## having the same name
DoAllMetrics <- function(vals, preds) {
## Match column names (=variables) in both data.frames
vars <- intersect(colnames(vals), colnames(preds))
msg_("Going to compute metrics for: ", paste(vars, collapse=', '))
## Some lambda calculus combined with R ugliness
as.data.frame(t(sapply(vars, function(x) Metrics(actual = vals[[x]], pred = preds[[x]]))))
}
### Store the computed metrics into a .csv file. First format all
### columns with "formatC" specifying 7 digits, and then use function
### "data.table::fwrite" with option "scipen=1" to try and use the
### scientific/exponential notation as much as possible. The empty
### spaces resulting from padding are removed too.
WriteMetrics <- function(tab, fileout) {
require(data.table)
tocsv <- data.frame(Output=rownames(tab), gsub(" ", "", sapply(tab, formatC, digits = 7)), check.names = FALSE)
data.table::fwrite(tocsv, file=fileout, scipen=1)
}
##################################################################
### ###
### Usage Example ###
### ###
##################################################################
### We start by sourcing these functions
## source("./Metrics.R")
### NOTE: I use the extension package data.table to read and write the
### .csv files.
## require(data.table)
### NOTE2: we expect named columns in the .csv - with header!
### Example to compute all metrics at once for from a file - check colnames!
## Trues <- data.table::fread("./Trues.csv", header=TRUE, data.table = FALSE)
### Load the simulation results dataset
## Preds <- data.table::fread("./MyPreds.csv", header=TRUE, data.table = FALSE)
## res <- DoAllMetrics(Val, Preds)
### Write out the metrics
## WriteMetrcs(res, file="MyMetrics.csv")