naaice_tug_input/doc/Metrics.R
Marco De Lucia 36f6d1df1c fixing doc
2024-04-10 23:41:58 +02:00

190 lines
6.0 KiB
R

## R code to compute the different metrics for GeoML Benchmarks, GFZ
## implementation
##
## M. De Lucia, delucia@gfz-potsdam.de
##
## Time-stamp: "Last modified 2024-04-10 23:40:08 delucia"
## Hopefully self-evident naming of the functions
## Convenience function for output of messages to stdout prepending
## name of function which called it
msg_ <- function(...) {
## check if we are called from another function or interactively
fname <- sys.call(-1)
if (is.null(fname)) {
cat(paste(..., "\n"))
} else {
prefix <- paste0("::", as.list(fname)[[1]], "::")
cat(paste(prefix, ..., "\n"))
}
invisible()
}
MAElog <- function(actual, pred) {
pred[is.na(pred)] <- 0
ind <- which(actual == 0 | pred <= 0)
if (length(ind) > 0) {
msg_("Removed ", length(ind))
mean(abs(log(pred[-ind]/actual[-ind])), na.rm=TRUE)
} else {
mean(abs(log(pred/actual)), na.rm=TRUE)
}
}
RMSElog <- function(actual, pred, eps=FALSE) {
pred[is.na(pred)] <- 0
if (length(ind <- which(actual==0 | pred<=0)) > 0) {
msg_("Removed ", length(ind))
sqrt(mean(log(pred[-ind]/actual[-ind])^2, na.rm=TRUE))
} else {
sqrt(mean(log(pred/actual)^2, na.rm=TRUE))
}
}
GMAQabs <- function(actual, pred) {
pred[is.na(pred)] <- 0
ind <- which(actual==0 | pred<=0)
if (length(ind) > 0) {
msg_("Removed ", length(ind))
abs(1-exp(mean(log(abs(pred[-ind]/actual[-ind])), na.rm=TRUE)))
} else {
abs(1-exp(mean(log(abs(pred/actual)), na.rm=TRUE)))
}
}
## Workhorse function computing the "alphas" (cfr workflows/metrics)
## for relative error measures. We first compute the alphas, then take
## care of the +Inf, -Inf, NaN cases scanning the input vectors and
## substituting 0 and 1. NAs in the predictions are preserved
Alphas <- function(actual, pred) {
alphas <- 1-pred/actual
alphas[actual==0 & pred==0] <- 0
alphas[actual==0 & pred > 0] <- 1
alphas[actual==0 & pred < 0] <- -1
alphas
}
MAPE <- function(actual, pred) {
100*mean(abs(Alphas(actual, pred)), na.rm=TRUE)
}
RRMSE <- function(actual, pred) {
sqrt(mean(Alphas(actual, pred)^2, na.rm=TRUE))
}
RMSLE <- function(actual, pred) {
sqrt(mean((log((1+pred)/(1+actual))^2), na.rm=TRUE))
}
MAE <- function(actual, pred) {
mean(abs(actual-pred), na.rm=TRUE)
}
RMSE <- function(actual, pred) {
sqrt(mean((actual-pred)^2, na.rm=TRUE))
}
MSE <- function(actual, pred) {
mean((actual-pred)^2, na.rm=TRUE)
}
## Function is called "R2" but it actually computes "1-R2" (note that
## "1-rss/tss" is missing)
R2 <- function(actual, pred) {
rss <- sum((actual - pred)^2, na.rm=TRUE)
tss <- sum((pred - mean(pred, na.rm=TRUE))^2, na.rm=TRUE)
rss/tss
}
Range <- function(x) {
diff(range(x, na.rm=TRUE))
}
## 2024-03-08: fixed Range of actual instead of pred, thanks @Mary
Normsupnorm<- function(actual, pred) {
max(abs(actual - pred), na.rm=TRUE)/Range(actual)
}
## This function computes all the measures at once in the (hopefully)
## expected right order. You need to loop this function over all
## variables.
Metrics <- function(actual, pred) {
## MAE MSE RMSE 1-R2 NMAE NRMSE Normsupnorm MAElog RMSElog RMSLE GMAQabs MAPE RRMSE NNEG Range
alphas <- Alphas(actual, pred)
ran_pred <- Range(pred)
ran_actu <- Range(actual) ## 2024-03-08: same fix as in Normsupnorm
ret <- c(
MAE = MAE(actual, pred),
MSE = MSE(actual, pred),
RMSE = RMSE(actual, pred),
'1-R2' = R2(actual, pred),
NMAE = MAE(actual, pred)/ran_actu,
NRMSE = RMSE(actual, pred)/ran_actu,
Normsupnorm = Normsupnorm(actual, pred),
MAElog = MAElog(actual,pred),
RMSElog = RMSElog(actual,pred),
RMSLE = RMSLE(actual, pred),
GMAQabs = GMAQabs(actual, pred),
MAPE = MAPE(actual, pred),
RRMSE = RRMSE(actual, pred),
NNEG = NNEG(pred),
Range = ran_pred
)
ret
}
## Function applying Metrics() on all columns of two data.frames
## having the same name
DoAllMetrics <- function(vals, preds) {
## Match column names (=variables) in both data.frames
vars <- intersect(colnames(vals), colnames(preds))
msg_("Going to compute metrics for: ", paste(vars, collapse=', '))
## Some lambda calculus combined with R ugliness
as.data.frame(t(sapply(vars, function(x) Metrics(actual = vals[[x]], pred = preds[[x]]))))
}
### Store the computed metrics into a .csv file. First format all
### columns with "formatC" specifying 7 digits, and then use function
### "data.table::fwrite" with option "scipen=1" to try and use the
### scientific/exponential notation as much as possible. The empty
### spaces resulting from padding are removed too.
WriteMetrics <- function(tab, fileout) {
require(data.table)
tocsv <- data.frame(Output=rownames(tab), gsub(" ", "", sapply(tab, formatC, digits = 7)), check.names = FALSE)
data.table::fwrite(tocsv, file=fileout, scipen=1)
}
##################################################################
### ###
### Usage Example ###
### ###
##################################################################
### We start by sourcing these functions
## source("./Metrics.R")
### NOTE: I use the extension package data.table to read and write the
### .csv files.
## require(data.table)
### NOTE2: we expect named columns in the .csv - with header!
### Example to compute all metrics at once for from a file - check colnames!
## Trues <- data.table::fread("./Trues.csv", header=TRUE, data.table = FALSE)
### Load the simulation results dataset
## Preds <- data.table::fread("./MyPreds.csv", header=TRUE, data.table = FALSE)
## res <- DoAllMetrics(Val, Preds)
### Write out the metrics
## WriteMetrcs(res, file="MyMetrics.csv")