210 lines
6.6 KiB
Julia
210 lines
6.6 KiB
Julia
using DataFrames, RCall, Statistics, Plots, Base.Threads, CSV, CategoricalArrays
|
|
|
|
prefix = "dolo_inter"
|
|
input_dir = "./"
|
|
|
|
output_dir = "error_eval/"
|
|
|
|
if !isdir(output_dir)
|
|
mkdir(output_dir)
|
|
end
|
|
|
|
relevant_header = ["H", "O", "C", "Ca", "Cl", "Mg", "Calcite", "Dolomite"]
|
|
@rput relevant_header
|
|
|
|
function alphas(obs::AbstractVector{<:Real}, pred::AbstractVector{<:Real})
|
|
# Workhorse function computing the "alphas" (cfr workflows/metrics)
|
|
# for relative error measures. We first compute the alphas, then take
|
|
# care of the +Inf, -Inf, NaN cases scanning the input vectors and
|
|
# substituting 0 and 1. NAs in the predictions are preserved
|
|
|
|
# create vector of length obs
|
|
alphas_result = zeros(length(obs))
|
|
|
|
for i in eachindex(obs, pred, alphas_result)
|
|
if isnan(obs[i]) && isnan(pred[i])
|
|
# Preserve NaN in pred
|
|
alphas_result[i] = NaN
|
|
elseif obs[i] == 0 && pred[i] == 0
|
|
alphas_result[i] = 0
|
|
elseif obs[i] == 0 && pred[i] > 0
|
|
alphas_result[i] = 1
|
|
elseif obs[i] == 0 && pred[i] < 0
|
|
alphas_result[i] = -1
|
|
else
|
|
alphas_result[i] = 1 - pred[i] / obs[i]
|
|
end
|
|
end
|
|
|
|
return alphas_result
|
|
end
|
|
|
|
function MAPE(obs::AbstractVector{<:Real}, pred::AbstractVector{<:Real})
|
|
# Compute the Mean Absolute Percentage Error (MAPE) between obs and pred
|
|
# MAPE is defined as the mean of the absolute values of the alphas
|
|
# (relative errors) times 100
|
|
|
|
alphas_result = alphas(obs, pred)
|
|
|
|
return 100 * mean(abs.(alphas_result[.!isnan.(alphas_result)]))
|
|
end
|
|
|
|
function RRMSE(obs::AbstractVector{<:Real}, pred::AbstractVector{<:Real})
|
|
# Compute the Relative Root Mean Squared Error (RRMSE) between obs and pred
|
|
# RRMSE is defined as the RMSE divided by the maximum of the observations
|
|
|
|
alphas_result = alphas(obs, pred)
|
|
|
|
return sqrt(mean(alphas_result[.!isnan.(alphas_result)] .^ 2))
|
|
end
|
|
|
|
|
|
function apply_functions_colwise(obs::DataFrame, interp::DataFrame)::DataFrame
|
|
res = DataFrame()
|
|
|
|
# Loop over all columns
|
|
for spec in relevant_header
|
|
real = obs[:, spec]
|
|
pred = interp[:, spec]
|
|
|
|
# Compute RRMSE and MAPE
|
|
rrmse = RRMSE(real, pred)
|
|
mape = MAPE(real, pred)
|
|
|
|
# Append the results to the new DataFrame
|
|
colname = string(spec, "_RRMSE")
|
|
res[:, colname] = [rrmse]
|
|
|
|
colname = string(spec, "_MAPE")
|
|
res[:, colname] = [mape]
|
|
end
|
|
|
|
return res
|
|
end
|
|
|
|
function get_all_qs_files(dir::String)::Vector{String}
|
|
files = readdir(dir, join=true)
|
|
files = filter(x -> occursin(r"iter_\d+.qs2", x), files)
|
|
return files[2:end]
|
|
end
|
|
|
|
function get_chemistry_data(files::Vector{String})::Dict{Int,DataFrame}
|
|
res = nothing
|
|
|
|
basenames = basename.(files)
|
|
basenames = replace.(basenames, ".qs2" => "")
|
|
its = split.(basenames, "_")
|
|
its = last.(its)
|
|
|
|
iterations = parse.(Int, its)
|
|
|
|
@rput files
|
|
R"res <- lapply(files, function(x) { dat <- qs2::qs_read(x)$C ; dat <- dat[,relevant_header] ; return(dat) })"
|
|
@rget res
|
|
|
|
data = Dict(zip(iterations, res))
|
|
|
|
return data
|
|
end
|
|
|
|
reference_dir = "$input_dir/dolo_inter_ref_128_1/"
|
|
reference_files = get_all_qs_files(reference_dir)
|
|
reference_data = get_chemistry_data(reference_files)
|
|
|
|
result_dirs = readdir(input_dir)
|
|
|
|
# testdatadir = result_dirs[1]
|
|
#
|
|
# metainfo = split(testdatadir, "_")
|
|
#
|
|
# aqueous = parse(Int, metainfo[3])
|
|
# minerals = parse(Int, metainfo[4])
|
|
# ntpn = parse(Int, metainfo[5])
|
|
#
|
|
# files = get_all_qs_files(joinpath(input_dir, testdatadir))
|
|
# data = get_chemistry_data(files)
|
|
#
|
|
# apply_functions_colwise(reference_data[1], data[1])
|
|
|
|
df_global_data = DataFrame()
|
|
|
|
for dir in result_dirs
|
|
# continue if dir contains "$prefix_ref"
|
|
if occursin("$(prefix)_ref", dir) || !occursin(prefix, dir)
|
|
continue
|
|
end
|
|
|
|
metainfo = split(dir, "_")
|
|
|
|
has_important = length(metainfo) == 7
|
|
|
|
aqueous = parse(Int, metainfo[3])
|
|
minerals = parse(Int, metainfo[4])
|
|
ntpn = parse(Int, metainfo[5])
|
|
|
|
files = get_all_qs_files(joinpath(input_dir, dir))
|
|
data = get_chemistry_data(files)
|
|
|
|
for (iter, val) in data
|
|
realval = reference_data[iter]
|
|
|
|
df_local = apply_functions_colwise(realval, val)
|
|
df_local[:, "Aqueous"] .= aqueous
|
|
df_local[:, "Minerals"] .= minerals
|
|
df_local[:, "iter"] .= iter
|
|
|
|
global df_global_data = vcat(df_global_data, df_local)
|
|
end
|
|
end
|
|
|
|
function plot_by_iteration_and_rounding(df_in::DataFrame, species::String, errtype::String)
|
|
my_symbol = Symbol(species * "_" * errtype)
|
|
|
|
df = select(df_in, [:AM, :iter, my_symbol])
|
|
|
|
sort!(df, :iter)
|
|
|
|
max_iter = maximum(df.iter)
|
|
only_last_iter = filter(row -> row.iter == max_iter, df)
|
|
sort!(only_last_iter, my_symbol, rev=true)
|
|
df[!, :AM] = CategoricalArray(df[!, :AM])
|
|
levels!(df[!, :AM], only_last_iter[!, :AM])
|
|
|
|
AM_CA = CategoricalArray(only_last_iter.AM)
|
|
levels!(AM_CA, only_last_iter.AM)
|
|
|
|
# plot
|
|
p = plot(df.iter, df[!, my_symbol], group=df.AM,
|
|
xlabel="Iteration", ylabel=errtype, title="$(errtype) $(species) vs. Iteration",
|
|
minorgrid=true,
|
|
legend=:outerright,
|
|
# yticks=[1e-7, 1e-6, 1e-5, 1e-4, 1e-3],
|
|
# ylim=(1e-7, 1e-3),
|
|
legendtitle="(Aqu, Min)",
|
|
legendcolumns=1,
|
|
legendfontsize=7,
|
|
legendtitlefontsize=7,)
|
|
|
|
return p
|
|
end
|
|
|
|
grouped_df = groupby(df_global_data, [:Aqueous, :Minerals, :iter])
|
|
# Combine all columns by maximum
|
|
combine_df = combine(grouped_df, names(df_global_data) .=> maximum .=> names(df_global_data))
|
|
|
|
combine_df[!, :AM] = map(x -> "(" * string(x[1]) * ", " * string(x[2]) * ")",
|
|
zip(combine_df.Aqueous, combine_df.Minerals))
|
|
|
|
CSV.write("$output_dir/eval_dolomite_MAPE_RRMSE.csv", combine_df)
|
|
|
|
p = plot_by_iteration_and_rounding(combine_df, "Dolomite", "MAPE")
|
|
savefig(p, "$output_dir/MAPE_Dolomite.pdf")
|
|
|
|
p = plot_by_iteration_and_rounding(combine_df, "Calcite", "MAPE")
|
|
savefig(p, "$output_dir/MAPE_Calcite.pdf")
|
|
|
|
p = plot_by_iteration_and_rounding(combine_df, "Mg", "MAPE")
|
|
savefig(p, "$output_dir/MAPE_Mg.pdf")
|
|
|
|
|