Added rrmse threshold values

This commit is contained in:
rastogi 2025-10-15 11:32:29 +02:00
parent 96567f9074
commit af6462409b
8 changed files with 83 additions and 44 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,8 +1,9 @@
# Runtime settings for the run: iteration count, time step, checkpoint/control
# intervals and per-species error thresholds.
# NOTE(review): this span comes from a rendered diff, so each setting is assigned
# twice. Evaluated top to bottom, the later assignment of each name is the one
# that takes effect.
iterations <- 50
dt <- 100
checkpoint_interval <- 30
control_interval <- 40
# 14 entries here; the later assignment below has 15.
mape_threshold <- c(.0, 1e-12, 1e-1, 1e-1, 1e-2, 1e-2, 1e+1, 1e+1, 1e-3, 0, 1e-1, .0, 0.12, .0)
iterations <- 1000
dt <- 200
checkpoint_interval <- 100
control_interval <- 200
# Both threshold vectors have 15 entries and differ only in the fourth value
# (1e-1 for MAPE, 1e-4 for RRMSE).
# NOTE(review): presumably one entry per species column, in the same order as the
# field properties the C++ side indexes them against -- confirm against the caller.
mape_threshold <- c(.0, .0, 1e-12, 1e-1, 1e-1, 1e-2, 1e-2, 1e+1, 1e+1, 1e-3, 0, 1e-1, .0, 0.12, .0)
rrmse_threshold <- c(.0, .0, 1e-12, 1e-4, 1e-1, 1e-2, 1e-2, 1e+1, 1e+1, 1e-3, 0, 1e-1, .0, 0.12, .0)
# Iterations 50, 100, ... up to `iterations`, in steps of 50.
out_save <- seq(50, iterations, by = 50)
list(
@ -11,5 +12,6 @@ list(
out_save = out_save,
checkpoint_interval = checkpoint_interval,
control_interval = control_interval,
mape_threshold = mape_threshold
mape_threshold = mape_threshold,
rrmse_threshold = rrmse_threshold
)

BIN
bin/poet

Binary file not shown.

16
bin/run_poet.sh Normal file
View File

@ -0,0 +1,16 @@
#!/bin/bash
# Slurm batch script that launches the `poet` binary under MPI for the
# poet_dolo_interp job: 2 exclusive nodes, 24 tasks each, 2 hour limit.
# stdout/stderr go to poet_dolo_interp_<jobid>.out/.err.
# All #SBATCH directives must stay above the first executable command.
#SBATCH --job-name=poet_dolo_interp
#SBATCH --output=poet_dolo_interp_%j.out
#SBATCH --error=poet_dolo_interp_%j.err
#SBATCH --nodes=2
#SBATCH --ntasks=48
#SBATCH --ntasks-per-node=24
#SBATCH --time=02:00:00
#SBATCH --exclusive
#SBATCH --mail-user=vasudha.rastogi@uni-potsdam.de
#SBATCH --mail-type=END,FAIL
module load cmake gcc openmpi
# Take the rank count from the allocation so it cannot drift from --ntasks
# (or from an `sbatch --ntasks=...` override); fall back to 48 outside Slurm.
mpirun -n "${SLURM_NTASKS:-48}" ./poet --interp dolo_interp_rt.R dolo_interp.qs2 dolo_interp

View File

@ -1,18 +1,34 @@
Iteration Rollback Species MAPE RRMSE
---------------------------------------------------------------------------
40 0 cell_ID 0 0
40 0 ID 0 0
40 0 H 1.98104e-14 3.20223e-16
40 0 O 0.108665 0.00113056
40 0 Charge 0.0118752 0.000124408
40 0 C 0.000321187 3.76695e-05
40 0 Ca 0.000321235 3.76683e-05
40 0 Cl 2.96333 0.0577691
40 0 Mg 3.01563 0.0600123
40 0 Calcite 0.000193346 2.27729e-05
40 0 Calcite_p1 0 0
40 0 Dolomite 3.52007e-06 5.97467e-06
40 0 Dolomite_p1 0 0
40 0 O2g_eq 0.292809 0.00304717
40 0 O2g_si 0 0
20 0 cell_ID 0 0
20 0 ID 0 0
20 0 H 1.77731e-14 2.7095e-16
20 0 O 0.0553364 0.000567638
20 0 Charge 0.0138864 0.000147021
20 0 C 8.81221e-05 1.41354e-05
20 0 Ca 8.8185e-05 1.41354e-05
20 0 Cl 6.02029 0.0873663
20 0 Mg 6.08272 0.0883807
20 0 Calcite 5.26812e-05 8.46394e-06
20 0 Calcite_p1 0 0
20 0 Dolomite 5.3721e-07 1.14804e-06
20 0 Dolomite_p1 0 0
20 0 O2g_eq 0.145109 0.00148947
20 0 O2g_si 0 0
40 1 cell_ID 0 0
40 1 ID 0 0
40 1 H 1.70761e-14 3.24997e-16
40 1 O 0.0285341 0.000295504
40 1 Charge 0.012912 0.000140398
40 1 C 0.000332495 3.36477e-05
40 1 Ca 0.000332544 3.36467e-05
40 1 Cl 4.03557 0.0764422
40 1 Mg 4.11227 0.0777245
40 1 Calcite 0.000199586 2.02213e-05
40 1 Calcite_p1 0 0
40 1 Dolomite 1.50815e-06 1.98748e-06
40 1 Dolomite_p1 0 0
40 1 O2g_eq 0.0708157 0.000734877
40 1 O2g_si 0 0

View File

@ -271,6 +271,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters &params)
Rcpp::as<uint32_t>(global_rt_setup->operator[]("checkpoint_interval"));
params.mape_threshold =
Rcpp::as<std::vector<double>>(global_rt_setup->operator[]("mape_threshold"));
params.rrmse_threshold =
Rcpp::as<std::vector<double>>(global_rt_setup->operator[]("rrmse_threshold"));
}
catch (const std::exception &e)
{
@ -303,37 +305,40 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem)
/**
 * @brief Roll back to the last checkpoint if any species exceeds an error threshold.
 *
 * Takes the most recent entry of chem.error_history and compares, index by index,
 * its MAPE and RRMSE values against params.mape_threshold and
 * params.rrmse_threshold. Entries whose MAPE and RRMSE are both zero are skipped.
 * On the first index that exceeds either threshold, the file
 * "checkpoint<N>.hdf5" is read into the chemistry field, where N is the largest
 * multiple of params.checkpoint_interval strictly below current_iteration
 * (0 if there is none), and current_iteration is set to the iteration stored
 * in that checkpoint.
 *
 * @param chem              Chemistry module providing the error history and the field.
 * @param params            Runtime parameters (thresholds, checkpoint interval).
 * @param current_iteration In/out: overwritten with the checkpoint's iteration on rollback.
 * @return true if a rollback was performed, false if all checked entries are within limits.
 *
 * NOTE(review): assumes chem.error_history is non-empty and
 * params.checkpoint_interval is non-zero -- confirm at the call site.
 */
bool triggerRollbackIfExceeded(ChemistryModule &chem, RuntimeParameters &params, uint32_t &current_iteration)
{
  const auto &mape = chem.error_history.back().mape;
  const auto &rrmse = chem.error_history.back().rrmse;
  const auto &props = chem.getField().GetProps();

  // All five sequences are indexed with the same i, so only the common prefix
  // can be inspected without reading out of bounds.
  auto n_checked = params.mape_threshold.size();
  const auto limit_to = [&n_checked](decltype(n_checked) available)
  {
    if (available < n_checked)
      n_checked = available;
  };
  limit_to(params.rrmse_threshold.size());
  limit_to(mape.size());
  limit_to(rrmse.size());
  limit_to(props.size());

  if (n_checked < params.mape_threshold.size())
  {
    MSG("[WARNING] Threshold and error vectors differ in length; checking only the first " +
        std::to_string(n_checked) + " entries.");
  }

  for (decltype(n_checked) i = 0; i < n_checked; ++i)
  {
    // Skip invalid entries
    if (mape[i] == 0 && rrmse[i] == 0)
      continue;

    const bool mape_exceeded = mape[i] > params.mape_threshold[i];
    const bool rrmse_exceeded = rrmse[i] > params.rrmse_threshold[i];
    if (!mape_exceeded && !rrmse_exceeded)
      continue;

    // Guard the unsigned subtraction: at iteration 0 it would wrap around and
    // produce a bogus checkpoint number.
    const uint32_t last_completed = current_iteration > 0 ? current_iteration - 1 : 0;
    const uint32_t rollback_iter = (last_completed / params.checkpoint_interval) * params.checkpoint_interval;

    // When both metrics are exceeded, MAPE is the one that gets reported.
    const std::string metric = mape_exceeded ? "MAPE" : "RRMSE";
    const double value = mape_exceeded ? mape[i] : rrmse[i];
    const double threshold = mape_exceeded ? params.mape_threshold[i] : params.rrmse_threshold[i];

    MSG("[THRESHOLD EXCEEDED] " + props[i] + " has " + metric + " = " +
        std::to_string(value) + " exceeding threshold = " + std::to_string(threshold) +
        " → rolling back to iteration " + std::to_string(rollback_iter));

    Checkpoint_s checkpoint_read{.field = chem.getField()};
    read_checkpoint("checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read);
    current_iteration = checkpoint_read.iteration;
    return true; // rollback happened
  }

  MSG("All species are within their MAPE and RRMSE thresholds.");
  return false;
}
static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters &params,
DiffusionModule &diffusion,
ChemistryModule &chem)