diff --git a/bin/dolo_fgcs_3.R b/bin/dolo_fgcs_3.R index eb57e65bb..b4ebe1ef1 100644 --- a/bin/dolo_fgcs_3.R +++ b/bin/dolo_fgcs_3.R @@ -113,23 +113,4 @@ setup <- list( Grid = grid_setup, # Parameters related to the grid structure Diffusion = diffusion_setup, # Parameters related to the diffusion process Chemistry = chemistry_setup # Parameters related to the chemistry process -) - -iterations <- 15000 -dt <- 200 -checkpoint_interval <- 100 -control_interval <- 100 -mape_threshold <- rep(0.1, 13) -mape_threshold[5] <- 1 #Charge -out_save <- seq(1000, iterations, by = 1000) -#out_save = c(seq(1, 10), seq(10, 100, by= 10), seq(200, iterations, by=100)) - - -list( - timesteps = rep(dt, iterations), - store_result = TRUE, - out_save = out_save, - checkpoint_interval = checkpoint_interval, - control_interval = control_interval, - mape_threshold = mape_threshold ) \ No newline at end of file diff --git a/bin/run_poet.sh b/bin/run_poet.sh index 27e590a5c..2343c9941 100644 --- a/bin/run_poet.sh +++ b/bin/run_poet.sh @@ -1,7 +1,7 @@ #!/bin/bash -#SBATCH --job-name=dolo_proto1_eps01_no_zeroabs -#SBATCH --output=dolo_proto1_eps01_no_zeroabs_%j.out -#SBATCH --error=dolo_proto1_eps01_no_zeroabs_%j.err +#SBATCH --job-name=proto2_eps01_3_rb +#SBATCH --output=proto2_eps01_3_rb_%j.out +#SBATCH --error=proto2_eps01_3_rb_%j.err #SBATCH --partition=long #SBATCH --nodes=6 #SBATCH --ntasks-per-node=24 @@ -15,5 +15,5 @@ module purge module load cmake gcc openmpi #mpirun -n 144 ./poet dolo_fgcs_3.R dolo_fgcs_3.qs2 dolo_only_pqc -mpirun -n 144 ./poet --interp dolo_fgcs_3_rt.R dolo_fgcs_3.qs2 dolo_proto1_eps01_no_zeroabs +mpirun -n 144 ./poet --interp dolo_fgcs_3_rt.R dolo_fgcs_3.qs2 proto2_eps01_3_rb #mpirun -n 144 ./poet --interp barite_fgcs_4_new/barite_fgcs_4_new_rt.R barite_fgcs_4_new/barite_fgcs_4_new.qs2 barite \ No newline at end of file diff --git a/share/poet/barite/barite_het.qs2 b/share/poet/barite/barite_het.qs2 index df1cac1da..c5b4ec46b 100644 Binary files a/share/poet/barite/barite_het.qs2 and b/share/poet/barite/barite_het.qs2 differ diff --git a/share/poet/surfex/PoetEGU_surfex_500.qs2 b/share/poet/surfex/PoetEGU_surfex_500.qs2 index 41fa6d927..0f5ecd578 100644 Binary files a/share/poet/surfex/PoetEGU_surfex_500.qs2 and b/share/poet/surfex/PoetEGU_surfex_500.qs2 differ diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index 5c2960b59..bc9a00052 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -522,8 +522,6 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { chem_field = out_vec; /* do master stuff */ - std::cout << "[DEBUG] control_batch.size() = " << this->control_batch.size() - << std::endl; if (!this->control_batch.empty()) { std::cout << "[Master] Processing " << this->control_batch.size() diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index 54fbcb69f..0998a63b2 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -151,8 +151,6 @@ void poet::ChemistryModule::processCtrlPkgs( WorkerRunWorkPackage(control_wp, current_sim_time, dt); phreeqc_end = MPI_Wtime(); - std::cout << "PQC RAN" << std::endl; - timings.ctrl_phreeqc_t += phreeqc_end - phreeqc_start; copyPkgs(control_wp, mpi_buffer); diff --git a/src/Control/ControlModule.cpp b/src/Control/ControlModule.cpp index 04d5979b9..f50d2300a 100644 --- a/src/Control/ControlModule.cpp +++ b/src/Control/ControlModule.cpp @@ -96,8 +96,8 @@ void poet::ControlModule::writeErrorMetrics( stats_a = MPI_Wtime(); writeSpeciesStatsToCSV(metrics_history, species, out_dir, "species_overview.csv"); - writeCellStatsToCSV(cell_metrics_history, species, out_dir, - "cell_overview.csv"); + // writeCellStatsToCSV(cell_metrics_history, species, out_dir, + // "cell_overview.csv"); write_metrics(cell_metrics_history, species, out_dir, "metrics_overview.hdf5"); stats_b = MPI_Wtime(); @@ -110,6 +110,7 @@ uint32_t poet::ControlModule::getRollbackIter() { uint32_t last_iter = ((global_iteration - 1) / config.checkpoint_interval) * config.checkpoint_interval; + /* uint32_t rollback_iter = (last_iter <= last_checkpoint_written) ? last_iter : last_checkpoint_written; @@ -120,6 +121,8 @@ uint32_t poet::ControlModule::getRollbackIter() { ", returning=" + std::to_string(last_checkpoint_written)); return last_checkpoint_written; + */ + return last_iter; } std::optional poet::ControlModule::getRollbackTarget( @@ -140,7 +143,8 @@ std::optional poet::ControlModule::getRollbackTarget( for (size_t sp_i = 2; sp_i < species.size(); sp_i++) { - if (s_mape[sp_i] == 0) { + // skip charge + if (s_mape[sp_i] == 0 || sp_i == 4) { continue; } if (s_mape[sp_i] > config.mape_threshold[sp_i]) { @@ -201,6 +205,8 @@ void poet::ControlModule::computeErrorMetrics( for (size_t cell_i = 0; cell_i < n_cells; cell_i++) { c_metrics.id.push_back(reference_values[cell_i][0]); + c_metrics.mape[cell_i][0] = reference_values[cell_i][0]; + c_metrics.rrmse[cell_i][0] = reference_values[cell_i][0]; for (size_t sp_i = 2; sp_i < n_species; sp_i++) { const double ref_value = reference_values[cell_i][sp_i]; @@ -226,6 +232,14 @@ void poet::ControlModule::computeErrorMetrics( c_metrics.mape[cell_i][sp_i] = 100.0 * std::abs(alpha); c_metrics.rrmse[cell_i][sp_i] = alpha * alpha; + // Log extreme MAPE values for debugging + if (c_metrics.mape[cell_i][sp_i] > 100.0) { + std::cout << "WARNING: High MAPE detected - Cell=" + << c_metrics.id[cell_i] << ", Species=" << species[sp_i] + << ", MAPE=" << c_metrics.mape[cell_i][sp_i] + << "%, Ref=" << ref_value << ", Sur=" << sur_value + << ", Alpha=" << alpha << std::endl; + } } } } @@ -242,7 +256,7 @@ void poet::ControlModule::processCheckpoint( DiffusionModule &diffusion, uint32_t ¤t_iter, const std::string &out_dir, const std::vector &species) { - if (flush_request) { + if (flush_request && rollback_count < 3) { uint32_t target = getRollbackIter(); readCheckpoint(diffusion, current_iter, target, out_dir); diff --git a/src/IO/StatsIO.cpp b/src/IO/StatsIO.cpp index eb2fb012a..78e3910b5 100644 --- a/src/IO/StatsIO.cpp +++ b/src/IO/StatsIO.cpp @@ -27,9 +27,13 @@ int write_metrics(const std::vector &metrics_history, for (size_t idx = 0; idx < metrics_history.size(); ++idx) { const auto &metrics = metrics_history[idx]; - std::string grp = "entry_" + std::to_string(idx) + "_iter_" + + /* + std::string grp = "entry_" + std::to_string(idx) + "_iter_" + std::to_string(metrics.iteration) + "_rb_" + std::to_string(metrics.rollback_count); + */ + std::string grp = "iter_" + std::to_string(metrics.iteration) + "_rb_" + + std::to_string(metrics.rollback_count); size_t n_cells = metrics.id.size(); size_t n_species = metrics.mape[0].size(); @@ -37,24 +41,29 @@ int write_metrics(const std::vector &metrics_history, H5Easy::dump(file, grp + "/meta", 0, H5Easy::DumpMode::Overwrite); // Attach attributes - H5Easy::dumpAttribute(file, grp + "/meta", "species_names", species_names, H5Easy::DumpMode::Overwrite); - H5Easy::dumpAttribute(file, grp + "/meta", "iteration", metrics.iteration, H5Easy::DumpMode::Overwrite); + H5Easy::dumpAttribute(file, grp + "/meta", "species_names", species_names, + H5Easy::DumpMode::Overwrite); + H5Easy::dumpAttribute(file, grp + "/meta", "iteration", metrics.iteration, + H5Easy::DumpMode::Overwrite); H5Easy::dumpAttribute(file, grp + "/meta", "rollback_count", metrics.rollback_count, H5Easy::DumpMode::Overwrite); - H5Easy::dumpAttribute(file, grp + "/meta", "n_cells", n_cells, H5Easy::DumpMode::Overwrite); - H5Easy::dumpAttribute(file, grp + "/meta", "n_species", n_species, H5Easy::DumpMode::Overwrite); + H5Easy::dumpAttribute(file, grp + "/meta", "n_cells", n_cells, + H5Easy::DumpMode::Overwrite); + H5Easy::dumpAttribute(file, grp + "/meta", "n_species", n_species, + H5Easy::DumpMode::Overwrite); // ───────────────────────────────────────────── // 2. Real datasets // ───────────────────────────────────────────── - H5Easy::dump(file, grp + "/mape", metrics.mape, H5Easy::DumpMode::Overwrite); - H5Easy::dump(file, grp + "/rrmse", metrics.rrmse, H5Easy::DumpMode::Overwrite); + H5Easy::dump(file, grp + "/mape", metrics.mape, + H5Easy::DumpMode::Overwrite); + H5Easy::dump(file, grp + "/rrmse", metrics.rrmse, + H5Easy::DumpMode::Overwrite); } return 0; } - void writeCellStatsToCSV(const std::vector &all_stats, const std::vector &species_names, const std::string &out_dir, diff --git a/src/IO/StatsIO.hpp b/src/IO/StatsIO.hpp index bb838ce72..f8f624b92 100644 --- a/src/IO/StatsIO.hpp +++ b/src/IO/StatsIO.hpp @@ -1,4 +1,5 @@ #include +#include void writeSpeciesStatsToCSV( const std::vector &all_stats,