From 1dc3e5899d0f975702c77773217146498ecf7deb Mon Sep 17 00:00:00 2001 From: rastogi Date: Fri, 1 Aug 2025 11:09:59 +0200 Subject: [PATCH 01/19] Initial commit --- bench/barite/barite_het_rt.R | 2 +- src/CMakeLists.txt | 2 +- src/Chemistry/ChemistryModule.hpp | 10 +- src/Chemistry/MasterFunctions.cpp | 233 +++++--- src/Chemistry/WorkerFunctions.cpp | 867 +++++++++++++++++------------- src/poet.cpp | 12 +- src/poet.hpp.in | 4 + 7 files changed, 699 insertions(+), 431 deletions(-) diff --git a/bench/barite/barite_het_rt.R b/bench/barite/barite_het_rt.R index a0b63df67..beb3a5c45 100644 --- a/bench/barite/barite_het_rt.R +++ b/bench/barite/barite_het_rt.R @@ -1,4 +1,4 @@ list( timesteps = rep(50, 100), store_result = TRUE -) \ No newline at end of file +) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ca39b6106..1886a9f43 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -24,7 +24,7 @@ elseif (POET_TUG_APPROACH STREQUAL "Explicit") target_compile_definitions(POETLib PRIVATE POET_TUG_FTCS) endif() -target_include_directories(POETLib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") +target_include_directories(POETLib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}") target_link_libraries( POETLib PUBLIC RRuntime diff --git a/src/Chemistry/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp index 4bf925400..22547a212 100644 --- a/src/Chemistry/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -12,6 +12,8 @@ #include "SurrogateModels/DHT_Wrapper.hpp" #include "SurrogateModels/Interpolation.hpp" +#include "poet.hpp" + #include "PhreeqcRunner.hpp" #include #include @@ -249,6 +251,8 @@ public: std::vector ai_surrogate_validity_vector; + RuntimeParameters *runtime_params = nullptr; + protected: void initializeDHT(uint32_t size_mb, const NamedVector &key_species, @@ -275,7 +279,7 @@ protected: CHEM_AI_BCAST_VALIDITY }; - enum { LOOP_WORK, LOOP_END }; + enum { LOOP_WORK, LOOP_END, WITH_REL_ERROR }; enum { WORKER_PHREEQC, @@ -316,7 
+320,7 @@ protected: void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, int &pkg_to_send, int &count_pkgs, int &free_workers, - double dt, uint32_t iteration, + double dt, uint32_t iteration, uint32_t control_iter, const std::vector &wp_sizes_vector); void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, int &free_workers); @@ -373,7 +377,7 @@ protected: bool ai_surrogate_enabled{false}; - static constexpr uint32_t BUFFER_OFFSET = 5; + static constexpr uint32_t BUFFER_OFFSET = 6; inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const { MPI_Bcast(buf, count, datatype, 0, this->group_comm); diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index fce7b4139..63858c530 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -7,7 +7,8 @@ #include std::vector -poet::ChemistryModule::MasterGatherWorkerMetrics(int type) const { +poet::ChemistryModule::MasterGatherWorkerMetrics(int type) const +{ MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); uint32_t dummy; @@ -21,7 +22,8 @@ poet::ChemistryModule::MasterGatherWorkerMetrics(int type) const { } std::vector -poet::ChemistryModule::MasterGatherWorkerTimings(int type) const { +poet::ChemistryModule::MasterGatherWorkerTimings(int type) const +{ MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); double dummy; @@ -34,31 +36,36 @@ poet::ChemistryModule::MasterGatherWorkerTimings(int type) const { return timings; } -std::vector poet::ChemistryModule::GetWorkerPhreeqcTimings() const { +std::vector poet::ChemistryModule::GetWorkerPhreeqcTimings() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_PHREEQC); } -std::vector poet::ChemistryModule::GetWorkerDHTGetTimings() const { +std::vector poet::ChemistryModule::GetWorkerDHTGetTimings() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return 
MasterGatherWorkerTimings(WORKER_DHT_GET); } -std::vector poet::ChemistryModule::GetWorkerDHTFillTimings() const { +std::vector poet::ChemistryModule::GetWorkerDHTFillTimings() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_DHT_FILL); } -std::vector poet::ChemistryModule::GetWorkerIdleTimings() const { +std::vector poet::ChemistryModule::GetWorkerIdleTimings() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IDLE); } -std::vector poet::ChemistryModule::GetWorkerDHTHits() const { +std::vector poet::ChemistryModule::GetWorkerDHTHits() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); type = WORKER_DHT_HITS; @@ -76,7 +83,8 @@ std::vector poet::ChemistryModule::GetWorkerDHTHits() const { return ret; } -std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const { +std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); type = WORKER_DHT_EVICTIONS; @@ -95,35 +103,40 @@ std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const { } std::vector -poet::ChemistryModule::GetWorkerInterpolationWriteTimings() const { +poet::ChemistryModule::GetWorkerInterpolationWriteTimings() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IP_WRITE); } std::vector -poet::ChemistryModule::GetWorkerInterpolationReadTimings() const { +poet::ChemistryModule::GetWorkerInterpolationReadTimings() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IP_READ); } std::vector -poet::ChemistryModule::GetWorkerInterpolationGatherTimings() const { +poet::ChemistryModule::GetWorkerInterpolationGatherTimings() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return 
MasterGatherWorkerTimings(WORKER_IP_GATHER); } std::vector -poet::ChemistryModule::GetWorkerInterpolationFunctionCallTimings() const { +poet::ChemistryModule::GetWorkerInterpolationFunctionCallTimings() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IP_FC); } std::vector -poet::ChemistryModule::GetWorkerInterpolationCalls() const { +poet::ChemistryModule::GetWorkerInterpolationCalls() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); type = WORKER_IP_CALLS; @@ -141,7 +154,8 @@ poet::ChemistryModule::GetWorkerInterpolationCalls() const { return ret; } -std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { +std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const +{ int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); type = WORKER_PHT_CACHE_HITS; @@ -161,11 +175,14 @@ std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { inline std::vector shuffleVector(const std::vector &in_vector, uint32_t size_per_prop, - uint32_t wp_count) { + uint32_t wp_count) +{ std::vector out_buffer(in_vector.size()); uint32_t write_i = 0; - for (uint32_t i = 0; i < wp_count; i++) { - for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t i = 0; i < wp_count; i++) + { + for (uint32_t j = i; j < size_per_prop; j += wp_count) + { out_buffer[write_i] = in_vector[j]; write_i++; } @@ -175,14 +192,18 @@ inline std::vector shuffleVector(const std::vector &in_vector, inline std::vector shuffleField(const std::vector &in_field, uint32_t size_per_prop, - uint32_t prop_count, - uint32_t wp_count) { + uint32_t species_count, + uint32_t wp_count) +{ std::vector out_buffer(in_field.size()); uint32_t write_i = 0; - for (uint32_t i = 0; i < wp_count; i++) { - for (uint32_t j = i; j < size_per_prop; j += wp_count) { - for (uint32_t k = 0; k < prop_count; k++) { - out_buffer[(write_i * prop_count) + k] = + for (uint32_t i = 0; i < 
wp_count; i++) + { + for (uint32_t j = i; j < size_per_prop; j += wp_count) + { + for (uint32_t k = 0; k < species_count; k++) + { + out_buffer[(write_i * species_count) + k] = in_field[(k * size_per_prop) + j]; } write_i++; @@ -192,28 +213,34 @@ inline std::vector shuffleField(const std::vector &in_field, } inline void unshuffleField(const std::vector &in_buffer, - uint32_t size_per_prop, uint32_t prop_count, - uint32_t wp_count, std::vector &out_field) { + uint32_t size_per_prop, uint32_t species_count, + uint32_t wp_count, std::vector &out_field) +{ uint32_t read_i = 0; - for (uint32_t i = 0; i < wp_count; i++) { - for (uint32_t j = i; j < size_per_prop; j += wp_count) { - for (uint32_t k = 0; k < prop_count; k++) { + for (uint32_t i = 0; i < wp_count; i++) + { + for (uint32_t j = i; j < size_per_prop; j += wp_count) + { + for (uint32_t k = 0; k < species_count; k++) + { out_field[(k * size_per_prop) + j] = - in_buffer[(read_i * prop_count) + k]; + in_buffer[(read_i * species_count) + k]; } read_i++; } } } -inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { +inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) +{ /* visual progress */ double progress = (float)(count_pkgs + 1) / n_wp; std::cout << "["; int pos = barWidth * progress; - for (int iprog = 0; iprog < barWidth; ++iprog) { + for (int iprog = 0; iprog < barWidth; ++iprog) + { if (iprog < pos) std::cout << "="; else if (iprog == pos) @@ -228,14 +255,17 @@ inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { inline void poet::ChemistryModule::MasterSendPkgs( worker_list_t &w_list, workpointer_t &work_pointer, int &pkg_to_send, - int &count_pkgs, int &free_workers, double dt, uint32_t iteration, - const std::vector &wp_sizes_vector) { + int &count_pkgs, int &free_workers, double dt, uint32_t iteration, uint32_t control_iteration, + const std::vector &wp_sizes_vector) +{ /* declare variables */ int local_work_package_size; /* search for 
free workers and send work */ - for (int p = 0; p < this->comm_size - 1; p++) { - if (w_list[p].has_work == 0 && pkg_to_send > 0) /* worker is free */ { + for (int p = 0; p < this->comm_size - 1; p++) + { + if (w_list[p].has_work == 0 && pkg_to_send > 0) /* worker is free */ + { /* to enable different work_package_size, set local copy of * work_package_size to pre-calculated work package size vector */ @@ -264,7 +294,8 @@ inline void poet::ChemistryModule::MasterSendPkgs( // current work package start location in field uint32_t wp_start_index = std::accumulate(wp_sizes_vector.begin(), std::next(wp_sizes_vector.begin(), count_pkgs), 0); send_buffer[end_of_wp + 4] = wp_start_index; - + // whether this iteration is a control iteration + send_buffer[end_of_wp + 5] = control_iteration; /* ATTENTION Worker p has rank p+1 */ // MPI_Send(send_buffer, end_of_wp + BUFFER_OFFSET, MPI_DOUBLE, p + 1, @@ -283,7 +314,8 @@ inline void poet::ChemistryModule::MasterSendPkgs( inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, - int &free_workers) { + int &free_workers) +{ /* declare most of the variables here */ int need_to_receive = 1; double idle_a, idle_b; @@ -293,37 +325,74 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, // master_recv_a = MPI_Wtime(); /* start to loop as long there are packages to recv and the need to receive */ - while (need_to_receive && pkg_to_recv > 0) { + while (need_to_receive && pkg_to_recv > 0) + { // only of there are still packages to send and free workers are available if (to_send && free_workers > 0) // non blocking probing - MPI_Iprobe(MPI_ANY_SOURCE, LOOP_WORK, MPI_COMM_WORLD, &need_to_receive, + MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &need_to_receive, &probe_status); - else { + else + { idle_a = MPI_Wtime(); // blocking probing - MPI_Probe(MPI_ANY_SOURCE, LOOP_WORK, MPI_COMM_WORLD, &probe_status); + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, 
MPI_COMM_WORLD, &probe_status); idle_b = MPI_Wtime(); this->idle_t += idle_b - idle_a; } /* if need_to_receive was set to true above, so there is a message to * receive */ - if (need_to_receive) { + if (need_to_receive) + { p = probe_status.MPI_SOURCE; - MPI_Get_count(&probe_status, MPI_DOUBLE, &size); - MPI_Recv(w_list[p - 1].send_addr, size, MPI_DOUBLE, p, LOOP_WORK, - this->group_comm, MPI_STATUS_IGNORE); - w_list[p - 1].has_work = 0; - pkg_to_recv -= 1; - free_workers++; + if (probe_status.MPI_TAG == LOOP_WORK) + { + MPI_Get_count(&probe_status, MPI_DOUBLE, &size); + MPI_Recv(w_list[p - 1].send_addr, size, MPI_DOUBLE, p, LOOP_WORK, + this->group_comm, MPI_STATUS_IGNORE); + w_list[p - 1].has_work = 0; + pkg_to_recv -= 1; + free_workers++; + } + if (probe_status.MPI_TAG == WITH_REL_ERROR) + { + MPI_Get_count(&probe_status, MPI_DOUBLE, &size); + + std::cout << "[Master] Probed rel error from worker " << p + << ", size = " << size << std::endl; + + int half = size/2; + + std::vector rel_err_buffer(size); + std::vector rel_error(half); + MPI_Recv(rel_err_buffer.data(), size, MPI_DOUBLE, p, WITH_REL_ERROR, + this->group_comm, MPI_STATUS_IGNORE); + + + + std::copy(rel_err_buffer.begin(), rel_err_buffer.begin() + half, + w_list[p - 1].send_addr); + + std::copy(rel_err_buffer.begin() + half, rel_err_buffer.end(), rel_error.begin()); + + std::cout << "[Master] Received rel error buffer from worker " << p + << ", first value = " << (rel_err_buffer.empty() ? 
-1 : rel_err_buffer[0]) + << std::endl; + + w_list[p - 1].has_work = 0; + pkg_to_recv -= 1; + free_workers++; + } } } } -void poet::ChemistryModule::simulate(double dt) { +void poet::ChemistryModule::simulate(double dt) +{ double start_t{MPI_Wtime()}; - if (this->is_sequential) { + if (this->is_sequential) + { MasterRunSequential(); return; } @@ -333,7 +402,8 @@ void poet::ChemistryModule::simulate(double dt) { this->chem_t += end_t - start_t; } -void poet::ChemistryModule::MasterRunSequential() { +void poet::ChemistryModule::MasterRunSequential() +{ // std::vector shuffled_field = // shuffleField(chem_field.AsVector(), n_cells, prop_count, 1); @@ -360,7 +430,8 @@ void poet::ChemistryModule::MasterRunSequential() { // chem_field = out_vec; } -void poet::ChemistryModule::MasterRunParallel(double dt) { +void poet::ChemistryModule::MasterRunParallel(double dt) +{ /* declare most of the needed variables here */ double seq_a, seq_b, seq_c, seq_d; double worker_chemistry_a, worker_chemistry_b; @@ -373,14 +444,15 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { const std::vector wp_sizes_vector = CalculateWPSizesVector(this->n_cells, this->wp_size); - if (this->ai_surrogate_enabled) { + if (this->ai_surrogate_enabled) + { ftype = CHEM_AI_BCAST_VALIDITY; PropagateFunctionType(ftype); this->ai_surrogate_validity_vector = shuffleVector(this->ai_surrogate_validity_vector, - this->n_cells, + this->n_cells, wp_sizes_vector.size()); ChemBCast(&this->ai_surrogate_validity_vector.front(), this->n_cells, MPI_INT); - } + } ftype = CHEM_WORK_LOOP; PropagateFunctionType(ftype); @@ -388,6 +460,7 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { MPI_Barrier(this->group_comm); static uint32_t iteration = 0; + uint32_t control_iteration = static_cast(this->runtime_params->control_iteration_active ? 
1 : 0); /* start time measurement of sequential part */ seq_a = MPI_Wtime(); @@ -417,20 +490,47 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { /* start send/recv loop */ // while there are still packages to recv - while (pkg_to_recv > 0) { + while (pkg_to_recv > 0) + { // print a progressbar to stdout - if (print_progessbar) { + if (print_progessbar) + { printProgressbar((int)i_pkgs, (int)wp_sizes_vector.size()); } // while there are still packages to send - if (pkg_to_send > 0) { + if (pkg_to_send > 0) + { // send packages to all free workers ... MasterSendPkgs(worker_list, work_pointer, pkg_to_send, i_pkgs, - free_workers, dt, iteration, wp_sizes_vector); + free_workers, dt, iteration, control_iteration, wp_sizes_vector); } // ... and try to receive them from workers who has finished their work MasterRecvPkgs(worker_list, pkg_to_recv, pkg_to_send > 0, free_workers); } + // to do: Statistik + + /* if control_iteration_active is true receive rel. error data and compare with epsilon */ + if (this->runtime_params->control_iteration_active) + { + + // do Statistik + /** + int rel_err_offset = size / 2; // or calculate as needed + + for (std::size_t ep_i = 0; ep_i < this->runtime_params->species_epsilon.size(); ep_i++) + { + if (rel_err_buffer[rel_err_offset + ep_i] > this->runtime_params->species_epsilon[ep_i]) + { + std::cout << "[Master] At least one relative error exceeded epsilon threshold!" 
+ << std::endl; + std::cout << "value: " << rel_err_buffer[rel_err_offset + ep_i] << " epsilon: " + << this->runtime_params->species_epsilon[ep_i] << std::endl; + break; + } + } + */ + } + // Just to complete the progressbar std::cout << std::endl; @@ -461,7 +561,8 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { /* end time measurement of whole chemistry simulation */ /* advise workers to end chemistry iteration */ - for (int i = 1; i < this->comm_size; i++) { + for (int i = 1; i < this->comm_size; i++) + { MPI_Send(NULL, 0, MPI_DOUBLE, i, LOOP_END, this->group_comm); } @@ -469,28 +570,32 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { iteration++; } -void poet::ChemistryModule::MasterLoopBreak() { +void poet::ChemistryModule::MasterLoopBreak() +{ int type = CHEM_BREAK_MAIN_LOOP; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); } std::vector poet::ChemistryModule::CalculateWPSizesVector(uint32_t n_cells, - uint32_t wp_size) const { + uint32_t wp_size) const +{ bool mod_pkgs = (n_cells % wp_size) != 0; uint32_t n_packages = (uint32_t)(n_cells / wp_size) + static_cast(mod_pkgs); std::vector wp_sizes_vector(n_packages, 0); - for (int i = 0; i < n_cells; i++) { + for (int i = 0; i < n_cells; i++) + { wp_sizes_vector[i % n_packages] += 1; } return wp_sizes_vector; } -void poet::ChemistryModule::masterSetField(Field field) { +void poet::ChemistryModule::masterSetField(Field field) +{ this->chem_field = field; this->prop_count = field.GetProps().size(); diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index b7eb6096c..a45a5bc23 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -10,414 +10,559 @@ #include #include #include +#include #include #include #include -namespace poet { +namespace poet +{ -inline std::string get_string(int root, MPI_Comm communicator) { - int count; - MPI_Bcast(&count, 1, MPI_INT, root, communicator); + inline std::string get_string(int root, 
MPI_Comm communicator) + { + int count; + MPI_Bcast(&count, 1, MPI_INT, root, communicator); - char *buffer = new char[count + 1]; - MPI_Bcast(buffer, count, MPI_CHAR, root, communicator); + char *buffer = new char[count + 1]; + MPI_Bcast(buffer, count, MPI_CHAR, root, communicator); - buffer[count] = '\0'; + buffer[count] = '\0'; - std::string ret_str(buffer); - delete[] buffer; + std::string ret_str(buffer); + delete[] buffer; - return ret_str; -} + return ret_str; + } -void poet::ChemistryModule::WorkerLoop() { - struct worker_s timings; + void poet::ChemistryModule::WorkerLoop() + { + struct worker_s timings; - // HACK: defining the worker iteration count here, which will increment after - // each CHEM_ITER_END message - uint32_t iteration = 1; - bool loop = true; + // HACK: defining the worker iteration count here, which will increment after + // each CHEM_ITER_END message + uint32_t iteration = 1; + bool loop = true; - while (loop) { - int func_type; - PropagateFunctionType(func_type); + while (loop) + { + int func_type; + PropagateFunctionType(func_type); - switch (func_type) { - case CHEM_FIELD_INIT: { - ChemBCast(&this->prop_count, 1, MPI_UINT32_T); - if (this->ai_surrogate_enabled) { - this->ai_surrogate_validity_vector.resize( - this->n_cells); // resize statt reserve? - } - break; - } - case CHEM_AI_BCAST_VALIDITY: { - // Receive the index vector of valid ai surrogate predictions - MPI_Bcast(&this->ai_surrogate_validity_vector.front(), this->n_cells, - MPI_INT, 0, this->group_comm); - break; - } - case CHEM_WORK_LOOP: { - WorkerProcessPkgs(timings, iteration); - break; - } - case CHEM_PERF: { - int type; - ChemBCast(&type, 1, MPI_INT); - if (type < WORKER_DHT_HITS) { - WorkerPerfToMaster(type, timings); + switch (func_type) + { + case CHEM_FIELD_INIT: + { + ChemBCast(&this->prop_count, 1, MPI_UINT32_T); + if (this->ai_surrogate_enabled) + { + this->ai_surrogate_validity_vector.resize( + this->n_cells); // resize statt reserve? 
+ } break; } - WorkerMetricsToMaster(type); - break; - } - case CHEM_BREAK_MAIN_LOOP: { - WorkerPostSim(iteration); - loop = false; - break; - } - default: { - throw std::runtime_error("Worker received unknown tag from master."); - } - } - } -} - -void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings, - uint32_t &iteration) { - MPI_Status probe_status; - bool loop = true; - - MPI_Barrier(this->group_comm); - - while (loop) { - double idle_a = MPI_Wtime(); - MPI_Probe(0, MPI_ANY_TAG, this->group_comm, &probe_status); - double idle_b = MPI_Wtime(); - - switch (probe_status.MPI_TAG) { - case LOOP_WORK: { - timings.idle_t += idle_b - idle_a; - int count; - MPI_Get_count(&probe_status, MPI_DOUBLE, &count); - - WorkerDoWork(probe_status, count, timings); - break; - } - case LOOP_END: { - WorkerPostIter(probe_status, iteration); - iteration++; - loop = false; - break; - } - } - } -} - -void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, - int double_count, - struct worker_s &timings) { - static int counter = 1; - - double dht_get_start, dht_get_end; - double phreeqc_time_start, phreeqc_time_end; - double dht_fill_start, dht_fill_end; - - uint32_t iteration; - double dt; - double current_sim_time; - uint32_t wp_start_index; - int count = double_count; - std::vector mpi_buffer(count); - - /* receive */ - MPI_Recv(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, this->group_comm, - MPI_STATUS_IGNORE); - - /* decrement count of work_package by BUFFER_OFFSET */ - count -= BUFFER_OFFSET; - - /* check for changes on all additional variables given by the 'header' of - * mpi_buffer */ - - // work_package_size - poet::WorkPackage s_curr_wp(mpi_buffer[count]); - - // current iteration of simulation - iteration = mpi_buffer[count + 1]; - - // current timestep size - dt = mpi_buffer[count + 2]; - - // current simulation time ('age' of simulation) - current_sim_time = mpi_buffer[count + 3]; - - // current work package start location in field - 
wp_start_index = mpi_buffer[count + 4]; - - for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { - s_curr_wp.input[wp_i] = - std::vector(mpi_buffer.begin() + this->prop_count * wp_i, - mpi_buffer.begin() + this->prop_count * (wp_i + 1)); - } - - // std::cout << this->comm_rank << ":" << counter++ << std::endl; - if (dht_enabled || interp_enabled) { - dht->prepareKeys(s_curr_wp.input, dt); - } - - if (dht_enabled) { - /* check for values in DHT */ - dht_get_start = MPI_Wtime(); - dht->checkDHT(s_curr_wp); - dht_get_end = MPI_Wtime(); - timings.dht_get += dht_get_end - dht_get_start; - } - - if (interp_enabled) { - interp->tryInterpolation(s_curr_wp); - } - - if (this->ai_surrogate_enabled) { - // Map valid predictions from the ai surrogate in the workpackage - for (int i = 0; i < s_curr_wp.size; i++) { - if (this->ai_surrogate_validity_vector[wp_start_index + i] == 1) { - s_curr_wp.mapping[i] = CHEM_AISURR; + case CHEM_AI_BCAST_VALIDITY: + { + // Receive the index vector of valid ai surrogate predictions + MPI_Bcast(&this->ai_surrogate_validity_vector.front(), this->n_cells, + MPI_INT, 0, this->group_comm); + break; + } + case CHEM_WORK_LOOP: + { + WorkerProcessPkgs(timings, iteration); + break; + } + case CHEM_PERF: + { + int type; + ChemBCast(&type, 1, MPI_INT); + if (type < WORKER_DHT_HITS) + { + WorkerPerfToMaster(type, timings); + break; + } + WorkerMetricsToMaster(type); + break; + } + case CHEM_BREAK_MAIN_LOOP: + { + WorkerPostSim(iteration); + loop = false; + break; + } + default: + { + throw std::runtime_error("Worker received unknown tag from master."); + } } } } - phreeqc_time_start = MPI_Wtime(); + void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings, + uint32_t &iteration) + { + MPI_Status probe_status; + bool loop = true; - WorkerRunWorkPackage(s_curr_wp, current_sim_time, dt); + MPI_Barrier(this->group_comm); - phreeqc_time_end = MPI_Wtime(); + while (loop) + { + double idle_a = MPI_Wtime(); + MPI_Probe(0, MPI_ANY_TAG, 
this->group_comm, &probe_status); + double idle_b = MPI_Wtime(); - for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { - std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), - mpi_buffer.begin() + this->prop_count * wp_i); - } + switch (probe_status.MPI_TAG) + { + case LOOP_WORK: + { + timings.idle_t += idle_b - idle_a; + int count; + MPI_Get_count(&probe_status, MPI_DOUBLE, &count); - /* send results to master */ - MPI_Request send_req; - MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, MPI_COMM_WORLD, - &send_req); - - if (dht_enabled || interp_enabled) { - /* write results to DHT */ - dht_fill_start = MPI_Wtime(); - dht->fillDHT(s_curr_wp); - dht_fill_end = MPI_Wtime(); - - if (interp_enabled) { - interp->writePairs(); + WorkerDoWork(probe_status, count, timings); + break; + } + case LOOP_END: + { + WorkerPostIter(probe_status, iteration); + iteration++; + loop = false; + break; + } + } } - timings.dht_fill += dht_fill_end - dht_fill_start; } - timings.phreeqc_t += phreeqc_time_end - phreeqc_time_start; + void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, + int double_count, + struct worker_s &timings) + { + static int counter = 1; - MPI_Wait(&send_req, MPI_STATUS_IGNORE); -} + double dht_get_start, dht_get_end; + double phreeqc_time_start, phreeqc_time_end; + double dht_fill_start, dht_fill_end; -void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, - uint32_t iteration) { - MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm, - MPI_STATUS_IGNORE); + uint32_t iteration; + double dt; + double current_sim_time; + uint32_t wp_start_index; + int count = double_count; + bool control_iteration_active = false; + std::vector mpi_buffer(count); - if (this->dht_enabled) { - dht_hits.push_back(dht->getHits()); - dht_evictions.push_back(dht->getEvictions()); - dht->resetCounter(); + /* receive */ + MPI_Recv(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, this->group_comm, + 
MPI_STATUS_IGNORE); - if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { + /* decrement count of work_package by BUFFER_OFFSET */ + count -= BUFFER_OFFSET; + /* check for changes on all additional variables given by the 'header' of + * mpi_buffer */ + + // work_package_size + poet::WorkPackage s_curr_wp(mpi_buffer[count]); + + // current iteration of simulation + iteration = mpi_buffer[count + 1]; + + // current timestep size + dt = mpi_buffer[count + 2]; + + // current simulation time ('age' of simulation) + current_sim_time = mpi_buffer[count + 3]; + + // current work package start location in field + wp_start_index = mpi_buffer[count + 4]; + + control_iteration_active = (mpi_buffer[count + 5] == 1); + + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) + { + s_curr_wp.input[wp_i] = + std::vector(mpi_buffer.begin() + this->prop_count * wp_i, + mpi_buffer.begin() + this->prop_count * (wp_i + 1)); + } + + // std::cout << this->comm_rank << ":" << counter++ << std::endl; + if (dht_enabled || interp_enabled) + { + dht->prepareKeys(s_curr_wp.input, dt); + } + + if (dht_enabled) + { + /* check for values in DHT */ + dht_get_start = MPI_Wtime(); + dht->checkDHT(s_curr_wp); + dht_get_end = MPI_Wtime(); + timings.dht_get += dht_get_end - dht_get_start; + } + + if (interp_enabled) + { + interp->tryInterpolation(s_curr_wp); + } + + if (this->ai_surrogate_enabled) + { + // Map valid predictions from the ai surrogate in the workpackage + for (int i = 0; i < s_curr_wp.size; i++) + { + if (this->ai_surrogate_validity_vector[wp_start_index + i] == 1) + { + s_curr_wp.mapping[i] = CHEM_AISURR; + } + } + } + + /* if control iteration: create copy surrogate results (output and mappings) and then set them to zero, + give this to phreeqc */ + + poet::WorkPackage s_curr_wp_pqc = s_curr_wp; + + if (control_iteration_active) + { + for (std::size_t wp_i = 0; wp_i < s_curr_wp_pqc.size; wp_i++) + { + s_curr_wp_pqc.output[wp_i] = std::vector(this->prop_count, 0.0); + 
s_curr_wp_pqc.mapping[wp_i] = 0; + } + } + + phreeqc_time_start = MPI_Wtime(); + + WorkerRunWorkPackage(control_iteration_active ? s_curr_wp_pqc : s_curr_wp, current_sim_time, dt); + + phreeqc_time_end = MPI_Wtime(); + + if (control_iteration_active) + { + // increase size for relative error + std::size_t rel_error_size = s_curr_wp.size * this->prop_count; + + // extend mpi_buffer, for rel. error for every species + mpi_buffer.resize(count + rel_error_size); + std::size_t offset = count; + count += rel_error_size; + + // calc rel. error if phreeqc != surrogate + for (std::size_t wp_i = 0; wp_i < s_curr_wp_pqc.size; wp_i++) + { + const auto &surrogate_result = s_curr_wp.output[wp_i]; + const auto &phreeqc_result = s_curr_wp_pqc.output[wp_i]; + + // std::cout << "surrogate_result.size() " << surrogate_result.size() << ", phreeqc_result " << phreeqc_result.size() << std::endl; + + // fill NaNs + if (surrogate_result.size() == 0) + { + for (std::size_t i = 0; i < this->prop_count; i++) + { + mpi_buffer[offset++] = std::numeric_limits::quiet_NaN(); + } + } + + // compute rel error + if (surrogate_result.size() == phreeqc_result.size()) + { + for (std::size_t i = 0; i < this->prop_count; i++) + { + double ref = phreeqc_result[i]; + double surrogate = surrogate_result[i]; + + if (std::abs(ref) > 1e-9) + { + mpi_buffer[offset++] = std::abs((surrogate - ref) / ref); + } + else + { + mpi_buffer[offset++] = 0.0; + } + } + } + } + } + + poet::WorkPackage &s_curr_wp_copy = control_iteration_active ? s_curr_wp_pqc : s_curr_wp; + + for (std::size_t wp_i = 0; wp_i < s_curr_wp_copy.size; wp_i++) + { + std::copy(s_curr_wp_copy.output[wp_i].begin(), s_curr_wp_copy.output[wp_i].end(), + mpi_buffer.begin() + this->prop_count * wp_i); + } + + /* send results to master */ + MPI_Request send_req; + + int mpi_tag = control_iteration_active ? 
WITH_REL_ERROR : LOOP_WORK; + MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, mpi_tag, MPI_COMM_WORLD, &send_req); + + if (control_iteration_active) + { + std::cout << "[Worker " << this->comm_rank << "] Sent results." << std::endl; + } + + if (dht_enabled || interp_enabled) + { + /* write results to DHT */ + dht_fill_start = MPI_Wtime(); + dht->fillDHT(s_curr_wp_copy); + dht_fill_end = MPI_Wtime(); + + if (interp_enabled) + { + interp->writePairs(); + } + timings.dht_fill += dht_fill_end - dht_fill_start; + } + + timings.phreeqc_t += phreeqc_time_end - phreeqc_time_start; + + MPI_Wait(&send_req, MPI_STATUS_IGNORE); + } + + void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, + uint32_t iteration) + { + MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm, + MPI_STATUS_IGNORE); + + if (this->dht_enabled) + { + dht_hits.push_back(dht->getHits()); + dht_evictions.push_back(dht->getEvictions()); + dht->resetCounter(); + + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) + { + WorkerWriteDHTDump(iteration); + } + } + + if (this->interp_enabled) + { + std::stringstream out; + interp_calls.push_back(interp->getInterpolationCount()); + interp->resetCounter(); + interp->writePHTStats(); + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) + { + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } + + const auto max_mean_idx = + DHT_get_used_idx_factor(this->interp->getDHTObject(), 1); + + if (max_mean_idx >= 2) + { + DHT_flush(this->interp->getDHTObject()); + DHT_flush(this->dht->getDHT()); + if (this->comm_rank == 2) + { + std::cout << "Flushed both DHT and PHT!\n\n"; + } + } + } + + RInsidePOET::getInstance().parseEvalQ("gc()"); + } + + void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) + { + if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) + { WorkerWriteDHTDump(iteration); } - } - - if (this->interp_enabled) { - 
std::stringstream out; - interp_calls.push_back(interp->getInterpolationCount()); - interp->resetCounter(); - interp->writePHTStats(); - if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { + if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) + { + std::stringstream out; out << this->dht_file_out_dir << "/iter_" << std::setfill('0') << std::setw(this->file_pad) << iteration << ".pht"; interp->dumpPHTState(out.str()); } + } - const auto max_mean_idx = - DHT_get_used_idx_factor(this->interp->getDHTObject(), 1); + void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) + { + std::stringstream out; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".dht"; + int res = dht->tableToFile(out.str().c_str()); + if (res != DHT_SUCCESS && this->comm_rank == 2) + std::cerr + << "CPP: Worker: Error in writing current state of DHT to file.\n"; + else if (this->comm_rank == 2) + std::cout << "CPP: Worker: Successfully written DHT to file " << out.str() + << "\n"; + } - if (max_mean_idx >= 2) { - DHT_flush(this->interp->getDHTObject()); - DHT_flush(this->dht->getDHT()); - if (this->comm_rank == 2) { - std::cout << "Flushed both DHT and PHT!\n\n"; + void poet::ChemistryModule::WorkerReadDHTDump( + const std::string &dht_input_file) + { + int res = dht->fileToTable((char *)dht_input_file.c_str()); + if (res != DHT_SUCCESS) + { + if (res == DHT_WRONG_FILE) + { + if (this->comm_rank == 1) + std::cerr + << "CPP: Worker: Wrong file layout! Continue with empty DHT ...\n"; + } + else + { + if (this->comm_rank == 1) + std::cerr << "CPP: Worker: Error in loading current state of DHT from " + "file. 
Continue with empty DHT ...\n"; + } + } + else + { + if (this->comm_rank == 2) + std::cout << "CPP: Worker: Successfully loaded state of DHT from file " + << dht_input_file << "\n"; + } + } + + void poet::ChemistryModule::WorkerRunWorkPackage(WorkPackage &work_package, + double dSimTime, + double dTimestep) + { + + std::vector> inout_chem = work_package.input; + std::vector to_ignore; + + for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) + { + if (work_package.mapping[wp_id] != CHEM_PQC) + { + to_ignore.push_back(wp_id); + } + } + this->pqc_runner->run(inout_chem, dTimestep, to_ignore); + + for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) + { + if (work_package.mapping[wp_id] == CHEM_PQC) + { + work_package.output[wp_id] = inout_chem[wp_id]; } } } - RInsidePOET::getInstance().parseEvalQ("gc()"); -} - -void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { - if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { - WorkerWriteDHTDump(iteration); - } - if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { - std::stringstream out; - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') - << std::setw(this->file_pad) << iteration << ".pht"; - interp->dumpPHTState(out.str()); - } -} - -void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) { - std::stringstream out; - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') - << std::setw(this->file_pad) << iteration << ".dht"; - int res = dht->tableToFile(out.str().c_str()); - if (res != DHT_SUCCESS && this->comm_rank == 2) - std::cerr - << "CPP: Worker: Error in writing current state of DHT to file.\n"; - else if (this->comm_rank == 2) - std::cout << "CPP: Worker: Successfully written DHT to file " << out.str() - << "\n"; -} - -void poet::ChemistryModule::WorkerReadDHTDump( - const std::string &dht_input_file) { - int res = dht->fileToTable((char *)dht_input_file.c_str()); - if (res != DHT_SUCCESS) { - if (res == 
DHT_WRONG_FILE) { - if (this->comm_rank == 1) - std::cerr - << "CPP: Worker: Wrong file layout! Continue with empty DHT ...\n"; - } else { - if (this->comm_rank == 1) - std::cerr << "CPP: Worker: Error in loading current state of DHT from " - "file. Continue with empty DHT ...\n"; + void poet::ChemistryModule::WorkerPerfToMaster(int type, + const struct worker_s &timings) + { + switch (type) + { + case WORKER_PHREEQC: + { + MPI_Gather(&timings.phreeqc_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; + } + case WORKER_DHT_GET: + { + MPI_Gather(&timings.dht_get, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; + } + case WORKER_DHT_FILL: + { + MPI_Gather(&timings.dht_fill, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; + } + case WORKER_IDLE: + { + MPI_Gather(&timings.idle_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; + } + case WORKER_IP_WRITE: + { + double val = interp->getPHTWriteTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_READ: + { + double val = interp->getPHTReadTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_GATHER: + { + double val = interp->getDHTGatherTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_FC: + { + double val = interp->getInterpolationTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + default: + { + throw std::runtime_error("Unknown perf type in master's message."); } - } else { - if (this->comm_rank == 2) - std::cout << "CPP: Worker: Successfully loaded state of DHT from file " - << dht_input_file << "\n"; - } -} - -void poet::ChemistryModule::WorkerRunWorkPackage(WorkPackage &work_package, - double dSimTime, - double dTimestep) { - - std::vector> inout_chem = work_package.input; - std::vector to_ignore; - - 
for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) { - if (work_package.mapping[wp_id] != CHEM_PQC) { - to_ignore.push_back(wp_id); } } - this->pqc_runner->run(inout_chem, dTimestep, to_ignore); - for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) { - if (work_package.mapping[wp_id] == CHEM_PQC) { - work_package.output[wp_id] = inout_chem[wp_id]; + void poet::ChemistryModule::WorkerMetricsToMaster(int type) + { + MPI_Comm worker_comm = dht->getCommunicator(); + int worker_rank; + MPI_Comm_rank(worker_comm, &worker_rank); + + MPI_Comm &group_comm = this->group_comm; + + auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm]( + std::vector &send_buffer, int tag) + { + std::vector to_master(send_buffer.size()); + MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(), + MPI_UINT32_T, MPI_SUM, 0, worker_comm); + + if (worker_rank == 0) + { + MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag, + group_comm); + } + }; + + switch (type) + { + case WORKER_DHT_HITS: + { + reduce_and_send(dht_hits, WORKER_DHT_HITS); + break; + } + case WORKER_DHT_EVICTIONS: + { + reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS); + break; + } + case WORKER_IP_CALLS: + { + reduce_and_send(interp_calls, WORKER_IP_CALLS); + return; + } + case WORKER_PHT_CACHE_HITS: + { + std::vector input = this->interp->getPHTLocalCacheHits(); + reduce_and_send(input, WORKER_PHT_CACHE_HITS); + return; + } + default: + { + throw std::runtime_error("Unknown perf type in master's message."); + } } } -} - -void poet::ChemistryModule::WorkerPerfToMaster(int type, - const struct worker_s &timings) { - switch (type) { - case WORKER_PHREEQC: { - MPI_Gather(&timings.phreeqc_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_DHT_GET: { - MPI_Gather(&timings.dht_get, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_DHT_FILL: { - MPI_Gather(&timings.dht_fill, 1, MPI_DOUBLE, NULL, 
1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_IDLE: { - MPI_Gather(&timings.idle_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_IP_WRITE: { - double val = interp->getPHTWriteTime(); - MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); - break; - } - case WORKER_IP_READ: { - double val = interp->getPHTReadTime(); - MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); - break; - } - case WORKER_IP_GATHER: { - double val = interp->getDHTGatherTime(); - MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); - break; - } - case WORKER_IP_FC: { - double val = interp->getInterpolationTime(); - MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); - break; - } - default: { - throw std::runtime_error("Unknown perf type in master's message."); - } - } -} - -void poet::ChemistryModule::WorkerMetricsToMaster(int type) { - MPI_Comm worker_comm = dht->getCommunicator(); - int worker_rank; - MPI_Comm_rank(worker_comm, &worker_rank); - - MPI_Comm &group_comm = this->group_comm; - - auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm]( - std::vector &send_buffer, int tag) { - std::vector to_master(send_buffer.size()); - MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(), - MPI_UINT32_T, MPI_SUM, 0, worker_comm); - - if (worker_rank == 0) { - MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag, - group_comm); - } - }; - - switch (type) { - case WORKER_DHT_HITS: { - reduce_and_send(dht_hits, WORKER_DHT_HITS); - break; - } - case WORKER_DHT_EVICTIONS: { - reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS); - break; - } - case WORKER_IP_CALLS: { - reduce_and_send(interp_calls, WORKER_IP_CALLS); - return; - } - case WORKER_PHT_CACHE_HITS: { - std::vector input = this->interp->getPHTLocalCacheHits(); - reduce_and_send(input, WORKER_PHT_CACHE_HITS); - return; - } - default: { - throw 
std::runtime_error("Unknown perf type in master's message."); - } - } -} } // namespace poet diff --git a/src/poet.cpp b/src/poet.cpp index 742f4c395..87d5a2796 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -249,6 +249,10 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { params.timesteps = Rcpp::as>(global_rt_setup->operator[]("timesteps")); + params.control_iteration = + Rcpp::as(global_rt_setup->operator[]("control_iteration")); + params.species_epsilon = + Rcpp::as>(global_rt_setup->operator[]("species_epsilon")); } catch (const std::exception &e) { ERRMSG("Error while parsing R scripts: " + std::string(e.what())); @@ -277,7 +281,7 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) { *global_rt_setup = R["setup"]; } -static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters ¶ms, +static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, DiffusionModule &diffusion, ChemistryModule &chem) { @@ -291,8 +295,12 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters ¶ms, R["TMP_PROPS"] = Rcpp::wrap(chem.getField().GetProps()); /* SIMULATION LOOP */ + double dSimTime{0}; for (uint32_t iter = 1; iter < maxiter + 1; iter++) { + + params.control_iteration_active = (iter % params.control_iteration == 0); + double start_t = MPI_Wtime(); const double &dt = params.timesteps[iter - 1]; @@ -308,6 +316,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters ¶ms, /* run transport */ diffusion.simulate(dt); + chem.runtime_params = ¶ms; + chem.getField().update(diffusion.getField()); // MSG("Chemistry start"); diff --git a/src/poet.hpp.in b/src/poet.hpp.in index 9462f6d7e..c08199b6d 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -41,6 +41,7 @@ static const inline std::string r_runtime_parameters = "mysetup"; struct RuntimeParameters { std::string out_dir; std::vector timesteps; + std::vector species_epsilon; Rcpp::List init_params; @@ -51,6 +52,9 @@ struct 
RuntimeParameters { bool print_progress = false; + bool control_iteration_active = false; + std::uint32_t control_iteration = 25; + static constexpr std::uint32_t WORK_PACKAGE_SIZE_DEFAULT = 32; std::uint32_t work_package_size = WORK_PACKAGE_SIZE_DEFAULT; From 3500a24b4b0118bff598ec081fb2dbe3ecfba754 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 2 Jul 2025 13:44:49 +0200 Subject: [PATCH 02/19] feat: Implement checkpointing Co-authored-by: hmars-t --- src/CMakeLists.txt | 31 +++++++++++++++++++++---------- src/IO/Datatypes.hpp | 9 +++++++++ src/IO/HDF5Functions.hpp | 8 ++++++++ src/IO/checkpoint.cpp | 28 ++++++++++++++++++++++++++++ src/poet.cpp | 14 ++++++++++++++ 5 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 src/IO/Datatypes.hpp create mode 100644 src/IO/HDF5Functions.hpp create mode 100644 src/IO/checkpoint.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1886a9f43..89790649b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,8 +1,27 @@ +include(FetchContent) +FetchContent_Declare( + cli11 + QUIET + GIT_REPOSITORY https://github.com/CLIUtils/CLI11.git + GIT_TAG v2.4.2 +) + +FetchContent_Declare( + highfive + QUIET + GIT_REPOSITORY https://github.com/highfive-devs/highfive.git + GIT_TAG v3.0.0 +) + +FetchContent_MakeAvailable(cli11) +FetchContent_MakeAvailable(highfive) + add_library(POETLib Init/InitialList.cpp Init/GridInit.cpp Init/DiffusionInit.cpp Init/ChemistryInit.cpp + IO/checkpoint.cpp DataStructures/Field.cpp Transport/DiffusionModule.cpp Chemistry/ChemistryModule.cpp @@ -30,18 +49,10 @@ target_link_libraries( PUBLIC RRuntime PUBLIC IPhreeqcPOET PUBLIC tug - PUBLIC MPI::MPI_C + PUBLIC MPI::MPI_C + PUBLIC HighFive::HighFive ) -include(FetchContent) -FetchContent_Declare( - cli11 - QUIET - GIT_REPOSITORY https://github.com/CLIUtils/CLI11.git - GIT_TAG v2.4.2 -) - -FetchContent_MakeAvailable(cli11) # add_library(poetlib # Base/Grid.cpp diff --git a/src/IO/Datatypes.hpp 
b/src/IO/Datatypes.hpp new file mode 100644 index 000000000..c20fb3b79 --- /dev/null +++ b/src/IO/Datatypes.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include +#include + +struct Checkpoint_s { + poet::Field &field; + uint32_t iteration; +}; \ No newline at end of file diff --git a/src/IO/HDF5Functions.hpp b/src/IO/HDF5Functions.hpp new file mode 100644 index 000000000..e7954a2f7 --- /dev/null +++ b/src/IO/HDF5Functions.hpp @@ -0,0 +1,8 @@ +#pragma once + +#include +#include "Datatypes.hpp" + +int write_checkpoint(const std::string &file_path, struct Checkpoint_s &&checkpoint); + +int read_checkpoint(const std::string &file_path, struct Checkpoint_s &checkpoint); \ No newline at end of file diff --git a/src/IO/checkpoint.cpp b/src/IO/checkpoint.cpp new file mode 100644 index 000000000..f197c22f5 --- /dev/null +++ b/src/IO/checkpoint.cpp @@ -0,0 +1,28 @@ +#include "IO/Datatypes.hpp" +#include +#include + +int write_checkpoint(const std::string &file_path, struct Checkpoint_s &&checkpoint){ + + // TODO: errorhandling + H5Easy::File file(file_path, H5Easy::File::Overwrite); + + + H5Easy::dump(file, "/MetaParam/Iterations", checkpoint.iteration); + H5Easy::dump(file, "/Grid/Names", checkpoint.field.GetProps()); + H5Easy::dump(file, "/Grid/Chemistry", checkpoint.field.As2DVector()); + + return 0; +} + +int read_checkpoint(const std::string &file_path, struct Checkpoint_s &checkpoint){ + + H5Easy::File file(file_path, H5Easy::File::ReadOnly); + + checkpoint.iteration = H5Easy::load(file, "/MetaParam/Iterations"); + + checkpoint.field = H5Easy::load>>(file, "/Grid/Chemistry"); + + return 0; +} + diff --git a/src/poet.cpp b/src/poet.cpp index 87d5a2796..408bb56ff 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -26,6 +26,8 @@ #include "CLI/CLI.hpp" #include "Chemistry/ChemistryModule.hpp" #include "DataStructures/Field.hpp" +#include "IO/Datatypes.hpp" +#include "IO/HDF5Functions.hpp" #include "Init/InitialList.hpp" #include "Transport/DiffusionModule.hpp" @@ -403,6 +405,18 @@ 
static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, // store_result is TRUE) call_master_iter_end(R, diffusion.getField(), chem.getField()); + // TODO: write checkpoint + // checkpoint struct --> field and iteration + + if (iter == 1) { + write_checkpoint("checkpoint1.hdf5", + {.field = chem.getField(), .iteration = iter}); + } else if (iter == 2) { + Checkpoint_s checkpoint_read{.field = chem.getField()}; + read_checkpoint("checkpoint1.hdf5", checkpoint_read); + iter = checkpoint_read.iteration; + } + diffusion.getField().update(chem.getField()); MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + From 858da525d78524f6769750986fab7add65664313 Mon Sep 17 00:00:00 2001 From: rastogi Date: Mon, 1 Sep 2025 12:39:01 +0200 Subject: [PATCH 03/19] computeStats not working correctly, Unit Tests added --- CMakeLists.txt | 4 +- src/CMakeLists.txt | 1 + src/Chemistry/ChemistryModule.hpp | 768 ++++++++++++++++-------------- src/Chemistry/MasterFunctions.cpp | 128 +++-- src/Chemistry/WorkerFunctions.cpp | 81 +--- src/IO/HDF5Functions.hpp | 5 +- src/IO/StatsIO.cpp | 37 ++ src/IO/StatsIO.hpp | 9 + src/poet.cpp | 143 ++++-- src/poet.hpp.in | 2 +- test/CMakeLists.txt | 2 +- test/testStats.cpp | 119 +++++ 12 files changed, 789 insertions(+), 510 deletions(-) create mode 100644 src/IO/StatsIO.cpp create mode 100644 src/IO/StatsIO.hpp create mode 100644 test/testStats.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fa7f009a1..1550f0962 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,12 +28,12 @@ if (POET_PREPROCESS_BENCHS) endif() # as tug will also pull in doctest as a dependency -set(TUG_ENABLE_TESTING OFF CACHE BOOL "" FORCE) +set(TUG_ENABLE_TESTING ON CACHE BOOL "" FORCE) add_subdirectory(ext/tug EXCLUDE_FROM_ALL) add_subdirectory(ext/iphreeqc EXCLUDE_FROM_ALL) -option(POET_ENABLE_TESTING "Build test suite for POET" OFF) +option(POET_ENABLE_TESTING "Build test suite for POET" ON) if (POET_ENABLE_TESTING) add_subdirectory(test) diff 
--git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89790649b..a9849a768 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,7 @@ add_library(POETLib Init/DiffusionInit.cpp Init/ChemistryInit.cpp IO/checkpoint.cpp + IO/StatsIO.cpp DataStructures/Field.cpp Transport/DiffusionModule.cpp Chemistry/ChemistryModule.cpp diff --git a/src/Chemistry/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp index 22547a212..cc78ede87 100644 --- a/src/Chemistry/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -23,392 +23,432 @@ #include #include -namespace poet { -/** - * \brief Wrapper around PhreeqcRM to provide POET specific parallelization with - * easy access. - */ -class ChemistryModule { -public: +namespace poet +{ /** - * Creates a new instance of Chemistry module with given grid cell count, work - * package size and communicator. - * - * This constructor shall only be called by the master. To create workers, see - * ChemistryModule::createWorker . - * - * When the use of parallelization is intended, the nxyz value shall be set to - * 1 to save memory and only one node is needed for initialization. - * - * \param nxyz Count of grid cells to allocate and initialize for each - * process. For parellel use set to 1 at the master. - * \param wp_size Count of grid cells to fill each work package at maximum. - * \param communicator MPI communicator to distribute work in. + * \brief Wrapper around PhreeqcRM to provide POET specific parallelization with + * easy access. */ - ChemistryModule(uint32_t wp_size, - const InitialList::ChemistryInit chem_params, - MPI_Comm communicator); + class ChemistryModule + { + public: + /** + * Creates a new instance of Chemistry module with given grid cell count, work + * package size and communicator. + * + * This constructor shall only be called by the master. To create workers, see + * ChemistryModule::createWorker . 
+ * + * When the use of parallelization is intended, the nxyz value shall be set to + * 1 to save memory and only one node is needed for initialization. + * + * \param nxyz Count of grid cells to allocate and initialize for each + * process. For parellel use set to 1 at the master. + * \param wp_size Count of grid cells to fill each work package at maximum. + * \param communicator MPI communicator to distribute work in. + */ + ChemistryModule(uint32_t wp_size, + const InitialList::ChemistryInit chem_params, + MPI_Comm communicator); - /** - * Deconstructor, which frees DHT data structure if used. - */ - ~ChemistryModule(); + /** + * Deconstructor, which frees DHT data structure if used. + */ + ~ChemistryModule(); - void masterSetField(Field field); - /** - * Run the chemical simulation with parameters set. - */ - void simulate(double dt); + void masterSetField(Field field); + /** + * Run the chemical simulation with parameters set. + */ + void simulate(double dt); - /** - * Returns all known species names, including not only aqueous species, but - * also equilibrium, exchange, surface and kinetic reactants. - */ - // auto GetPropNames() const { return this->prop_names; } + /** + * Returns all known species names, including not only aqueous species, but + * also equilibrium, exchange, surface and kinetic reactants. + */ + // auto GetPropNames() const { return this->prop_names; } - /** - * Return the accumulated runtime in seconds for chemical simulation. - */ - auto GetChemistryTime() const { return this->chem_t; } + /** + * Return the accumulated runtime in seconds for chemical simulation. 
+ */ + auto GetChemistryTime() const { return this->chem_t; } - void setFilePadding(std::uint32_t maxiter) { - this->file_pad = - static_cast(std::ceil(std::log10(maxiter + 1))); - } + void setFilePadding(std::uint32_t maxiter) + { + this->file_pad = + static_cast(std::ceil(std::log10(maxiter + 1))); + } - struct SurrogateSetup { - std::vector prop_names; - std::array base_totals; - bool has_het_ids; + struct SurrogateSetup + { + std::vector prop_names; + std::array base_totals; + bool has_het_ids; - bool dht_enabled; - std::uint32_t dht_size_mb; - int dht_snaps; - std::string dht_out_dir; + bool dht_enabled; + std::uint32_t dht_size_mb; + int dht_snaps; + std::string dht_out_dir; - bool interp_enabled; - std::uint32_t interp_bucket_size; - std::uint32_t interp_size_mb; - std::uint32_t interp_min_entries; - bool ai_surrogate_enabled; - }; + bool interp_enabled; + std::uint32_t interp_bucket_size; + std::uint32_t interp_size_mb; + std::uint32_t interp_min_entries; + bool ai_surrogate_enabled; + }; - void masterEnableSurrogates(const SurrogateSetup &setup) { - // FIXME: This is a hack to get the prop_names and prop_count from the setup - this->prop_names = setup.prop_names; - this->prop_count = setup.prop_names.size(); + void masterEnableSurrogates(const SurrogateSetup &setup) + { + // FIXME: This is a hack to get the prop_names and prop_count from the setup + this->prop_names = setup.prop_names; + this->prop_count = setup.prop_names.size(); - this->dht_enabled = setup.dht_enabled; - this->interp_enabled = setup.interp_enabled; - this->ai_surrogate_enabled = setup.ai_surrogate_enabled; + this->dht_enabled = setup.dht_enabled; + this->interp_enabled = setup.interp_enabled; + this->ai_surrogate_enabled = setup.ai_surrogate_enabled; - this->base_totals = setup.base_totals; + this->base_totals = setup.base_totals; - if (this->dht_enabled || this->interp_enabled) { - this->initializeDHT(setup.dht_size_mb, this->params.dht_species, - setup.has_het_ids); + if 
(this->dht_enabled || this->interp_enabled) + { + this->initializeDHT(setup.dht_size_mb, this->params.dht_species, + setup.has_het_ids); - if (setup.dht_snaps != DHT_SNAPS_DISABLED) { - this->setDHTSnapshots(setup.dht_snaps, setup.dht_out_dir); + if (setup.dht_snaps != DHT_SNAPS_DISABLED) + { + this->setDHTSnapshots(setup.dht_snaps, setup.dht_out_dir); + } + } + + if (this->interp_enabled) + { + this->initializeInterp(setup.interp_bucket_size, setup.interp_size_mb, + setup.interp_min_entries, + this->params.interp_species); } } - if (this->interp_enabled) { - this->initializeInterp(setup.interp_bucket_size, setup.interp_size_mb, - setup.interp_min_entries, - this->params.interp_species); + /** + * Intended to alias input parameters for grid initialization with a single + * value per species. + */ + using SingleCMap = std::unordered_map; + + /** + * Intended to alias input parameters for grid initialization with mutlitple + * values per species. + */ + using VectorCMap = std::unordered_map>; + + /** + * Enumerating DHT file options + */ + enum + { + DHT_SNAPS_DISABLED = 0, //!< disabled file output + DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation + DHT_SNAPS_ITEREND //!< output snapshots after each iteration + }; + + /** + * **Only called by workers!** Start the worker listening loop. + */ + void WorkerLoop(); + + /** + * **Called by master** Advise the workers to break the loop. + */ + void MasterLoopBreak(); + + /** + * **Master only** Return count of grid cells. + */ + auto GetNCells() const { return this->n_cells; } + /** + * **Master only** Return work package size. + */ + auto GetWPSize() const { return this->wp_size; } + /** + * **Master only** Return the time in seconds the master spent waiting for any + * free worker. 
+ */ + auto GetMasterIdleTime() const { return this->idle_t; } + /** + * **Master only** Return the time in seconds the master spent in sequential + * part of the simulation, including times for shuffling/unshuffling field + * etc. + */ + auto GetMasterSequentialTime() const { return this->seq_t; } + /** + * **Master only** Return the time in seconds the master spent in the + * send/receive loop. + */ + auto GetMasterLoopTime() const { return this->send_recv_t; } + + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to run Phreeqc simulation. + * + * \return Vector of all accumulated Phreeqc timings. + */ + std::vector GetWorkerPhreeqcTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to get values from the DHT. + * + * \return Vector of all accumulated DHT get times. + */ + std::vector GetWorkerDHTGetTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to write values to the DHT. + * + * \return Vector of all accumulated DHT fill times. + */ + std::vector GetWorkerDHTFillTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers waiting for work packages from the master. + * + * \return Vector of all accumulated waiting times. + */ + std::vector GetWorkerIdleTimings() const; + + /** + * **Master only** Collect and return DHT hits of all workers. + * + * \return Vector of all count of DHT hits. + */ + std::vector GetWorkerDHTHits() const; + + /** + * **Master only** Collect and return DHT evictions of all workers. + * + * \return Vector of all count of DHT evictions. + */ + std::vector GetWorkerDHTEvictions() const; + + /** + * **Master only** Returns the current state of the chemical field. + * + * \return Reference to the chemical field. + */ + Field &getField() { return this->chem_field; } + + /** + * **Master only** Enable/disable progress bar. 
+ * + * \param enabled True if print progressbar, false if not. + */ + void setProgressBarPrintout(bool enabled) + { + this->print_progessbar = enabled; + }; + + /** + * **Master only** Set the ai surrogate validity vector from R + */ + void set_ai_surrogate_validity_vector(std::vector r_vector); + + std::vector GetWorkerInterpolationCalls() const; + + std::vector GetWorkerInterpolationWriteTimings() const; + std::vector GetWorkerInterpolationReadTimings() const; + std::vector GetWorkerInterpolationGatherTimings() const; + std::vector GetWorkerInterpolationFunctionCallTimings() const; + + std::vector GetWorkerPHTCacheHits() const; + + std::vector ai_surrogate_validity_vector; + + RuntimeParameters *runtime_params = nullptr; + uint32_t control_iteration_counter = 0; + + struct error_stats + { + std::vector mape; + std::vector rrsme; + uint32_t iteration; + + error_stats(size_t species_count, size_t iter) + : mape(species_count, 0.0), rrsme(species_count, 0.0), iteration(iter) {} + }; + + std::vector error_stats_history; + + static void computeStats(const std::vector &pqc_vector, + const std::vector &sur_vector, + uint32_t size_per_prop, uint32_t species_count, + error_stats &stats); + + protected: + void initializeDHT(uint32_t size_mb, + const NamedVector &key_species, + bool has_het_ids); + void setDHTSnapshots(int type, const std::string &out_dir); + void setDHTReadFile(const std::string &input_file); + + void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, + std::uint32_t min_entries, + const NamedVector &key_species); + + enum + { + CHEM_FIELD_INIT, + CHEM_DHT_ENABLE, + CHEM_DHT_SIGNIF_VEC, + CHEM_DHT_SNAPS, + CHEM_DHT_READ_FILE, + CHEM_IP_ENABLE, + CHEM_IP_MIN_ENTRIES, + CHEM_IP_SIGNIF_VEC, + CHEM_WORK_LOOP, + CHEM_PERF, + CHEM_BREAK_MAIN_LOOP, + CHEM_AI_BCAST_VALIDITY + }; + + enum + { + LOOP_WORK, + LOOP_END, + LOOP_CTRL + }; + + enum + { + WORKER_PHREEQC, + WORKER_DHT_GET, + WORKER_DHT_FILL, + WORKER_IDLE, + WORKER_IP_WRITE, + 
WORKER_IP_READ, + WORKER_IP_GATHER, + WORKER_IP_FC, + WORKER_DHT_HITS, + WORKER_DHT_EVICTIONS, + WORKER_PHT_CACHE_HITS, + WORKER_IP_CALLS + }; + + std::vector interp_calls; + std::vector dht_hits; + std::vector dht_evictions; + + struct worker_s + { + double phreeqc_t = 0.; + double dht_get = 0.; + double dht_fill = 0.; + double idle_t = 0.; + }; + + struct worker_info_s + { + char has_work = 0; + double *send_addr; + }; + + using worker_list_t = std::vector; + using workpointer_t = std::vector::iterator; + + void MasterRunParallel(double dt); + void MasterRunSequential(); + + void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, + int &pkg_to_send, int &count_pkgs, int &free_workers, + double dt, uint32_t iteration, uint32_t control_iteration, + const std::vector &wp_sizes_vector); + void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, + int &free_workers); + + std::vector MasterGatherWorkerTimings(int type) const; + std::vector MasterGatherWorkerMetrics(int type) const; + + void WorkerProcessPkgs(struct worker_s &timings, uint32_t &iteration); + + void WorkerDoWork(MPI_Status &probe_status, int double_count, + struct worker_s &timings); + void WorkerPostIter(MPI_Status &prope_status, uint32_t iteration); + void WorkerPostSim(uint32_t iteration); + + void WorkerWriteDHTDump(uint32_t iteration); + void WorkerReadDHTDump(const std::string &dht_input_file); + + void WorkerPerfToMaster(int type, const struct worker_s &timings); + void WorkerMetricsToMaster(int type); + + void WorkerRunWorkPackage(WorkPackage &work_package, double dSimTime, + double dTimestep); + + std::vector CalculateWPSizesVector(uint32_t n_cells, + uint32_t wp_size) const; + std::vector shuffleField(const std::vector &in_field, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count); + void unshuffleField(const std::vector &in_buffer, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count, std::vector &out_field); + std::vector + 
parseDHTSpeciesVec(const NamedVector &key_species, + const std::vector &to_compare) const; + + void BCastStringVec(std::vector &io); + + int comm_size, comm_rank; + MPI_Comm group_comm; + + bool is_sequential; + bool is_master; + + uint32_t wp_size; + bool dht_enabled{false}; + int dht_snaps_type{DHT_SNAPS_DISABLED}; + std::string dht_file_out_dir; + + poet::DHT_Wrapper *dht = nullptr; + + bool interp_enabled{false}; + std::unique_ptr interp; + + bool ai_surrogate_enabled{false}; + + static constexpr uint32_t BUFFER_OFFSET = 6; + + inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const + { + MPI_Bcast(buf, count, datatype, 0, this->group_comm); } - } - /** - * Intended to alias input parameters for grid initialization with a single - * value per species. - */ - using SingleCMap = std::unordered_map; - - /** - * Intended to alias input parameters for grid initialization with mutlitple - * values per species. - */ - using VectorCMap = std::unordered_map>; - - /** - * Enumerating DHT file options - */ - enum { - DHT_SNAPS_DISABLED = 0, //!< disabled file output - DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation - DHT_SNAPS_ITEREND //!< output snapshots after each iteration - }; - - /** - * **Only called by workers!** Start the worker listening loop. - */ - void WorkerLoop(); - - /** - * **Called by master** Advise the workers to break the loop. - */ - void MasterLoopBreak(); - - /** - * **Master only** Return count of grid cells. - */ - auto GetNCells() const { return this->n_cells; } - /** - * **Master only** Return work package size. - */ - auto GetWPSize() const { return this->wp_size; } - /** - * **Master only** Return the time in seconds the master spent waiting for any - * free worker. - */ - auto GetMasterIdleTime() const { return this->idle_t; } - /** - * **Master only** Return the time in seconds the master spent in sequential - * part of the simulation, including times for shuffling/unshuffling field - * etc. 
- */ - auto GetMasterSequentialTime() const { return this->seq_t; } - /** - * **Master only** Return the time in seconds the master spent in the - * send/receive loop. - */ - auto GetMasterLoopTime() const { return this->send_recv_t; } - - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to run Phreeqc simulation. - * - * \return Vector of all accumulated Phreeqc timings. - */ - std::vector GetWorkerPhreeqcTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to get values from the DHT. - * - * \return Vector of all accumulated DHT get times. - */ - std::vector GetWorkerDHTGetTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to write values to the DHT. - * - * \return Vector of all accumulated DHT fill times. - */ - std::vector GetWorkerDHTFillTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers waiting for work packages from the master. - * - * \return Vector of all accumulated waiting times. - */ - std::vector GetWorkerIdleTimings() const; - - /** - * **Master only** Collect and return DHT hits of all workers. - * - * \return Vector of all count of DHT hits. - */ - std::vector GetWorkerDHTHits() const; - - /** - * **Master only** Collect and return DHT evictions of all workers. - * - * \return Vector of all count of DHT evictions. - */ - std::vector GetWorkerDHTEvictions() const; - - /** - * **Master only** Returns the current state of the chemical field. - * - * \return Reference to the chemical field. - */ - Field &getField() { return this->chem_field; } - - /** - * **Master only** Enable/disable progress bar. - * - * \param enabled True if print progressbar, false if not. 
- */ - void setProgressBarPrintout(bool enabled) { - this->print_progessbar = enabled; - }; - - /** - * **Master only** Set the ai surrogate validity vector from R - */ - void set_ai_surrogate_validity_vector(std::vector r_vector); - - std::vector GetWorkerInterpolationCalls() const; - - std::vector GetWorkerInterpolationWriteTimings() const; - std::vector GetWorkerInterpolationReadTimings() const; - std::vector GetWorkerInterpolationGatherTimings() const; - std::vector GetWorkerInterpolationFunctionCallTimings() const; - - std::vector GetWorkerPHTCacheHits() const; - - std::vector ai_surrogate_validity_vector; - - RuntimeParameters *runtime_params = nullptr; - -protected: - void initializeDHT(uint32_t size_mb, - const NamedVector &key_species, - bool has_het_ids); - void setDHTSnapshots(int type, const std::string &out_dir); - void setDHTReadFile(const std::string &input_file); - - void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, - std::uint32_t min_entries, - const NamedVector &key_species); - - enum { - CHEM_FIELD_INIT, - CHEM_DHT_ENABLE, - CHEM_DHT_SIGNIF_VEC, - CHEM_DHT_SNAPS, - CHEM_DHT_READ_FILE, - CHEM_IP_ENABLE, - CHEM_IP_MIN_ENTRIES, - CHEM_IP_SIGNIF_VEC, - CHEM_WORK_LOOP, - CHEM_PERF, - CHEM_BREAK_MAIN_LOOP, - CHEM_AI_BCAST_VALIDITY - }; - - enum { LOOP_WORK, LOOP_END, WITH_REL_ERROR }; - - enum { - WORKER_PHREEQC, - WORKER_DHT_GET, - WORKER_DHT_FILL, - WORKER_IDLE, - WORKER_IP_WRITE, - WORKER_IP_READ, - WORKER_IP_GATHER, - WORKER_IP_FC, - WORKER_DHT_HITS, - WORKER_DHT_EVICTIONS, - WORKER_PHT_CACHE_HITS, - WORKER_IP_CALLS - }; - - std::vector interp_calls; - std::vector dht_hits; - std::vector dht_evictions; - - struct worker_s { - double phreeqc_t = 0.; - double dht_get = 0.; - double dht_fill = 0.; + inline void PropagateFunctionType(int &type) const + { + ChemBCast(&type, 1, MPI_INT); + } + double simtime = 0.; double idle_t = 0.; + double seq_t = 0.; + double send_recv_t = 0.; + + std::array base_totals{0}; + + bool 
print_progessbar{false}; + + std::uint8_t file_pad{1}; + + double chem_t{0.}; + + uint32_t n_cells = 0; + uint32_t prop_count = 0; + std::vector prop_names; + + Field chem_field; + + const InitialList::ChemistryInit params; + + std::unique_ptr pqc_runner; + + std::vector sur_shuffled; }; - - struct worker_info_s { - char has_work = 0; - double *send_addr; - }; - - using worker_list_t = std::vector; - using workpointer_t = std::vector::iterator; - - void MasterRunParallel(double dt); - void MasterRunSequential(); - - void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, - int &pkg_to_send, int &count_pkgs, int &free_workers, - double dt, uint32_t iteration, uint32_t control_iter, - const std::vector &wp_sizes_vector); - void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, - int &free_workers); - - std::vector MasterGatherWorkerTimings(int type) const; - std::vector MasterGatherWorkerMetrics(int type) const; - - void WorkerProcessPkgs(struct worker_s &timings, uint32_t &iteration); - - void WorkerDoWork(MPI_Status &probe_status, int double_count, - struct worker_s &timings); - void WorkerPostIter(MPI_Status &prope_status, uint32_t iteration); - void WorkerPostSim(uint32_t iteration); - - void WorkerWriteDHTDump(uint32_t iteration); - void WorkerReadDHTDump(const std::string &dht_input_file); - - void WorkerPerfToMaster(int type, const struct worker_s &timings); - void WorkerMetricsToMaster(int type); - - void WorkerRunWorkPackage(WorkPackage &work_package, double dSimTime, - double dTimestep); - - std::vector CalculateWPSizesVector(uint32_t n_cells, - uint32_t wp_size) const; - - std::vector shuffleField(const std::vector &in_field, - uint32_t size_per_prop, uint32_t prop_count, - uint32_t wp_count); - void unshuffleField(const std::vector &in_buffer, - uint32_t size_per_prop, uint32_t prop_count, - uint32_t wp_count, std::vector &out_field); - std::vector - parseDHTSpeciesVec(const NamedVector &key_species, - const std::vector 
&to_compare) const; - - void BCastStringVec(std::vector &io); - - int comm_size, comm_rank; - MPI_Comm group_comm; - - bool is_sequential; - bool is_master; - - uint32_t wp_size; - bool dht_enabled{false}; - int dht_snaps_type{DHT_SNAPS_DISABLED}; - std::string dht_file_out_dir; - - poet::DHT_Wrapper *dht = nullptr; - - bool interp_enabled{false}; - std::unique_ptr interp; - - bool ai_surrogate_enabled{false}; - - static constexpr uint32_t BUFFER_OFFSET = 6; - - inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const { - MPI_Bcast(buf, count, datatype, 0, this->group_comm); - } - - inline void PropagateFunctionType(int &type) const { - ChemBCast(&type, 1, MPI_INT); - } - double simtime = 0.; - double idle_t = 0.; - double seq_t = 0.; - double send_recv_t = 0.; - - std::array base_totals{0}; - - bool print_progessbar{false}; - - std::uint8_t file_pad{1}; - - double chem_t{0.}; - - uint32_t n_cells = 0; - uint32_t prop_count = 0; - std::vector prop_names; - - Field chem_field; - - const InitialList::ChemistryInit params; - - std::unique_ptr pqc_runner; -}; } // namespace poet #endif // CHEMISTRYMODULE_H_ diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index 63858c530..c1ce0b75e 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -172,6 +173,61 @@ std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const return ret; } +void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, + const std::vector &sur_vector, + uint32_t size_per_prop, uint32_t species_count, + error_stats &stats) +{ + for (uint32_t i = 0; i < species_count; i++) + { + double err_sum = 0.0; + double sqr_err_sum = 0.0; + int count = 0; + + for (uint32_t j = 0; j < size_per_prop; j++) + { + + double pqc_value = pqc_vector[i * size_per_prop + j]; + double sur_value = sur_vector[i * size_per_prop + j]; + +if (i == 0 && (j % 
10000 == 0)) { + std::cout << "i=" << i << ", j=" << j + << ", idx=" << i * size_per_prop + j + << ", pqc=" << pqc_value + << ", sur=" << sur_value + << std::endl; +} + + if (pqc_value != 0) + { + double rel_err = (pqc_value - sur_value) / pqc_value; + err_sum += std::abs(rel_err); + sqr_err_sum += rel_err * rel_err; + count++; + } + if (pqc_value == 0 && sur_value != 0) + { + err_sum += 1.0; + sqr_err_sum += 1.0; + count++; + } + + if (pqc_value == 0 && sur_value == 0) + { + } + + // else: both cases are zero, skip (no error) + } + if (i == 0) + { + std::cout << "computeStats, i==0, err_sum: " << err_sum << std::endl; + std::cout << "computeStats, i==0, sqr_err_sum: " << sqr_err_sum << std::endl; + } + stats.mape[i] = (count > 0) ? (100.0 / count) * err_sum : 0.0; + stats.rrsme[i] = (count > 0) ? std::sqrt(sqr_err_sum / count) : 0.0; + } + +} inline std::vector shuffleVector(const std::vector &in_vector, uint32_t size_per_prop, @@ -355,30 +411,21 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, pkg_to_recv -= 1; free_workers++; } - if (probe_status.MPI_TAG == WITH_REL_ERROR) + if (probe_status.MPI_TAG == LOOP_CTRL) { MPI_Get_count(&probe_status, MPI_DOUBLE, &size); - std::cout << "[Master] Probed rel error from worker " << p - << ", size = " << size << std::endl; + // layout of buffer is [phreeqc][surrogate] + std::vector recv_buffer(size); - int half = size/2; - - std::vector rel_err_buffer(size); - std::vector rel_error(half); - MPI_Recv(rel_err_buffer.data(), size, MPI_DOUBLE, p, WITH_REL_ERROR, + MPI_Recv(recv_buffer.data(), size, MPI_DOUBLE, p, LOOP_CTRL, this->group_comm, MPI_STATUS_IGNORE); - + std::copy(recv_buffer.begin(), recv_buffer.begin() + (size / 2), + w_list[p - 1].send_addr); - std::copy(rel_err_buffer.begin(), rel_err_buffer.begin() + half, - w_list[p - 1].send_addr); - - std::copy(rel_err_buffer.begin() + half, rel_err_buffer.end(), rel_error.begin()); - - std::cout << "[Master] Received rel error buffer from worker " 
<< p - << ", first value = " << (rel_err_buffer.empty() ? -1 : rel_err_buffer[0]) - << std::endl; + sur_shuffled.insert(sur_shuffled.end(), recv_buffer.begin() + (size / 2), + recv_buffer.begin() + size); w_list[p - 1].has_work = 0; pkg_to_recv -= 1; @@ -461,12 +508,18 @@ void poet::ChemistryModule::MasterRunParallel(double dt) static uint32_t iteration = 0; uint32_t control_iteration = static_cast(this->runtime_params->control_iteration_active ? 1 : 0); + if (control_iteration) + { + sur_shuffled.clear(); + sur_shuffled.reserve(this->n_cells * this->prop_count); + } /* start time measurement of sequential part */ seq_a = MPI_Wtime(); /* shuffle grid */ // grid.shuffleAndExport(mpi_buffer); + std::vector mpi_buffer = shuffleField(chem_field.AsVector(), this->n_cells, this->prop_count, wp_sizes_vector.size()); @@ -507,30 +560,6 @@ void poet::ChemistryModule::MasterRunParallel(double dt) // ... and try to receive them from workers who has finished their work MasterRecvPkgs(worker_list, pkg_to_recv, pkg_to_send > 0, free_workers); } - // to do: Statistik - - /* if control_iteration_active is true receive rel. error data and compare with epsilon */ - if (this->runtime_params->control_iteration_active) - { - - // do Statistik - /** - int rel_err_offset = size / 2; // or calculate as needed - - for (std::size_t ep_i = 0; ep_i < this->runtime_params->species_epsilon.size(); ep_i++) - { - if (rel_err_buffer[rel_err_offset + ep_i] > this->runtime_params->species_epsilon[ep_i]) - { - std::cout << "[Master] At least one relative error exceeded epsilon threshold!" 
- << std::endl; - std::cout << "value: " << rel_err_buffer[rel_err_offset + ep_i] << " epsilon: " - << this->runtime_params->species_epsilon[ep_i] << std::endl; - break; - } - } - */ - } - // Just to complete the progressbar std::cout << std::endl; @@ -551,6 +580,23 @@ void poet::ChemistryModule::MasterRunParallel(double dt) /* do master stuff */ + if (control_iteration) + { + control_iteration_counter++; + + std::vector sur_unshuffled{sur_shuffled}; + + unshuffleField(sur_shuffled, this->n_cells, this->prop_count, + wp_sizes_vector.size(), sur_unshuffled); + + error_stats stats(this->prop_count, control_iteration_counter * runtime_params->control_iteration); + + computeStats(out_vec, sur_unshuffled, this->n_cells, this->prop_count, stats); + error_stats_history.push_back(stats); + + // to do: control values to epsilon + } + /* start time measurement of master chemistry */ sim_e_chemistry = MPI_Wtime(); diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index a45a5bc23..1b35387c1 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -238,77 +238,48 @@ namespace poet phreeqc_time_end = MPI_Wtime(); if (control_iteration_active) - { - // increase size for relative error - std::size_t rel_error_size = s_curr_wp.size * this->prop_count; - - // extend mpi_buffer, for rel. error for every species - mpi_buffer.resize(count + rel_error_size); - std::size_t offset = count; - count += rel_error_size; - - // calc rel. 
error if phreeqc != surrogate + { + std::size_t sur_wp_offset = s_curr_wp.size * this->prop_count; + + mpi_buffer.resize(count + sur_wp_offset); + for (std::size_t wp_i = 0; wp_i < s_curr_wp_pqc.size; wp_i++) { - const auto &surrogate_result = s_curr_wp.output[wp_i]; - const auto &phreeqc_result = s_curr_wp_pqc.output[wp_i]; - - // std::cout << "surrogate_result.size() " << surrogate_result.size() << ", phreeqc_result " << phreeqc_result.size() << std::endl; - - // fill NaNs - if (surrogate_result.size() == 0) - { - for (std::size_t i = 0; i < this->prop_count; i++) - { - mpi_buffer[offset++] = std::numeric_limits::quiet_NaN(); - } - } - - // compute rel error - if (surrogate_result.size() == phreeqc_result.size()) - { - for (std::size_t i = 0; i < this->prop_count; i++) - { - double ref = phreeqc_result[i]; - double surrogate = surrogate_result[i]; - - if (std::abs(ref) > 1e-9) - { - mpi_buffer[offset++] = std::abs((surrogate - ref) / ref); - } - else - { - mpi_buffer[offset++] = 0.0; - } - } - } + std::copy(s_curr_wp_pqc.output[wp_i].begin(), s_curr_wp_pqc.output[wp_i].end(), + mpi_buffer.begin() + this->prop_count * wp_i); } + + // s_curr_wp only contains the interpolated data + // copy surrogate output after the the pqc output, mpi_buffer[pqc][interp] + + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) + { + std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), + mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i); + } + + count += sur_wp_offset; } - - poet::WorkPackage &s_curr_wp_copy = control_iteration_active ? 
s_curr_wp_pqc : s_curr_wp; - - for (std::size_t wp_i = 0; wp_i < s_curr_wp_copy.size; wp_i++) + else { - std::copy(s_curr_wp_copy.output[wp_i].begin(), s_curr_wp_copy.output[wp_i].end(), - mpi_buffer.begin() + this->prop_count * wp_i); + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) + { + std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), + mpi_buffer.begin() + this->prop_count * wp_i); + } } /* send results to master */ MPI_Request send_req; - int mpi_tag = control_iteration_active ? WITH_REL_ERROR : LOOP_WORK; + int mpi_tag = control_iteration_active ? LOOP_CTRL : LOOP_WORK; MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, mpi_tag, MPI_COMM_WORLD, &send_req); - if (control_iteration_active) - { - std::cout << "[Worker " << this->comm_rank << "] Sent results." << std::endl; - } - if (dht_enabled || interp_enabled) { /* write results to DHT */ dht_fill_start = MPI_Wtime(); - dht->fillDHT(s_curr_wp_copy); + dht->fillDHT(control_iteration_active ? s_curr_wp_pqc : s_curr_wp); dht_fill_end = MPI_Wtime(); if (interp_enabled) diff --git a/src/IO/HDF5Functions.hpp b/src/IO/HDF5Functions.hpp index e7954a2f7..87687f2b9 100644 --- a/src/IO/HDF5Functions.hpp +++ b/src/IO/HDF5Functions.hpp @@ -1,8 +1,9 @@ -#pragma once +#pragma once #include #include "Datatypes.hpp" int write_checkpoint(const std::string &file_path, struct Checkpoint_s &&checkpoint); -int read_checkpoint(const std::string &file_path, struct Checkpoint_s &checkpoint); \ No newline at end of file +int read_checkpoint(const std::string &file_path, struct Checkpoint_s &checkpoint); + diff --git a/src/IO/StatsIO.cpp b/src/IO/StatsIO.cpp new file mode 100644 index 000000000..4312a46dd --- /dev/null +++ b/src/IO/StatsIO.cpp @@ -0,0 +1,37 @@ +#include "IO/StatsIO.hpp" +#include +#include +#include + +namespace poet +{ + void writeStatsToCSV(const std::vector &all_stats, + const std::vector &species_names, + const std::string &filename) + { + std::ofstream out(filename); + if 
(!out.is_open()) + { + std::cerr << "Could not open " << filename << " !" << std::endl; + return; + } + + // header + out << "Iteration, Species, MAPE, RRSME \n"; + + for (size_t i = 0; i < all_stats.size(); ++i) + { + for (size_t j = 0; j < species_names.size(); ++j) + { + out << all_stats[i].iteration << ",\t" + << species_names[j] << ",\t" + << all_stats[i].mape[j] << ",\t" + << all_stats[i].rrsme[j] << "\n"; + } + out << std::endl; + } + + out.close(); + std::cout << "Stats written to " << filename << "\n"; + } +} // namespace poet \ No newline at end of file diff --git a/src/IO/StatsIO.hpp b/src/IO/StatsIO.hpp new file mode 100644 index 000000000..a865cc64a --- /dev/null +++ b/src/IO/StatsIO.hpp @@ -0,0 +1,9 @@ +#include +#include "Chemistry/ChemistryModule.hpp" + +namespace poet +{ + void writeStatsToCSV(const std::vector &all_stats, + const std::vector &species_names, + const std::string &filename); +} // namespace poet diff --git a/src/poet.cpp b/src/poet.cpp index 408bb56ff..2f39673be 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -28,6 +28,7 @@ #include "DataStructures/Field.hpp" #include "IO/Datatypes.hpp" #include "IO/HDF5Functions.hpp" +#include "IO/StatsIO.hpp" #include "Init/InitialList.hpp" #include "Transport/DiffusionModule.hpp" @@ -67,7 +68,8 @@ static poet::DEFunc ReadRObj_R; static poet::DEFunc SaveRObj_R; static poet::DEFunc source_R; -static void init_global_functions(RInside &R) { +static void init_global_functions(RInside &R) +{ R.parseEval(kin_r_library); master_init_R = DEFunc("master_init"); master_iteration_end_R = DEFunc("master_iteration_end"); @@ -90,9 +92,15 @@ static void init_global_functions(RInside &R) { // R.parseEval("mysetup$state_C <- TMP"); // } -enum ParseRet { PARSER_OK, PARSER_ERROR, PARSER_HELP }; +enum ParseRet +{ + PARSER_OK, + PARSER_ERROR, + PARSER_HELP +}; -int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { +int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) +{ CLI::App app{"POET - 
Potsdam rEactive Transport simulator"}; @@ -174,9 +182,12 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { "Output directory of the simulation") ->required(); - try { + try + { app.parse(argc, argv); - } catch (const CLI::ParseError &e) { + } + catch (const CLI::ParseError &e) + { app.exit(e); return -1; } @@ -188,14 +199,16 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { if (params.as_qs) params.out_ext = "qs"; - if (MY_RANK == 0) { + if (MY_RANK == 0) + { // MSG("Complete results storage is " + BOOL_PRINT(simparams.store_result)); MSG("Output format/extension is " + params.out_ext); MSG("Work Package Size: " + std::to_string(params.work_package_size)); MSG("DHT is " + BOOL_PRINT(params.use_dht)); MSG("AI Surrogate is " + BOOL_PRINT(params.use_ai_surrogate)); - if (params.use_dht) { + if (params.use_dht) + { // MSG("DHT strategy is " + std::to_string(simparams.dht_strategy)); // MDL: these should be outdated (?) // MSG("DHT key default digits (ignored if 'signif_vector' is " @@ -209,7 +222,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { // MSG("DHT load file is " + chem_params.dht_file); } - if (params.use_interp) { + if (params.use_interp) + { MSG("PHT interpolation enabled: " + BOOL_PRINT(params.use_interp)); MSG("PHT interp-size = " + std::to_string(params.interp_size)); MSG("PHT interp-min = " + std::to_string(params.interp_min_entries)); @@ -237,7 +251,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { // // log before rounding? 
// R["dht_log"] = simparams.dht_log; - try { + try + { Rcpp::List init_params_(ReadRObj_R(init_file)); params.init_params = init_params_; @@ -251,12 +266,13 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { params.timesteps = Rcpp::as>(global_rt_setup->operator[]("timesteps")); - params.control_iteration = - Rcpp::as(global_rt_setup->operator[]("control_iteration")); - params.species_epsilon = + params.control_iteration = + Rcpp::as(global_rt_setup->operator[]("control_iteration")); + params.species_epsilon = Rcpp::as>(global_rt_setup->operator[]("species_epsilon")); - - } catch (const std::exception &e) { + } + catch (const std::exception &e) + { ERRMSG("Error while parsing R scripts: " + std::string(e.what())); return ParseRet::PARSER_ERROR; } @@ -266,7 +282,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { // HACK: this is a step back as the order and also the count of fields is // predefined, but it will change in the future -void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) { +void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) +{ R["TMP"] = Rcpp::wrap(trans.AsVector()); R["TMP_PROPS"] = Rcpp::wrap(trans.GetProps()); R.parseEval(std::string("state_T <- setNames(data.frame(matrix(TMP, nrow=" + @@ -285,13 +302,15 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) { static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, DiffusionModule &diffusion, - ChemistryModule &chem) { + ChemistryModule &chem) +{ /* Iteration Count is dynamic, retrieving value from R (is only needed by * master for the following loop) */ uint32_t maxiter = params.timesteps.size(); - if (params.print_progress) { + if (params.print_progress) + { chem.setProgressBarPrintout(true); } R["TMP_PROPS"] = Rcpp::wrap(chem.getField().GetProps()); @@ -299,10 +318,11 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, /* SIMULATION LOOP */ 
double dSimTime{0}; - for (uint32_t iter = 1; iter < maxiter + 1; iter++) { + for (uint32_t iter = 1; iter < maxiter + 1; iter++) + { + + params.control_iteration_active = (iter % params.control_iteration == 0 && iter != 0); - params.control_iteration_active = (iter % params.control_iteration == 0); - double start_t = MPI_Wtime(); const double &dt = params.timesteps[iter - 1]; @@ -323,7 +343,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, chem.getField().update(diffusion.getField()); // MSG("Chemistry start"); - if (params.use_ai_surrogate) { + if (params.use_ai_surrogate) + { double ai_start_t = MPI_Wtime(); // Save current values from the tug field as predictor for the ai step R["TMP"] = Rcpp::wrap(chem.getField().AsVector()); @@ -374,7 +395,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, chem.simulate(dt); /* AI surrogate iterative training*/ - if (params.use_ai_surrogate) { + if (params.use_ai_surrogate) + { double ai_start_t = MPI_Wtime(); R["TMP"] = Rcpp::wrap(chem.getField().AsVector()); @@ -408,19 +430,32 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, // TODO: write checkpoint // checkpoint struct --> field and iteration - if (iter == 1) { - write_checkpoint("checkpoint1.hdf5", - {.field = chem.getField(), .iteration = iter}); - } else if (iter == 2) { + /*else if (iter == 2) { Checkpoint_s checkpoint_read{.field = chem.getField()}; read_checkpoint("checkpoint1.hdf5", checkpoint_read); iter = checkpoint_read.iteration; - } + }*/ diffusion.getField().update(chem.getField()); MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); + + /* + if (params.control_iteration_active) + { + std::string file_path = "checkpoint" + std::to_string(iter) + ".hdf5"; + write_checkpoint(file_path, + {.field = chem.getField(), .iteration = iter}); + } + + + if (iter % params.control_iteration == 0 && iter != 0) + { + write_checkpoint("checkpoint" + 
std::to_string(iter) + ".hdf5", + {.field = chem.getField(), .iteration = iter}); + } + */ // MSG(); } // END SIMULATION LOOP @@ -437,7 +472,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, Rcpp::List diffusion_profiling; diffusion_profiling["simtime"] = diffusion.getTransportTime(); - if (params.use_dht) { + if (params.use_dht) + { chem_profiling["dht_hits"] = Rcpp::wrap(chem.GetWorkerDHTHits()); chem_profiling["dht_evictions"] = Rcpp::wrap(chem.GetWorkerDHTEvictions()); chem_profiling["dht_get_time"] = Rcpp::wrap(chem.GetWorkerDHTGetTimings()); @@ -445,7 +481,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, Rcpp::wrap(chem.GetWorkerDHTFillTimings()); } - if (params.use_interp) { + if (params.use_interp) + { chem_profiling["interp_w"] = Rcpp::wrap(chem.GetWorkerInterpolationWriteTimings()); chem_profiling["interp_r"] = @@ -466,11 +503,14 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, chem.MasterLoopBreak(); + writeStatsToCSV(chem.error_stats_history, chem.getField().GetProps(), "stats_overview"); + return profiling; } std::vector getSpeciesNames(const Field &&field, int root, - MPI_Comm comm) { + MPI_Comm comm) +{ std::uint32_t n_elements; std::uint32_t n_string_size; @@ -480,11 +520,13 @@ std::vector getSpeciesNames(const Field &&field, int root, const bool is_master = root == rank; // first, the master sends all the species names iterative - if (is_master) { + if (is_master) + { n_elements = field.GetProps().size(); MPI_Bcast(&n_elements, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); - for (std::uint32_t i = 0; i < n_elements; i++) { + for (std::uint32_t i = 0; i < n_elements; i++) + { n_string_size = field.GetProps()[i].size(); MPI_Bcast(&n_string_size, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); MPI_Bcast(const_cast(field.GetProps()[i].c_str()), n_string_size, @@ -499,7 +541,8 @@ std::vector getSpeciesNames(const Field &&field, int root, std::vector species_names_out(n_elements); - for 
(std::uint32_t i = 0; i < n_elements; i++) { + for (std::uint32_t i = 0; i < n_elements; i++) + { MPI_Bcast(&n_string_size, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); char recv_buf[n_string_size]; @@ -512,7 +555,8 @@ std::vector getSpeciesNames(const Field &&field, int root, return species_names_out; } -std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) { +std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) +{ std::array base_totals; int rank; @@ -520,7 +564,8 @@ std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) { const bool is_master = root == rank; - if (is_master) { + if (is_master) + { const auto h_col = field["H"]; const auto o_col = field["O"]; @@ -535,7 +580,8 @@ std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) { return base_totals; } -bool getHasID(Field &&field, int root, MPI_Comm comm) { +bool getHasID(Field &&field, int root, MPI_Comm comm) +{ bool has_id; int rank; @@ -543,7 +589,8 @@ bool getHasID(Field &&field, int root, MPI_Comm comm) { const bool is_master = root == rank; - if (is_master) { + if (is_master) + { const auto ID_field = field["ID"]; std::set unique_IDs(ID_field.begin(), ID_field.end()); @@ -560,7 +607,8 @@ bool getHasID(Field &&field, int root, MPI_Comm comm) { return has_id; } -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) +{ int world_size; MPI_Init(&argc, &argv); @@ -571,7 +619,8 @@ int main(int argc, char *argv[]) { RInsidePOET &R = RInsidePOET::getInstance(); - if (MY_RANK == 0) { + if (MY_RANK == 0) + { MSG("Running POET version " + std::string(poet_version)); } @@ -579,7 +628,8 @@ int main(int argc, char *argv[]) { RuntimeParameters run_params; - if (parseInitValues(argc, argv, run_params) != 0) { + if (parseInitValues(argc, argv, run_params) != 0) + { MPI_Finalize(); return 0; } @@ -621,9 +671,12 @@ int main(int argc, char *argv[]) { chemistry.masterEnableSurrogates(surr_setup); - if (MY_RANK > 0) { + if (MY_RANK > 0) + { chemistry.WorkerLoop(); - } 
else { + } + else + { // R.parseEvalQ("mysetup <- setup"); // // if (MY_RANK == 0) { // get timestep vector from // // grid_init function ... // @@ -637,7 +690,8 @@ int main(int argc, char *argv[]) { R["out_ext"] = run_params.out_ext; R["out_dir"] = run_params.out_dir; - if (run_params.use_ai_surrogate) { + if (run_params.use_ai_surrogate) + { /* Incorporate ai surrogate from R */ R.parseEvalQ(ai_surrogate_r_library); /* Use dht species for model input and output */ @@ -686,7 +740,8 @@ int main(int argc, char *argv[]) { MPI_Finalize(); - if (MY_RANK == 0) { + if (MY_RANK == 0) + { MSG("done, bye!"); } diff --git a/src/poet.hpp.in b/src/poet.hpp.in index c08199b6d..e9fb2acac 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -53,7 +53,7 @@ struct RuntimeParameters { bool print_progress = false; bool control_iteration_active = false; - std::uint32_t control_iteration = 25; + std::uint32_t control_iteration = 1; static constexpr std::uint32_t WORK_PACKAGE_SIZE_DEFAULT = 32; std::uint32_t work_package_size = WORK_PACKAGE_SIZE_DEFAULT; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8bc494158..ddd8ed94a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,7 +12,7 @@ get_filename_component(TEST_RInsideSourceFile "RInsidePOET_funcs.R" REALPATH) configure_file(testDataStructures.hpp.in testDataStructures.hpp) target_include_directories(testPOET PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") -add_custom_target(check +add_custom_target(check_poet COMMAND $ DEPENDS testPOET ) diff --git a/test/testStats.cpp b/test/testStats.cpp new file mode 100644 index 000000000..355d89b0c --- /dev/null +++ b/test/testStats.cpp @@ -0,0 +1,119 @@ +#include +#include +#include +#include + +#include + +TEST_CASE("Stats calculation") +{ + std::vector real = + { + 2, 2, 2, 2, // species 1 + 2.0, 0.01, 0.7, 0.5, // species 2 + 0.0, 0.0, 0.0, 0.0, // species 3 + 0.0, 0.0, 0.0, 0.0, // species 4 + -2.5, -0.02, -0.7, -0.5, // species 5 + 7.7, 6.01, 4.7, 0.5 // species 6 + }; + + 
std::vector pred = + { + 2, 2, 2, 2, + 2.0, 0.02, 0.6, 0.5, + 0.1, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, + 2.5, 0.01, 0.6, 0.5, + 2.8, 0.02, 0.7, 0.5 + }; + + poet::ChemistryModule::error_stats stats(6, 5); + poet::ChemistryModule::computeStats(real, pred, /*size_per_prop*/ 4, /*species_count*/ 6, stats); + + SUBCASE("Non-zero values") + { + + // species 1 is ID, should stay 0 + CHECK_EQ(stats.mape[0], 0); + CHECK_EQ(stats.rrsme[0], 0); + + /* + mape species 2 + cell0: |(2.0 - 2.0)/2.0| = 0 + cell1: |(0.01 - 0.02)/0.01| = 1 + cell2: |(0.7 - 0.6)/0.7| = 0.142857143 + cell3: |(0.5 - 0.5)/0.5| = 0 + mape = 1.142857143/ 4 = 0.285714286 *100 + rrsme species 1 + squared err sum = 1.02040816 + rrsme = sqrt(1.02040816/4) = 0.50507627 + */ + + CHECK_EQ(stats.mape[1], doctest::Approx(28.5714286).epsilon(1e-6)); + CHECK_EQ(stats.rrsme[1], doctest::Approx(0.50507627).epsilon(1e-6)); + } + + SUBCASE("Zero-denominator case") + { + /* + mape species 3 + cell0: |(0.0 - 0.1)/0.0| + cell1: |(0.0 - 0.0)/0.0| + cell2: |(0.0 - 0.0)/0.0| + cell3: |(0.0 - 0.0)/0.0| + mape = 1 *100 + rrsme = 1 + */ + + CHECK_EQ(stats.mape[2], 100.0); + CHECK_EQ(stats.rrsme[2], 1.0); + } + + SUBCASE("True and predicted values are zero") + { + /* + mape species 4 + cell0: |(0.0 - 0.0)/0.0| + cell1: |(0.0 - 0.0)/0.0| + cell2: |(0.0 - 0.0)/0.0| + cell3: |(0.0 - 0.0)/0.0| + mape = 0.0 + rrsme = 0.0 + */ + + CHECK_EQ(stats.mape[3], 0.0); + CHECK_EQ(stats.rrsme[3], 0.0); + } + + SUBCASE("Negative values") + { + /* + mape species 5 + cell0: |(-2.5 - 2.5)/-2.5| = 2 + cell1: |(-0.02 - 0.01)/-0.02| = 1.5 + cell2: |(-0.7 - 0.6)/-0.7| = 1.85714286 + cell3: |(-0.5 - 0.5)/-0.5| = 2 + mape = (100.0 / 4) * 7.35714286 = 183.92857143 + rrsme = sqrt(13.6989796 / 4) = 1.85060663 + */ + + CHECK_EQ(stats.mape[4], doctest::Approx(183.92857143).epsilon(1e-6)); + CHECK_EQ(stats.rrsme[4], doctest::Approx(1.85060663).epsilon(1e-6)); + } + + SUBCASE("Large differences") + { + /* + mape species 6 + cell0: |(7.7 - 2.8)/7.7| = 0.63636364 
+ cell1: |(6.01 - 0.02)/6.01| = 0.99667221 + cell2: |(4.7 - 0.7)/4.7| = 0.85106383 + cell3: |(0.5 - 0.5)/0.5| = 0 + mape = (100.0 / 4) * 2.48409968 = 62.102492 + rrsme = sqrt(2,12262382 / 4) = 0.72846136 + */ + + CHECK_EQ(stats.mape[5], doctest::Approx(62.102492).epsilon(1e-6)); + CHECK_EQ(stats.rrsme[5], doctest::Approx(0.72846136).epsilon(1e-6)); + } +} From 58ad215a18a760747e585df42db51f4ba1275713 Mon Sep 17 00:00:00 2001 From: rastogi Date: Mon, 1 Sep 2025 12:54:24 +0200 Subject: [PATCH 04/19] Initial control-loop setup. Unit tests for MAPE and RMSE added. Note: computeStats function is not working correctly yet. --- src/Chemistry/MasterFunctions.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index c1ce0b75e..59f2e660c 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -190,14 +190,6 @@ void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, double pqc_value = pqc_vector[i * size_per_prop + j]; double sur_value = sur_vector[i * size_per_prop + j]; -if (i == 0 && (j % 10000 == 0)) { - std::cout << "i=" << i << ", j=" << j - << ", idx=" << i * size_per_prop + j - << ", pqc=" << pqc_value - << ", sur=" << sur_value - << std::endl; -} - if (pqc_value != 0) { double rel_err = (pqc_value - sur_value) / pqc_value; @@ -211,11 +203,6 @@ if (i == 0 && (j % 10000 == 0)) { sqr_err_sum += 1.0; count++; } - - if (pqc_value == 0 && sur_value == 0) - { - } - // else: both cases are zero, skip (no error) } if (i == 0) From 7af47b167c10a8a7785fc4e25b268d5250aea48b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Tue, 2 Sep 2025 11:03:33 +0200 Subject: [PATCH 05/19] rename control work package and subsitute interpolated work package --- src/Chemistry/WorkerFunctions.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Chemistry/WorkerFunctions.cpp 
b/src/Chemistry/WorkerFunctions.cpp index 1b35387c1..0cdcb42c5 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -220,20 +220,20 @@ namespace poet /* if control iteration: create copy surrogate results (output and mappings) and then set them to zero, give this to phreeqc */ - poet::WorkPackage s_curr_wp_pqc = s_curr_wp; + poet::WorkPackage s_curr_wp_control = s_curr_wp; if (control_iteration_active) { - for (std::size_t wp_i = 0; wp_i < s_curr_wp_pqc.size; wp_i++) + for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++) { - s_curr_wp_pqc.output[wp_i] = std::vector(this->prop_count, 0.0); - s_curr_wp_pqc.mapping[wp_i] = 0; + s_curr_wp_control.output[wp_i] = std::vector(this->prop_count, 0.0); + s_curr_wp_control.mapping[wp_i] = 0; } } phreeqc_time_start = MPI_Wtime(); - WorkerRunWorkPackage(control_iteration_active ? s_curr_wp_pqc : s_curr_wp, current_sim_time, dt); + WorkerRunWorkPackage(control_iteration_active ? s_curr_wp_control : s_curr_wp, current_sim_time, dt); phreeqc_time_end = MPI_Wtime(); @@ -243,9 +243,9 @@ namespace poet mpi_buffer.resize(count + sur_wp_offset); - for (std::size_t wp_i = 0; wp_i < s_curr_wp_pqc.size; wp_i++) + for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++) { - std::copy(s_curr_wp_pqc.output[wp_i].begin(), s_curr_wp_pqc.output[wp_i].end(), + std::copy(s_curr_wp_control.output[wp_i].begin(), s_curr_wp_control.output[wp_i].end(), mpi_buffer.begin() + this->prop_count * wp_i); } @@ -253,9 +253,18 @@ namespace poet // copy surrogate output after the the pqc output, mpi_buffer[pqc][interp] for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) + { + if (!s_curr_wp.mapping[wp_i] == CHEM_PQC) // only copy if surrogate was used { std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i); + } else + { + // if pqc was used, copy pqc results again + std::copy(s_curr_wp_control.output[wp_i].begin(), 
s_curr_wp_control.output[wp_i].end(), + mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i); + } + } count += sur_wp_offset; @@ -279,7 +288,7 @@ namespace poet { /* write results to DHT */ dht_fill_start = MPI_Wtime(); - dht->fillDHT(control_iteration_active ? s_curr_wp_pqc : s_curr_wp); + dht->fillDHT(control_iteration_active ? s_curr_wp_control : s_curr_wp); dht_fill_end = MPI_Wtime(); if (interp_enabled) From 48b6b992bfbb61f98bff974bb66dc6d1d5041c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Tue, 2 Sep 2025 12:04:38 +0200 Subject: [PATCH 06/19] Add fgcs dolo benchmark --- bench/dolo/dolo_fgcs.pqi | 48 ++++++++++++++++ bench/dolo/dolo_fgcs_3.R | 116 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 bench/dolo/dolo_fgcs.pqi create mode 100644 bench/dolo/dolo_fgcs_3.R diff --git a/bench/dolo/dolo_fgcs.pqi b/bench/dolo/dolo_fgcs.pqi new file mode 100644 index 000000000..39e8553a4 --- /dev/null +++ b/bench/dolo/dolo_fgcs.pqi @@ -0,0 +1,48 @@ +SOLUTION 1 + units mol/kgw + water 1 + temperature 25 + pH 7 + pe 4 +PURE 1 + Calcite 0.0 1 +END + +RUN_CELLS + -cells 1 +END + +COPY solution 1 2 + +#PURE 2 +# O2g -0.1675 10 +KINETICS 2 + Calcite + -m 0.00207 + -parms 0.05 + -tol 1e-10 + Dolomite + -m 0.0 + -parms 0.01 + -tol 1e-10 +END + +SOLUTION 3 + units mol/kgw + water 1 + temp 25 + Mg 0.001 + Cl 0.002 +END + +SOLUTION 4 + units mol/kgw + water 1 + temp 25 + Mg 0.002 + Cl 0.004 +END + +RUN_CELLS + -cells 2-4 +END diff --git a/bench/dolo/dolo_fgcs_3.R b/bench/dolo/dolo_fgcs_3.R new file mode 100644 index 000000000..77d699b03 --- /dev/null +++ b/bench/dolo/dolo_fgcs_3.R @@ -0,0 +1,116 @@ +rows <- 400 +cols <- 400 + +grid_def <- matrix(2, nrow = rows, ncol = cols) + +# Define grid configuration for POET model +grid_setup <- list( + pqc_in_file = "./dolo_fgcs.pqi", + pqc_db_file = "./phreeqc_kin.dat", # Path to the database file for Phreeqc + grid_def = grid_def, # Definition of the grid, containing 
IDs according to the Phreeqc input script + grid_size = c(5, 5), # Size of the grid in meters + constant_cells = c() # IDs of cells with constant concentration +) + +bound_def_we <- list( + "type" = rep("constant", rows), + "sol_id" = rep(1, rows), + "cell" = seq(1, rows) +) + +bound_def_ns <- list( + "type" = rep("constant", cols), + "sol_id" = rep(1, cols), + "cell" = seq(1, cols) +) + +diffusion_setup <- list( + boundaries = list( + "W" = bound_def_we, + "E" = bound_def_we, + "N" = bound_def_ns, + "S" = bound_def_ns + ), + inner_boundaries = list( + "row" = floor(rows / 2), + "col" = floor(cols / 2), + "sol_id" = c(3) + ), + alpha_x = 1e-6, + alpha_y = 1e-6 +) + +check_sign_cal_dol_dht <- function(old, new) { + if ((old["Calcite"] == 0) != (new["Calcite"] == 0)) { + return(TRUE) + } + if ((old["Dolomite"] == 0) != (new["Dolomite"] == 0)) { + return(TRUE) + } + return(FALSE) +} + +check_sign_cal_dol_interp <- function(to_interp, data_set) { + dht_species <- c( + "H" = 3, + "O" = 3, + "C" = 6, + "Ca" = 6, + "Cl" = 3, + "Mg" = 5, + "Calcite" = 4, + "Dolomite" = 4 + ) + data_set <- as.data.frame(do.call(rbind, data_set), check.names = FALSE, optional = TRUE) + names(data_set) <- names(dht_species) + cal <- (data_set$Calcite == 0) == (to_interp["Calcite"] == 0) + dol <- (data_set$Dolomite == 0) == (to_interp["Dolomite"] == 0) + + cal_dol_same_sig <- cal == dol + return(rev(which(!cal_dol_same_sig))) +} + +check_neg_cal_dol <- function(result) { + neg_sign <- (result["Calcite"] < 0) || (result["Dolomite"] < 0) + return(neg_sign) +} + +# Optional when using Interpolation (example with less key species and custom +# significant digits) + +pht_species <- c( + "C" = 3, + "Ca" = 3, + "Mg" = 3, + "Cl" = 3, + "Calcite" = 3, + "Dolomite" = 3 +) + + +dht_species <- c( + "H" = 3, + "O" = 3, + "C" = 6, + "Ca" = 6, + "Cl" = 3, + "Mg" = 5, + "Calcite" = 4, + "Dolomite" = 4) + +chemistry_setup <- list( + dht_species = dht_species, + pht_species = pht_species, + hooks = list( + 
dht_fill = check_sign_cal_dol_dht, + interp_pre = check_sign_cal_dol_interp, + interp_post = check_neg_cal_dol + ) +) + +# Define a setup list for simulation configuration +setup <- list( + Grid = grid_setup, # Parameters related to the grid structure + Diffusion = diffusion_setup, # Parameters related to the diffusion process + Chemistry = chemistry_setup # Parameters related to the chemistry process +) From e58423ff0b26f6399bb3d6ee3b0bbdc78faa33ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Tue, 2 Sep 2025 13:05:47 +0200 Subject: [PATCH 07/19] stuff in computeStats --- src/Chemistry/MasterFunctions.cpp | 48 +++++++++++++++++++------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index 59f2e660c..d452c4d7a 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -182,36 +182,48 @@ void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, { double err_sum = 0.0; double sqr_err_sum = 0.0; - int count = 0; for (uint32_t j = 0; j < size_per_prop; j++) { - double pqc_value = pqc_vector[i * size_per_prop + j]; - double sur_value = sur_vector[i * size_per_prop + j]; + const double &pqc_value = pqc_vector[i * size_per_prop + j]; + const double &sur_value = sur_vector[i * size_per_prop + j]; - if (pqc_value != 0) - { - double rel_err = (pqc_value - sur_value) / pqc_value; - err_sum += std::abs(rel_err); - sqr_err_sum += rel_err * rel_err; - count++; - } - if (pqc_value == 0 && sur_value != 0) - { - err_sum += 1.0; - sqr_err_sum += 1.0; - count++; + if (pqc_value == 0 && sur_value == 0) { + // + } else if (pqc_value == 0 && sur_value != 0) { + std::cout << "NOOOO! 
pqc = " << pqc_value << ", sur = " << sur_value << "\n"; + err_sum += 1.; + sqr_err_sum += 1.; + } else { + const double alpha = 1 - (sur_value/pqc_value); + err_sum += std::abs(alpha); + sqr_err_sum += alpha * alpha; } + + // if (pqc_value != 0) + // { + // double rel_err = (pqc_value - sur_value) / pqc_value; + // err_sum += std::abs(rel_err); + // sqr_err_sum += rel_err * rel_err; + // } + // if (pqc_value == 0 && sur_value != 0) + // { + // err_sum += 1.0; + // sqr_err_sum += 1.0; + // } // else: both cases are zero, skip (no error) + if (i == 6 && (j % 1000 == 0)) { + std::cout << "pqc = " << pqc_value << ", sur = " << sur_value << "\n"; + } } if (i == 0) { std::cout << "computeStats, i==0, err_sum: " << err_sum << std::endl; std::cout << "computeStats, i==0, sqr_err_sum: " << sqr_err_sum << std::endl; } - stats.mape[i] = (count > 0) ? (100.0 / count) * err_sum : 0.0; - stats.rrsme[i] = (count > 0) ? std::sqrt(sqr_err_sum / count) : 0.0; + stats.mape[i] = 100.0 * (err_sum / size_per_prop); + stats.rrsme[i] = (size_per_prop > 0) ? 
std::sqrt(sqr_err_sum / size_per_prop) : 0.0; } } @@ -636,4 +648,4 @@ void poet::ChemistryModule::masterSetField(Field field) PropagateFunctionType(ftype); ChemBCast(&this->prop_count, 1, MPI_UINT32_T); -} \ No newline at end of file +} From 4b835b07c2646d47876412a66ae4436740014694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Thu, 4 Sep 2025 13:54:43 +0200 Subject: [PATCH 08/19] formatting MasterFunctions.cpp --- src/Chemistry/MasterFunctions.cpp | 211 ++++++++++++------------------ 1 file changed, 84 insertions(+), 127 deletions(-) diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index d452c4d7a..0bb1c6edd 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -8,8 +8,7 @@ #include std::vector -poet::ChemistryModule::MasterGatherWorkerMetrics(int type) const -{ +poet::ChemistryModule::MasterGatherWorkerMetrics(int type) const { MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); uint32_t dummy; @@ -23,8 +22,7 @@ poet::ChemistryModule::MasterGatherWorkerMetrics(int type) const } std::vector -poet::ChemistryModule::MasterGatherWorkerTimings(int type) const -{ +poet::ChemistryModule::MasterGatherWorkerTimings(int type) const { MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); double dummy; @@ -37,36 +35,31 @@ poet::ChemistryModule::MasterGatherWorkerTimings(int type) const return timings; } -std::vector poet::ChemistryModule::GetWorkerPhreeqcTimings() const -{ +std::vector poet::ChemistryModule::GetWorkerPhreeqcTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_PHREEQC); } -std::vector poet::ChemistryModule::GetWorkerDHTGetTimings() const -{ +std::vector poet::ChemistryModule::GetWorkerDHTGetTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_DHT_GET); } -std::vector poet::ChemistryModule::GetWorkerDHTFillTimings() const 
-{ +std::vector poet::ChemistryModule::GetWorkerDHTFillTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_DHT_FILL); } -std::vector poet::ChemistryModule::GetWorkerIdleTimings() const -{ +std::vector poet::ChemistryModule::GetWorkerIdleTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IDLE); } -std::vector poet::ChemistryModule::GetWorkerDHTHits() const -{ +std::vector poet::ChemistryModule::GetWorkerDHTHits() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); type = WORKER_DHT_HITS; @@ -84,8 +77,7 @@ std::vector poet::ChemistryModule::GetWorkerDHTHits() const return ret; } -std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const -{ +std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); type = WORKER_DHT_EVICTIONS; @@ -104,40 +96,35 @@ std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const } std::vector -poet::ChemistryModule::GetWorkerInterpolationWriteTimings() const -{ +poet::ChemistryModule::GetWorkerInterpolationWriteTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IP_WRITE); } std::vector -poet::ChemistryModule::GetWorkerInterpolationReadTimings() const -{ +poet::ChemistryModule::GetWorkerInterpolationReadTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IP_READ); } std::vector -poet::ChemistryModule::GetWorkerInterpolationGatherTimings() const -{ +poet::ChemistryModule::GetWorkerInterpolationGatherTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IP_GATHER); } std::vector 
-poet::ChemistryModule::GetWorkerInterpolationFunctionCallTimings() const -{ +poet::ChemistryModule::GetWorkerInterpolationFunctionCallTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); return MasterGatherWorkerTimings(WORKER_IP_FC); } std::vector -poet::ChemistryModule::GetWorkerInterpolationCalls() const -{ +poet::ChemistryModule::GetWorkerInterpolationCalls() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); type = WORKER_IP_CALLS; @@ -155,8 +142,7 @@ poet::ChemistryModule::GetWorkerInterpolationCalls() const return ret; } -std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const -{ +std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); type = WORKER_PHT_CACHE_HITS; @@ -175,16 +161,14 @@ std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const } void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, const std::vector &sur_vector, - uint32_t size_per_prop, uint32_t species_count, - error_stats &stats) -{ - for (uint32_t i = 0; i < species_count; i++) - { + uint32_t size_per_prop, + uint32_t species_count, + error_stats &stats) { + for (uint32_t i = 0; i < species_count; i++) { double err_sum = 0.0; double sqr_err_sum = 0.0; - for (uint32_t j = 0; j < size_per_prop; j++) - { + for (uint32_t j = 0; j < size_per_prop; j++) { const double &pqc_value = pqc_vector[i * size_per_prop + j]; const double &sur_value = sur_vector[i * size_per_prop + j]; @@ -192,11 +176,12 @@ void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, if (pqc_value == 0 && sur_value == 0) { // } else if (pqc_value == 0 && sur_value != 0) { - std::cout << "NOOOO! pqc = " << pqc_value << ", sur = " << sur_value << "\n"; + std::cout << "NOOOO! 
pqc = " << pqc_value << ", sur = " << sur_value + << "\n"; err_sum += 1.; sqr_err_sum += 1.; - } else { - const double alpha = 1 - (sur_value/pqc_value); + } else { + const double alpha = 1 - (sur_value / pqc_value); err_sum += std::abs(alpha); sqr_err_sum += alpha * alpha; } @@ -217,27 +202,24 @@ void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, std::cout << "pqc = " << pqc_value << ", sur = " << sur_value << "\n"; } } - if (i == 0) - { + if (i == 0) { std::cout << "computeStats, i==0, err_sum: " << err_sum << std::endl; - std::cout << "computeStats, i==0, sqr_err_sum: " << sqr_err_sum << std::endl; + std::cout << "computeStats, i==0, sqr_err_sum: " << sqr_err_sum + << std::endl; } - stats.mape[i] = 100.0 * (err_sum / size_per_prop); - stats.rrsme[i] = (size_per_prop > 0) ? std::sqrt(sqr_err_sum / size_per_prop) : 0.0; + stats.mape[i] = 100.0 * (err_sum / size_per_prop); + stats.rrsme[i] = + (size_per_prop > 0) ? std::sqrt(sqr_err_sum / size_per_prop) : 0.0; } - } inline std::vector shuffleVector(const std::vector &in_vector, uint32_t size_per_prop, - uint32_t wp_count) -{ + uint32_t wp_count) { std::vector out_buffer(in_vector.size()); uint32_t write_i = 0; - for (uint32_t i = 0; i < wp_count; i++) - { - for (uint32_t j = i; j < size_per_prop; j += wp_count) - { + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { out_buffer[write_i] = in_vector[j]; write_i++; } @@ -248,16 +230,12 @@ inline std::vector shuffleVector(const std::vector &in_vector, inline std::vector shuffleField(const std::vector &in_field, uint32_t size_per_prop, uint32_t species_count, - uint32_t wp_count) -{ + uint32_t wp_count) { std::vector out_buffer(in_field.size()); uint32_t write_i = 0; - for (uint32_t i = 0; i < wp_count; i++) - { - for (uint32_t j = i; j < size_per_prop; j += wp_count) - { - for (uint32_t k = 0; k < species_count; k++) - { + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < 
size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < species_count; k++) { out_buffer[(write_i * species_count) + k] = in_field[(k * size_per_prop) + j]; } @@ -269,16 +247,12 @@ inline std::vector shuffleField(const std::vector &in_field, inline void unshuffleField(const std::vector &in_buffer, uint32_t size_per_prop, uint32_t species_count, - uint32_t wp_count, std::vector &out_field) -{ + uint32_t wp_count, std::vector &out_field) { uint32_t read_i = 0; - for (uint32_t i = 0; i < wp_count; i++) - { - for (uint32_t j = i; j < size_per_prop; j += wp_count) - { - for (uint32_t k = 0; k < species_count; k++) - { + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < species_count; k++) { out_field[(k * size_per_prop) + j] = in_buffer[(read_i * species_count) + k]; } @@ -287,15 +261,13 @@ inline void unshuffleField(const std::vector &in_buffer, } } -inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) -{ +inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { /* visual progress */ double progress = (float)(count_pkgs + 1) / n_wp; std::cout << "["; int pos = barWidth * progress; - for (int iprog = 0; iprog < barWidth; ++iprog) - { + for (int iprog = 0; iprog < barWidth; ++iprog) { if (iprog < pos) std::cout << "="; else if (iprog == pos) @@ -310,15 +282,13 @@ inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) inline void poet::ChemistryModule::MasterSendPkgs( worker_list_t &w_list, workpointer_t &work_pointer, int &pkg_to_send, - int &count_pkgs, int &free_workers, double dt, uint32_t iteration, uint32_t control_iteration, - const std::vector &wp_sizes_vector) -{ + int &count_pkgs, int &free_workers, double dt, uint32_t iteration, + uint32_t control_iteration, const std::vector &wp_sizes_vector) { /* declare variables */ int local_work_package_size; /* search for free workers and send work */ - for (int p = 0; p < 
this->comm_size - 1; p++) - { + for (int p = 0; p < this->comm_size - 1; p++) { if (w_list[p].has_work == 0 && pkg_to_send > 0) /* worker is free */ { /* to enable different work_package_size, set local copy of @@ -347,7 +317,9 @@ inline void poet::ChemistryModule::MasterSendPkgs( // current time of simulation (age) in seconds send_buffer[end_of_wp + 3] = this->simtime; // current work package start location in field - uint32_t wp_start_index = std::accumulate(wp_sizes_vector.begin(), std::next(wp_sizes_vector.begin(), count_pkgs), 0); + uint32_t wp_start_index = + std::accumulate(wp_sizes_vector.begin(), + std::next(wp_sizes_vector.begin(), count_pkgs), 0); send_buffer[end_of_wp + 4] = wp_start_index; // whether this iteration is a control iteration send_buffer[end_of_wp + 5] = control_iteration; @@ -369,8 +341,7 @@ inline void poet::ChemistryModule::MasterSendPkgs( inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, - int &free_workers) -{ + int &free_workers) { /* declare most of the variables here */ int need_to_receive = 1; double idle_a, idle_b; @@ -380,15 +351,13 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, // master_recv_a = MPI_Wtime(); /* start to loop as long there are packages to recv and the need to receive */ - while (need_to_receive && pkg_to_recv > 0) - { + while (need_to_receive && pkg_to_recv > 0) { // only of there are still packages to send and free workers are available if (to_send && free_workers > 0) // non blocking probing MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &need_to_receive, &probe_status); - else - { + else { idle_a = MPI_Wtime(); // blocking probing MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &probe_status); @@ -398,11 +367,9 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, /* if need_to_receive was set to true above, so there is a message to * receive */ - if (need_to_receive) - { + if 
(need_to_receive) { p = probe_status.MPI_SOURCE; - if (probe_status.MPI_TAG == LOOP_WORK) - { + if (probe_status.MPI_TAG == LOOP_WORK) { MPI_Get_count(&probe_status, MPI_DOUBLE, &size); MPI_Recv(w_list[p - 1].send_addr, size, MPI_DOUBLE, p, LOOP_WORK, this->group_comm, MPI_STATUS_IGNORE); @@ -410,8 +377,7 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, pkg_to_recv -= 1; free_workers++; } - if (probe_status.MPI_TAG == LOOP_CTRL) - { + if (probe_status.MPI_TAG == LOOP_CTRL) { MPI_Get_count(&probe_status, MPI_DOUBLE, &size); // layout of buffer is [phreeqc][surrogate] @@ -423,7 +389,8 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, std::copy(recv_buffer.begin(), recv_buffer.begin() + (size / 2), w_list[p - 1].send_addr); - sur_shuffled.insert(sur_shuffled.end(), recv_buffer.begin() + (size / 2), + sur_shuffled.insert(sur_shuffled.end(), + recv_buffer.begin() + (size / 2), recv_buffer.begin() + size); w_list[p - 1].has_work = 0; @@ -434,11 +401,9 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, } } -void poet::ChemistryModule::simulate(double dt) -{ +void poet::ChemistryModule::simulate(double dt) { double start_t{MPI_Wtime()}; - if (this->is_sequential) - { + if (this->is_sequential) { MasterRunSequential(); return; } @@ -448,8 +413,7 @@ void poet::ChemistryModule::simulate(double dt) this->chem_t += end_t - start_t; } -void poet::ChemistryModule::MasterRunSequential() -{ +void poet::ChemistryModule::MasterRunSequential() { // std::vector shuffled_field = // shuffleField(chem_field.AsVector(), n_cells, prop_count, 1); @@ -476,8 +440,7 @@ void poet::ChemistryModule::MasterRunSequential() // chem_field = out_vec; } -void poet::ChemistryModule::MasterRunParallel(double dt) -{ +void poet::ChemistryModule::MasterRunParallel(double dt) { /* declare most of the needed variables here */ double seq_a, seq_b, seq_c, seq_d; double worker_chemistry_a, worker_chemistry_b; @@ -490,14 +453,14 @@ void 
poet::ChemistryModule::MasterRunParallel(double dt) const std::vector wp_sizes_vector = CalculateWPSizesVector(this->n_cells, this->wp_size); - if (this->ai_surrogate_enabled) - { + if (this->ai_surrogate_enabled) { ftype = CHEM_AI_BCAST_VALIDITY; PropagateFunctionType(ftype); - this->ai_surrogate_validity_vector = shuffleVector(this->ai_surrogate_validity_vector, - this->n_cells, - wp_sizes_vector.size()); - ChemBCast(&this->ai_surrogate_validity_vector.front(), this->n_cells, MPI_INT); + this->ai_surrogate_validity_vector = + shuffleVector(this->ai_surrogate_validity_vector, this->n_cells, + wp_sizes_vector.size()); + ChemBCast(&this->ai_surrogate_validity_vector.front(), this->n_cells, + MPI_INT); } ftype = CHEM_WORK_LOOP; @@ -506,9 +469,9 @@ void poet::ChemistryModule::MasterRunParallel(double dt) MPI_Barrier(this->group_comm); static uint32_t iteration = 0; - uint32_t control_iteration = static_cast(this->runtime_params->control_iteration_active ? 1 : 0); - if (control_iteration) - { + uint32_t control_iteration = static_cast( + this->runtime_params->control_iteration_active ? 1 : 0); + if (control_iteration) { sur_shuffled.clear(); sur_shuffled.reserve(this->n_cells * this->prop_count); } @@ -542,19 +505,17 @@ void poet::ChemistryModule::MasterRunParallel(double dt) /* start send/recv loop */ // while there are still packages to recv - while (pkg_to_recv > 0) - { + while (pkg_to_recv > 0) { // print a progressbar to stdout - if (print_progessbar) - { + if (print_progessbar) { printProgressbar((int)i_pkgs, (int)wp_sizes_vector.size()); } // while there are still packages to send - if (pkg_to_send > 0) - { + if (pkg_to_send > 0) { // send packages to all free workers ... MasterSendPkgs(worker_list, work_pointer, pkg_to_send, i_pkgs, - free_workers, dt, iteration, control_iteration, wp_sizes_vector); + free_workers, dt, iteration, control_iteration, + wp_sizes_vector); } // ... 
and try to receive them from workers who has finished their work MasterRecvPkgs(worker_list, pkg_to_recv, pkg_to_send > 0, free_workers); @@ -579,8 +540,7 @@ void poet::ChemistryModule::MasterRunParallel(double dt) /* do master stuff */ - if (control_iteration) - { + if (control_iteration) { control_iteration_counter++; std::vector sur_unshuffled{sur_shuffled}; @@ -588,9 +548,11 @@ void poet::ChemistryModule::MasterRunParallel(double dt) unshuffleField(sur_shuffled, this->n_cells, this->prop_count, wp_sizes_vector.size(), sur_unshuffled); - error_stats stats(this->prop_count, control_iteration_counter * runtime_params->control_iteration); + error_stats stats(this->prop_count, control_iteration_counter * + runtime_params->control_iteration); - computeStats(out_vec, sur_unshuffled, this->n_cells, this->prop_count, stats); + computeStats(out_vec, sur_unshuffled, this->n_cells, this->prop_count, + stats); error_stats_history.push_back(stats); // to do: control values to epsilon @@ -606,8 +568,7 @@ void poet::ChemistryModule::MasterRunParallel(double dt) /* end time measurement of whole chemistry simulation */ /* advise workers to end chemistry iteration */ - for (int i = 1; i < this->comm_size; i++) - { + for (int i = 1; i < this->comm_size; i++) { MPI_Send(NULL, 0, MPI_DOUBLE, i, LOOP_END, this->group_comm); } @@ -615,32 +576,28 @@ void poet::ChemistryModule::MasterRunParallel(double dt) iteration++; } -void poet::ChemistryModule::MasterLoopBreak() -{ +void poet::ChemistryModule::MasterLoopBreak() { int type = CHEM_BREAK_MAIN_LOOP; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); } std::vector poet::ChemistryModule::CalculateWPSizesVector(uint32_t n_cells, - uint32_t wp_size) const -{ + uint32_t wp_size) const { bool mod_pkgs = (n_cells % wp_size) != 0; uint32_t n_packages = (uint32_t)(n_cells / wp_size) + static_cast(mod_pkgs); std::vector wp_sizes_vector(n_packages, 0); - for (int i = 0; i < n_cells; i++) - { + for (int i = 0; i < n_cells; i++) { wp_sizes_vector[i 
% n_packages] += 1; } return wp_sizes_vector; } -void poet::ChemistryModule::masterSetField(Field field) -{ +void poet::ChemistryModule::masterSetField(Field field) { this->chem_field = field; this->prop_count = field.GetProps().size(); From a667adbb8e5d194cec9111ddf516a3a552f3f9ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Thu, 4 Sep 2025 14:19:54 +0200 Subject: [PATCH 09/19] fix mssing handling of different storage locatios of work packages --- src/Chemistry/ChemistryModule.hpp | 3 ++- src/Chemistry/MasterFunctions.cpp | 20 +++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/Chemistry/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp index cc78ede87..2652ebd1e 100644 --- a/src/Chemistry/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -347,6 +347,7 @@ namespace poet { char has_work = 0; double *send_addr; + double *surrogate_addr; }; using worker_list_t = std::vector; @@ -355,7 +356,7 @@ namespace poet void MasterRunParallel(double dt); void MasterRunSequential(); - void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, + void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, workpointer_t &sur_pointer, int &pkg_to_send, int &count_pkgs, int &free_workers, double dt, uint32_t iteration, uint32_t control_iteration, const std::vector &wp_sizes_vector); diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index 0bb1c6edd..b23061913 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -159,6 +159,7 @@ std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { return ret; } + void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, const std::vector &sur_vector, uint32_t size_per_prop, @@ -281,8 +282,9 @@ inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { } inline void poet::ChemistryModule::MasterSendPkgs( - worker_list_t &w_list, 
workpointer_t &work_pointer, int &pkg_to_send, - int &count_pkgs, int &free_workers, double dt, uint32_t iteration, + worker_list_t &w_list, workpointer_t &work_pointer, + workpointer_t &sur_pointer, int &pkg_to_send, int &count_pkgs, + int &free_workers, double dt, uint32_t iteration, uint32_t control_iteration, const std::vector &wp_sizes_vector) { /* declare variables */ int local_work_package_size; @@ -299,6 +301,7 @@ inline void poet::ChemistryModule::MasterSendPkgs( /* note current processed work package in workerlist */ w_list[p].send_addr = work_pointer.base(); + w_list[p].surrogate_addr = sur_pointer.base(); /* push work pointer to next work package */ const uint32_t end_of_wp = local_work_package_size * this->prop_count; @@ -306,6 +309,7 @@ inline void poet::ChemistryModule::MasterSendPkgs( std::copy(work_pointer, work_pointer + end_of_wp, send_buffer.begin()); work_pointer += end_of_wp; + sur_pointer += end_of_wp; // fill send buffer starting with work_package ... // followed by: work_package_size @@ -389,9 +393,8 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, std::copy(recv_buffer.begin(), recv_buffer.begin() + (size / 2), w_list[p - 1].send_addr); - sur_shuffled.insert(sur_shuffled.end(), - recv_buffer.begin() + (size / 2), - recv_buffer.begin() + size); + std::copy(recv_buffer.begin() + (size / 2), recv_buffer.begin() + size, + w_list[p - 1].surrogate_addr); w_list[p - 1].has_work = 0; pkg_to_recv -= 1; @@ -486,11 +489,14 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { shuffleField(chem_field.AsVector(), this->n_cells, this->prop_count, wp_sizes_vector.size()); + this->sur_shuffled.resize(mpi_buffer.size()); + /* setup local variables */ pkg_to_send = wp_sizes_vector.size(); pkg_to_recv = wp_sizes_vector.size(); workpointer_t work_pointer = mpi_buffer.begin(); + workpointer_t sur_pointer = sur_shuffled.begin(); worker_list_t worker_list(this->comm_size - 1); free_workers = this->comm_size - 1; @@ -513,8 
+519,8 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { // while there are still packages to send if (pkg_to_send > 0) { // send packages to all free workers ... - MasterSendPkgs(worker_list, work_pointer, pkg_to_send, i_pkgs, - free_workers, dt, iteration, control_iteration, + MasterSendPkgs(worker_list, work_pointer, sur_pointer, pkg_to_send, + i_pkgs, free_workers, dt, iteration, control_iteration, wp_sizes_vector); } // ... and try to receive them from workers who has finished their work From d7f3eef2839950d5c3eaa61b81d5ea9f7e8ae6c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Thu, 4 Sep 2025 14:20:08 +0200 Subject: [PATCH 10/19] cleanup computeStats() --- src/Chemistry/MasterFunctions.cpp | 47 +++++++++---------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index b23061913..645bcf304 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -165,49 +165,28 @@ void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, uint32_t size_per_prop, uint32_t species_count, error_stats &stats) { - for (uint32_t i = 0; i < species_count; i++) { + for (uint32_t i = 0; i < species_count; ++i) { double err_sum = 0.0; double sqr_err_sum = 0.0; + uint32_t base_idx = i * size_per_prop; - for (uint32_t j = 0; j < size_per_prop; j++) { + for (uint32_t j = 0; j < size_per_prop; ++j) { + const double pqc_value = pqc_vector[base_idx + j]; + const double sur_value = sur_vector[base_idx + j]; - const double &pqc_value = pqc_vector[i * size_per_prop + j]; - const double &sur_value = sur_vector[i * size_per_prop + j]; - - if (pqc_value == 0 && sur_value == 0) { - // - } else if (pqc_value == 0 && sur_value != 0) { - std::cout << "NOOOO! 
pqc = " << pqc_value << ", sur = " << sur_value - << "\n"; - err_sum += 1.; - sqr_err_sum += 1.; + if (pqc_value == 0.0) { + if (sur_value != 0.0) { + err_sum += 1.0; + sqr_err_sum += 1.0; + } + // Both zero: skip } else { - const double alpha = 1 - (sur_value / pqc_value); + double alpha = 1.0 - (sur_value / pqc_value); err_sum += std::abs(alpha); sqr_err_sum += alpha * alpha; } + } - // if (pqc_value != 0) - // { - // double rel_err = (pqc_value - sur_value) / pqc_value; - // err_sum += std::abs(rel_err); - // sqr_err_sum += rel_err * rel_err; - // } - // if (pqc_value == 0 && sur_value != 0) - // { - // err_sum += 1.0; - // sqr_err_sum += 1.0; - // } - // else: both cases are zero, skip (no error) - if (i == 6 && (j % 1000 == 0)) { - std::cout << "pqc = " << pqc_value << ", sur = " << sur_value << "\n"; - } - } - if (i == 0) { - std::cout << "computeStats, i==0, err_sum: " << err_sum << std::endl; - std::cout << "computeStats, i==0, sqr_err_sum: " << sqr_err_sum - << std::endl; - } stats.mape[i] = 100.0 * (err_sum / size_per_prop); stats.rrsme[i] = (size_per_prop > 0) ? 
std::sqrt(sqr_err_sum / size_per_prop) : 0.0; From 41d1a9895ca8dda82f97fb2dfc64f0726c91aae7 Mon Sep 17 00:00:00 2001 From: rastogi Date: Fri, 12 Sep 2025 09:47:00 +0200 Subject: [PATCH 11/19] rollback implemented, triggered when MAPE exceeds epsilon --- src/poet.cpp | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/src/poet.cpp b/src/poet.cpp index 2f39673be..2f1abd5f1 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -300,6 +300,35 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) *global_rt_setup = R["setup"]; } +bool checkAndRollback(ChemistryModule &chem, RuntimeParameters ¶ms, uint32_t &iter) +{ + for (uint32_t i = 0; i < chem.error_stats_history.size(); i++) + { + if (iter == chem.error_stats_history[i].iteration) + { + for (uint32_t j = 0; j < params.species_epsilon.size(); j++) + { + if (params.species_epsilon[j] < chem.error_stats_history[i].mape[j] && chem.error_stats_history[i].mape[j] != 0 && chem.control_iteration_counter > 1) + { + uint32_t rollback_iter = iter - params.control_iteration; + + std::cout << chem.getField().GetProps()[j] << " with a MAPE value of " << chem.error_stats_history[i].mape[j] << " exceeds epsilon of " + << params.species_epsilon[j] << "! 
" << std::endl; + + Checkpoint_s checkpoint_read{.field = chem.getField()}; + read_checkpoint("checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read); + iter = checkpoint_read.iteration; + + chem.control_iteration_counter--; + + return true; + } + } + } + } + return false; +} + static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, DiffusionModule &diffusion, ChemistryModule &chem) @@ -441,21 +470,15 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); - /* - if (params.control_iteration_active) + if (iter % params.control_iteration == 0) { - std::string file_path = "checkpoint" + std::to_string(iter) + ".hdf5"; - write_checkpoint(file_path, - {.field = chem.getField(), .iteration = iter}); - } + writeStatsToCSV(chem.error_stats_history, chem.getField().GetProps(), "stats_overview"); - - if (iter % params.control_iteration == 0 && iter != 0) - { write_checkpoint("checkpoint" + std::to_string(iter) + ".hdf5", {.field = chem.getField(), .iteration = iter}); + checkAndRollback(chem, params, iter); + } - */ // MSG(); } // END SIMULATION LOOP @@ -503,8 +526,6 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, chem.MasterLoopBreak(); - writeStatsToCSV(chem.error_stats_history, chem.getField().GetProps(), "stats_overview"); - return profiling; } From 15e397ecf20e4cc58cf999cea80746fd20852a97 Mon Sep 17 00:00:00 2001 From: rastogi Date: Thu, 2 Oct 2025 13:20:53 +0200 Subject: [PATCH 12/19] feat(control): dynamic prototype, penalty_iteration, error while disabling surrogate fixed --- CMakeLists.txt | 4 +- src/Chemistry/ChemistryModule.hpp | 1 + src/Chemistry/MasterFunctions.cpp | 13 +++ src/Chemistry/WorkerFunctions.cpp | 175 ++++++++++++++++-------------- src/poet.cpp | 87 ++++++++++----- src/poet.hpp.in | 5 + 6 files changed, 171 insertions(+), 114 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt 
index 1550f0962..fa7f009a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,12 +28,12 @@ if (POET_PREPROCESS_BENCHS) endif() # as tug will also pull in doctest as a dependency -set(TUG_ENABLE_TESTING ON CACHE BOOL "" FORCE) +set(TUG_ENABLE_TESTING OFF CACHE BOOL "" FORCE) add_subdirectory(ext/tug EXCLUDE_FROM_ALL) add_subdirectory(ext/iphreeqc EXCLUDE_FROM_ALL) -option(POET_ENABLE_TESTING "Build test suite for POET" ON) +option(POET_ENABLE_TESTING "Build test suite for POET" OFF) if (POET_ENABLE_TESTING) add_subdirectory(test) diff --git a/src/Chemistry/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp index 2652ebd1e..836b0f237 100644 --- a/src/Chemistry/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -299,6 +299,7 @@ namespace poet CHEM_DHT_SIGNIF_VEC, CHEM_DHT_SNAPS, CHEM_DHT_READ_FILE, + CHEM_INTERP, CHEM_IP_ENABLE, CHEM_IP_MIN_ENTRIES, CHEM_IP_SIGNIF_VEC, diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index 645bcf304..683985134 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -448,6 +448,19 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { ftype = CHEM_WORK_LOOP; PropagateFunctionType(ftype); + ftype = CHEM_INTERP; + PropagateFunctionType(ftype); + + if(this->runtime_params->rollback_simulation){ + this->interp_enabled = false; + int interp_flag = 0; + ChemBCast(&interp_flag, 1, MPI_INT); + } else { + this->interp_enabled = true; + int interp_flag = 1; + ChemBCast(&interp_flag, 1, MPI_INT); + } + MPI_Barrier(this->group_comm); static uint32_t iteration = 0; diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index 0cdcb42c5..11c70d7cc 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -34,105 +34,112 @@ namespace poet return ret_str; } - void poet::ChemistryModule::WorkerLoop() - { - struct worker_s timings; - - // HACK: defining the worker iteration count here, which will 
increment after - // each CHEM_ITER_END message - uint32_t iteration = 1; - bool loop = true; - - while (loop) + void poet::ChemistryModule::WorkerLoop() { - int func_type; - PropagateFunctionType(func_type); + struct worker_s timings; - switch (func_type) + // HACK: defining the worker iteration count here, which will increment after + // each CHEM_ITER_END message + uint32_t iteration = 1; + bool loop = true; + + while (loop) { - case CHEM_FIELD_INIT: - { - ChemBCast(&this->prop_count, 1, MPI_UINT32_T); - if (this->ai_surrogate_enabled) + int func_type; + PropagateFunctionType(func_type); + + switch (func_type) { - this->ai_surrogate_validity_vector.resize( - this->n_cells); // resize statt reserve? - } - break; - } - case CHEM_AI_BCAST_VALIDITY: - { - // Receive the index vector of valid ai surrogate predictions - MPI_Bcast(&this->ai_surrogate_validity_vector.front(), this->n_cells, - MPI_INT, 0, this->group_comm); - break; - } - case CHEM_WORK_LOOP: - { - WorkerProcessPkgs(timings, iteration); - break; - } - case CHEM_PERF: - { - int type; - ChemBCast(&type, 1, MPI_INT); - if (type < WORKER_DHT_HITS) + case CHEM_FIELD_INIT: { - WorkerPerfToMaster(type, timings); + ChemBCast(&this->prop_count, 1, MPI_UINT32_T); + if (this->ai_surrogate_enabled) + { + this->ai_surrogate_validity_vector.resize( + this->n_cells); // resize statt reserve? 
+ } break; } - WorkerMetricsToMaster(type); - break; - } - case CHEM_BREAK_MAIN_LOOP: - { - WorkerPostSim(iteration); - loop = false; - break; - } - default: - { - throw std::runtime_error("Worker received unknown tag from master."); - } + case CHEM_AI_BCAST_VALIDITY: + { + // Receive the index vector of valid ai surrogate predictions + MPI_Bcast(&this->ai_surrogate_validity_vector.front(), this->n_cells, + MPI_INT, 0, this->group_comm); + break; + } + case CHEM_INTERP: + { + int interp_flag; + ChemBCast(&interp_flag, 1, MPI_INT); + this->interp_enabled = (interp_flag == 1); + break; + } + case CHEM_WORK_LOOP: + { + WorkerProcessPkgs(timings, iteration); + break; + } + case CHEM_PERF: + { + int type; + ChemBCast(&type, 1, MPI_INT); + if (type < WORKER_DHT_HITS) + { + WorkerPerfToMaster(type, timings); + break; + } + WorkerMetricsToMaster(type); + break; + } + case CHEM_BREAK_MAIN_LOOP: + { + WorkerPostSim(iteration); + loop = false; + break; + } + default: + { + throw std::runtime_error("Worker received unknown tag from master."); + } + } } } - } - void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings, - uint32_t &iteration) - { - MPI_Status probe_status; - bool loop = true; - - MPI_Barrier(this->group_comm); - - while (loop) + void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings, + uint32_t &iteration) { - double idle_a = MPI_Wtime(); - MPI_Probe(0, MPI_ANY_TAG, this->group_comm, &probe_status); - double idle_b = MPI_Wtime(); + MPI_Status probe_status; + bool loop = true; - switch (probe_status.MPI_TAG) - { - case LOOP_WORK: - { - timings.idle_t += idle_b - idle_a; - int count; - MPI_Get_count(&probe_status, MPI_DOUBLE, &count); + MPI_Barrier(this->group_comm); - WorkerDoWork(probe_status, count, timings); - break; - } - case LOOP_END: + while (loop) { - WorkerPostIter(probe_status, iteration); - iteration++; - loop = false; - break; - } + double idle_a = MPI_Wtime(); + MPI_Probe(0, MPI_ANY_TAG, this->group_comm, &probe_status); 
+ double idle_b = MPI_Wtime(); + + switch (probe_status.MPI_TAG) + { + case LOOP_WORK: + { + timings.idle_t += idle_b - idle_a; + int count; + MPI_Get_count(&probe_status, MPI_DOUBLE, &count); + + WorkerDoWork(probe_status, count, timings); + break; + } + case LOOP_END: + { + WorkerPostIter(probe_status, iteration); + iteration++; + loop = false; + break; + } + } } } - } void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, int double_count, @@ -254,7 +261,7 @@ namespace poet for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { - if (!s_curr_wp.mapping[wp_i] == CHEM_PQC) // only copy if surrogate was used + if (s_curr_wp.mapping[wp_i] != CHEM_PQC) // only copy if surrogate was used { std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i); diff --git a/src/poet.cpp b/src/poet.cpp index 2f1abd5f1..0f558b5d7 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -270,6 +270,10 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) Rcpp::as(global_rt_setup->operator[]("control_iteration")); params.species_epsilon = Rcpp::as>(global_rt_setup->operator[]("species_epsilon")); + params.penalty_iteration = + Rcpp::as(global_rt_setup->operator[]("penalty_iteration")); + params.max_penalty_iteration = + Rcpp::as(global_rt_setup->operator[]("max_penalty_iteration")); } catch (const std::exception &e) { @@ -302,31 +306,50 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) bool checkAndRollback(ChemistryModule &chem, RuntimeParameters ¶ms, uint32_t &iter) { - for (uint32_t i = 0; i < chem.error_stats_history.size(); i++) + const std::vector &latest_mape = chem.error_stats_history.back().mape; + + for (uint32_t j = 0; j < params.species_epsilon.size(); j++) { - if (iter == chem.error_stats_history[i].iteration) + if (params.species_epsilon[j] < latest_mape[j] && latest_mape[j] != 0) { - for (uint32_t j = 0; j < params.species_epsilon.size(); j++) 
- { - if (params.species_epsilon[j] < chem.error_stats_history[i].mape[j] && chem.error_stats_history[i].mape[j] != 0 && chem.control_iteration_counter > 1) - { - uint32_t rollback_iter = iter - params.control_iteration; + uint32_t rollback_iter = iter - (iter % params.control_iteration); - std::cout << chem.getField().GetProps()[j] << " with a MAPE value of " << chem.error_stats_history[i].mape[j] << " exceeds epsilon of " - << params.species_epsilon[j] << "! " << std::endl; + std::cout << chem.getField().GetProps()[j] << " with a MAPE value of " << latest_mape[j] << " exceeds epsilon of " + << params.species_epsilon[j] << "! " << std::endl; - Checkpoint_s checkpoint_read{.field = chem.getField()}; - read_checkpoint("checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read); - iter = checkpoint_read.iteration; + Checkpoint_s checkpoint_read{.field = chem.getField()}; + read_checkpoint("checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read); + iter = checkpoint_read.iteration; - chem.control_iteration_counter--; - - return true; - } - } + return true; + } + } + MSG("All spezies are below their threshold values"); + return false; +} + +void updatePenaltyLogic(RuntimeParameters ¶ms, bool roolback_happend) +{ + if (roolback_happend) + { + params.rollback_simulation = true; + params.penalty_counter = params.penalty_iteration; + std::cout << "Penalty counter reset to: " << params.penalty_counter << std::endl; + MSG("Rollback! Penalty phase started for " + std::to_string(params.penalty_iteration) + " iterations."); + } + else + { + if (params.rollback_simulation && params.penalty_counter == 0) + { + params.rollback_simulation = false; + MSG("Penalty phase ended. Interpolation re-enabled."); + } + else if (!params.rollback_simulation) + { + params.penalty_iteration = std::min(params.penalty_iteration *= 2, params.max_penalty_iteration); + MSG("Stable surrogate phase detected. 
Penalty iteration doubled to " + std::to_string(params.penalty_iteration) + " iterations."); } } - return false; } static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, @@ -344,13 +367,21 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, } R["TMP_PROPS"] = Rcpp::wrap(chem.getField().GetProps()); + params.next_penalty_check = params.penalty_iteration; + /* SIMULATION LOOP */ double dSimTime{0}; for (uint32_t iter = 1; iter < maxiter + 1; iter++) { + // Penalty countdown + if (params.rollback_simulation && params.penalty_counter > 0) + { + params.penalty_counter--; + std::cout << "Penalty counter: " << params.penalty_counter << std::endl; + } - params.control_iteration_active = (iter % params.control_iteration == 0 && iter != 0); + params.control_iteration_active = (iter % params.control_iteration == 0 /* && iter != 0 */); double start_t = MPI_Wtime(); @@ -459,12 +490,6 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, // TODO: write checkpoint // checkpoint struct --> field and iteration - /*else if (iter == 2) { - Checkpoint_s checkpoint_read{.field = chem.getField()}; - read_checkpoint("checkpoint1.hdf5", checkpoint_read); - iter = checkpoint_read.iteration; - }*/ - diffusion.getField().update(chem.getField()); MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + @@ -473,12 +498,18 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, if (iter % params.control_iteration == 0) { writeStatsToCSV(chem.error_stats_history, chem.getField().GetProps(), "stats_overview"); - write_checkpoint("checkpoint" + std::to_string(iter) + ".hdf5", {.field = chem.getField(), .iteration = iter}); - checkAndRollback(chem, params, iter); - } + + if (iter == params.next_penalty_check) + { + bool roolback_happend = checkAndRollback(chem, params, iter); + updatePenaltyLogic(params, roolback_happend); + + params.next_penalty_check = iter + params.penalty_iteration; + } + // MSG(); } // END 
SIMULATION LOOP diff --git a/src/poet.hpp.in b/src/poet.hpp.in index e9fb2acac..a5b82c150 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -52,6 +52,11 @@ struct RuntimeParameters { bool print_progress = false; + std::uint32_t penalty_iteration = 0; + std::uint32_t max_penalty_iteration = 0; + std::uint32_t penalty_counter = 0; + std::uint32_t next_penalty_check = 0; + bool rollback_simulation = false; bool control_iteration_active = false; std::uint32_t control_iteration = 1; From 9f1d69982d3b6ac6bfd6f5ca6d80593cff4a47d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Thu, 25 Sep 2025 15:08:13 +0200 Subject: [PATCH 13/19] feat(grid): enable cell_ID integration in chemistry data flow --- R_lib/init_r_lib.R | 3 +++ src/Chemistry/WorkerFunctions.cpp | 13 +++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/R_lib/init_r_lib.R b/R_lib/init_r_lib.R index f3f1adeed..aa82998bd 100644 --- a/R_lib/init_r_lib.R +++ b/R_lib/init_r_lib.R @@ -60,6 +60,9 @@ pqc_to_grid <- function(pqc_mat, grid) { # Convert the result matrix to a data frame res_df <- as.data.frame(result_mat) + # Add cell_ID column to beginning of res_df + res_df <- cbind(cell_ID = seq(0, nrow(res_df) - 1), res_df) + # Remove all columns which only contain NaN # res_df <- res_df[, colSums(is.na(res_df)) != nrow(res_df)] diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index 11c70d7cc..b354f986d 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -428,14 +428,23 @@ namespace poet { to_ignore.push_back(wp_id); } - } + + // HACK: remove the first element (cell_id) before sending to phreeqc + inout_chem[wp_id].erase( + inout_chem[wp_id].begin(), inout_chem[wp_id].begin() + 1); + } + this->pqc_runner->run(inout_chem, dTimestep, to_ignore); for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) { if (work_package.mapping[wp_id] == CHEM_PQC) { - work_package.output[wp_id] = inout_chem[wp_id]; 
+ // HACK: as we removed the first element (cell_id) before sending to phreeqc, + // copy back with an offset of 1 + work_package.output[wp_id] = work_package.input[wp_id]; + std::copy(inout_chem[wp_id].begin(), inout_chem[wp_id].end(), + work_package.output[wp_id].begin() + 1); } } } From 9374b267739b89f71b822ec1b1d5766960e30780 Mon Sep 17 00:00:00 2001 From: rastogi Date: Wed, 15 Oct 2025 10:15:21 +0200 Subject: [PATCH 14/19] Control component with minimum features --- src/Chemistry/ChemistryModule.hpp | 46 +- src/Chemistry/MasterFunctions.cpp | 128 +++-- src/Chemistry/SurrogateModels/DHT_Wrapper.cpp | 538 ++++++++++-------- .../SurrogateModels/InterpolationModule.cpp | 255 +++++---- src/Chemistry/WorkerFunctions.cpp | 40 +- src/IO/StatsIO.cpp | 28 +- src/IO/StatsIO.hpp | 2 +- src/poet.cpp | 118 ++-- src/poet.hpp.in | 20 +- 9 files changed, 664 insertions(+), 511 deletions(-) diff --git a/src/Chemistry/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp index 836b0f237..c6f57bbec 100644 --- a/src/Chemistry/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -185,6 +185,13 @@ namespace poet */ auto GetMasterLoopTime() const { return this->send_recv_t; } + + auto GetMasterCtrlLogicTime() const { return this->ctrl_t; } + + auto GetMasterCtrlBcastTime() const { return this->bcast_ctrl_t; } + + auto GetMasterRecvCtrlLogicTime() const { return this->recv_ctrl_t; } + /** * **Master only** Collect and return all accumulated timings recorded by * workers to run Phreeqc simulation. @@ -214,6 +221,8 @@ namespace poet */ std::vector GetWorkerIdleTimings() const; + std::vector GetWorkerControlTimings() const; + /** * **Master only** Collect and return DHT hits of all workers. 
* @@ -262,25 +271,29 @@ namespace poet std::vector ai_surrogate_validity_vector; RuntimeParameters *runtime_params = nullptr; - uint32_t control_iteration_counter = 0; - struct error_stats + struct SimulationErrorStats { std::vector mape; - std::vector rrsme; - uint32_t iteration; + std::vector rrmse; + uint32_t iteration; // iterations in simulation after rollbacks + uint32_t rollback_count; - error_stats(size_t species_count, size_t iter) - : mape(species_count, 0.0), rrsme(species_count, 0.0), iteration(iter) {} + SimulationErrorStats(size_t species_count, uint32_t iter, uint32_t counter) + : mape(species_count, 0.0), + rrmse(species_count, 0.0), + iteration(iter), + rollback_count(counter){} }; - std::vector error_stats_history; + std::vector error_history; + + static void computeSpeciesErrors(const std::vector &reference_values, + const std::vector &surrogate_values, + uint32_t size_per_prop, + uint32_t species_count, + SimulationErrorStats &species_error_stats); - static void computeStats(const std::vector &pqc_vector, - const std::vector &sur_vector, - uint32_t size_per_prop, uint32_t species_count, - error_stats &stats); - protected: void initializeDHT(uint32_t size_mb, const NamedVector &key_species, @@ -319,6 +332,7 @@ namespace poet enum { WORKER_PHREEQC, + WORKER_CTRL_ITER, WORKER_DHT_GET, WORKER_DHT_FILL, WORKER_IDLE, @@ -342,6 +356,7 @@ namespace poet double dht_get = 0.; double dht_fill = 0.; double idle_t = 0.; + double ctrl_t = 0.; }; struct worker_info_s @@ -410,6 +425,7 @@ namespace poet poet::DHT_Wrapper *dht = nullptr; + bool dht_fill_during_rollback{false}; bool interp_enabled{false}; std::unique_ptr interp; @@ -431,6 +447,10 @@ namespace poet double seq_t = 0.; double send_recv_t = 0.; + double ctrl_t = 0.; + double bcast_ctrl_t = 0.; + double recv_ctrl_t = 0.; + std::array base_totals{0}; bool print_progessbar{false}; @@ -449,7 +469,7 @@ namespace poet std::unique_ptr pqc_runner; - std::vector sur_shuffled; + std::vector sur_shuffled; }; } 
// namespace poet diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index 683985134..c2710bf8b 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -41,6 +41,12 @@ std::vector poet::ChemistryModule::GetWorkerPhreeqcTimings() const { return MasterGatherWorkerTimings(WORKER_PHREEQC); } +std::vector poet::ChemistryModule::GetWorkerControlTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_CTRL_ITER); +} + std::vector poet::ChemistryModule::GetWorkerDHTGetTimings() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); @@ -160,35 +166,35 @@ std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { return ret; } -void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, - const std::vector &sur_vector, +void poet::ChemistryModule::computeSpeciesErrors(const std::vector &reference_values, + const std::vector &surrogate_values, uint32_t size_per_prop, uint32_t species_count, - error_stats &stats) { + SimulationErrorStats &species_error_stats) { for (uint32_t i = 0; i < species_count; ++i) { double err_sum = 0.0; double sqr_err_sum = 0.0; uint32_t base_idx = i * size_per_prop; for (uint32_t j = 0; j < size_per_prop; ++j) { - const double pqc_value = pqc_vector[base_idx + j]; - const double sur_value = sur_vector[base_idx + j]; + const double ref_value = reference_values[base_idx + j]; + const double sur_value = surrogate_values[base_idx + j]; - if (pqc_value == 0.0) { + if (ref_value == 0.0) { if (sur_value != 0.0) { err_sum += 1.0; sqr_err_sum += 1.0; } // Both zero: skip } else { - double alpha = 1.0 - (sur_value / pqc_value); + double alpha = 1.0 - (sur_value / ref_value); err_sum += std::abs(alpha); sqr_err_sum += alpha * alpha; } } - stats.mape[i] = 100.0 * (err_sum / size_per_prop); - stats.rrsme[i] = + species_error_stats.mape[i] = 100.0 * (err_sum / size_per_prop); 
+ species_error_stats.rrmse[i] = (size_per_prop > 0) ? std::sqrt(sqr_err_sum / size_per_prop) : 0.0; } } @@ -264,7 +270,7 @@ inline void poet::ChemistryModule::MasterSendPkgs( worker_list_t &w_list, workpointer_t &work_pointer, workpointer_t &sur_pointer, int &pkg_to_send, int &count_pkgs, int &free_workers, double dt, uint32_t iteration, - uint32_t control_iteration, const std::vector &wp_sizes_vector) { + uint32_t control_interval, const std::vector &wp_sizes_vector) { /* declare variables */ int local_work_package_size; @@ -305,7 +311,7 @@ inline void poet::ChemistryModule::MasterSendPkgs( std::next(wp_sizes_vector.begin(), count_pkgs), 0); send_buffer[end_of_wp + 4] = wp_start_index; // whether this iteration is a control iteration - send_buffer[end_of_wp + 5] = control_iteration; + send_buffer[end_of_wp + 5] = control_interval; /* ATTENTION Worker p has rank p+1 */ // MPI_Send(send_buffer, end_of_wp + BUFFER_OFFSET, MPI_DOUBLE, p + 1, @@ -329,6 +335,7 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, int need_to_receive = 1; double idle_a, idle_b; int p, size; + double recv_a, recv_b; MPI_Status probe_status; // master_recv_a = MPI_Wtime(); @@ -361,6 +368,7 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, free_workers++; } if (probe_status.MPI_TAG == LOOP_CTRL) { + recv_a = MPI_Wtime(); MPI_Get_count(&probe_status, MPI_DOUBLE, &size); // layout of buffer is [phreeqc][surrogate] @@ -378,6 +386,8 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, w_list[p - 1].has_work = 0; pkg_to_recv -= 1; free_workers++; + recv_b = MPI_Wtime(); + this->recv_ctrl_t += recv_b - recv_a; } } } @@ -432,6 +442,10 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { int i_pkgs; int ftype; + double ctrl_a, ctrl_b; + double worker_ctrl_a, worker_ctrl_b; + double ctrl_bcast_a, ctrl_bcast_b; + const std::vector wp_sizes_vector = CalculateWPSizesVector(this->n_cells, this->wp_size); @@ -445,28 +459,44 
@@ void poet::ChemistryModule::MasterRunParallel(double dt) { MPI_INT); } - ftype = CHEM_WORK_LOOP; - PropagateFunctionType(ftype); + /* start time measurement of broadcasting interpolation status */ + ctrl_bcast_a = MPI_Wtime(); ftype = CHEM_INTERP; PropagateFunctionType(ftype); - if(this->runtime_params->rollback_simulation){ + int interp_flag = 0; + int dht_fill_flag = 0; + + if(this->runtime_params->rollback_enabled){ this->interp_enabled = false; - int interp_flag = 0; - ChemBCast(&interp_flag, 1, MPI_INT); - } else { + this->dht_fill_during_rollback = true; + interp_flag = 0; + dht_fill_flag = 1; + } + else { this->interp_enabled = true; - int interp_flag = 1; - ChemBCast(&interp_flag, 1, MPI_INT); + this->dht_fill_during_rollback = false; + interp_flag = 1; + dht_fill_flag = 0; } + ChemBCast(&interp_flag, 1, MPI_INT); + ChemBCast(&dht_fill_flag, 1, MPI_INT); + + /* end time measurement of broadcasting interpolation status */ + ctrl_bcast_b = MPI_Wtime(); + this->bcast_ctrl_t += ctrl_bcast_b - ctrl_bcast_a; + + ftype = CHEM_WORK_LOOP; + PropagateFunctionType(ftype); MPI_Barrier(this->group_comm); static uint32_t iteration = 0; - uint32_t control_iteration = static_cast( - this->runtime_params->control_iteration_active ? 1 : 0); - if (control_iteration) { + + uint32_t control_logic_enabled = this->runtime_params->control_interval_enabled ? 1 : 0; + + if (control_logic_enabled) { sur_shuffled.clear(); sur_shuffled.reserve(this->n_cells * this->prop_count); } @@ -512,7 +542,7 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { if (pkg_to_send > 0) { // send packages to all free workers ... MasterSendPkgs(worker_list, work_pointer, sur_pointer, pkg_to_send, - i_pkgs, free_workers, dt, iteration, control_iteration, + i_pkgs, free_workers, dt, iteration, control_logic_enabled, wp_sizes_vector); } // ... 
and try to receive them from workers who has finished their work @@ -522,39 +552,43 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { // Just to complete the progressbar std::cout << std::endl; - /* stop time measurement of chemistry time needed for send/recv loop */ - worker_chemistry_b = MPI_Wtime(); - this->send_recv_t += worker_chemistry_b - worker_chemistry_a; + /* stop time measurement of chemistry time needed for send/recv loop */ + worker_chemistry_b = MPI_Wtime(); + this->send_recv_t += worker_chemistry_b - worker_chemistry_a; - /* start time measurement of sequential part */ - seq_c = MPI_Wtime(); + /* start time measurement of sequential part */ + seq_c = MPI_Wtime(); - /* unshuffle grid */ - // grid.importAndUnshuffle(mpi_buffer); - std::vector out_vec{mpi_buffer}; - unshuffleField(mpi_buffer, this->n_cells, this->prop_count, - wp_sizes_vector.size(), out_vec); - chem_field = out_vec; + /* unshuffle grid */ + // grid.importAndUnshuffle(mpi_buffer); + std::vector out_vec{mpi_buffer}; + unshuffleField(mpi_buffer, this->n_cells, this->prop_count, + wp_sizes_vector.size(), out_vec); + chem_field = out_vec; - /* do master stuff */ + /* do master stuff */ - if (control_iteration) { - control_iteration_counter++; + /* start time measurement of control logic */ + ctrl_a = MPI_Wtime(); - std::vector sur_unshuffled{sur_shuffled}; + if (control_logic_enabled && !this->runtime_params->rollback_enabled) { - unshuffleField(sur_shuffled, this->n_cells, this->prop_count, - wp_sizes_vector.size(), sur_unshuffled); + std::vector sur_unshuffled{sur_shuffled};; - error_stats stats(this->prop_count, control_iteration_counter * - runtime_params->control_iteration); + unshuffleField(sur_shuffled, this->n_cells, this->prop_count, + wp_sizes_vector.size(), sur_unshuffled); - computeStats(out_vec, sur_unshuffled, this->n_cells, this->prop_count, - stats); - error_stats_history.push_back(stats); + SimulationErrorStats stats(this->prop_count, 
this->runtime_params->global_iter, this->runtime_params->rollback_counter); + + computeSpeciesErrors(out_vec, sur_unshuffled, this->n_cells, this->prop_count, stats); + + error_history.push_back(stats); + } + + /* end time measurement of control logic */ + ctrl_b = MPI_Wtime(); + this->ctrl_t += ctrl_b - ctrl_a; - // to do: control values to epsilon - } /* start time measurement of master chemistry */ sim_e_chemistry = MPI_Wtime(); diff --git a/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp b/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp index 83db27ff8..8ee59bf8f 100644 --- a/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp +++ b/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp @@ -36,315 +36,357 @@ using namespace std; -namespace poet { +namespace poet +{ -DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, - const NamedVector &key_species, - const std::vector &key_indices, - const std::vector &_output_names, - const InitialList::ChemistryHookFunctions &_hooks, - uint32_t data_count, bool _with_interp, - bool _has_het_ids) - : key_count(key_indices.size()), data_count(data_count), - input_key_elements(key_indices), communicator(dht_comm), - key_species(key_species), output_names(_output_names), hooks(_hooks), - with_interp(_with_interp), has_het_ids(_has_het_ids) { - // initialize DHT object - // key size = count of key elements + timestep - uint32_t key_size = (key_count + 1) * sizeof(Lookup_Keyelement); - uint32_t data_size = - (data_count + (with_interp ? 
input_key_elements.size() : 0)) * - sizeof(double); - uint32_t buckets_per_process = - static_cast(dht_size / (data_size + key_size)); - dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, - &poet::Murmur2_64A); + DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, + const NamedVector &key_species, + const std::vector &key_indices, + const std::vector &_output_names, + const InitialList::ChemistryHookFunctions &_hooks, + uint32_t data_count, bool _with_interp, + bool _has_het_ids) + : key_count(key_indices.size()), data_count(data_count), + input_key_elements(key_indices), communicator(dht_comm), + key_species(key_species), output_names(_output_names), hooks(_hooks), + with_interp(_with_interp), has_het_ids(_has_het_ids) + { + // initialize DHT object + // key size = count of key elements + timestep + uint32_t key_size = (key_count + 1) * sizeof(Lookup_Keyelement); + uint32_t data_size = + (data_count + (with_interp ? input_key_elements.size() : 0)) * + sizeof(double); + uint32_t buckets_per_process = + static_cast(dht_size / (data_size + key_size)); + dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, + &poet::Murmur2_64A); - dht_signif_vector = key_species.getValues(); + dht_signif_vector = key_species.getValues(); - // this->dht_signif_vector.resize(key_size, DHT_KEY_SIGNIF_DEFAULT); + // this->dht_signif_vector.resize(key_size, DHT_KEY_SIGNIF_DEFAULT); - this->dht_prop_type_vector.resize(key_count, DHT_TYPE_DEFAULT); + this->dht_prop_type_vector.resize(key_count, DHT_TYPE_DEFAULT); - const auto key_names = key_species.getNames(); + const auto key_names = key_species.getNames(); - auto tot_h = std::find(key_names.begin(), key_names.end(), "H"); - if (tot_h != key_names.end()) { - this->dht_prop_type_vector[tot_h - key_names.begin()] = DHT_TYPE_TOTAL; - } - auto tot_o = std::find(key_names.begin(), key_names.end(), "O"); - if (tot_o != key_names.end()) { - this->dht_prop_type_vector[tot_o - 
key_names.begin()] = DHT_TYPE_TOTAL; - } - auto charge = std::find(key_names.begin(), key_names.end(), "Charge"); - if (charge != key_names.end()) { - this->dht_prop_type_vector[charge - key_names.begin()] = DHT_TYPE_CHARGE; - } -} - -DHT_Wrapper::~DHT_Wrapper() { - // free DHT - DHT_free(dht_object, NULL, NULL); -} -auto DHT_Wrapper::checkDHT(WorkPackage &work_package) - -> const DHT_ResultObject & { - - const auto length = work_package.size; - - std::vector bucket_writer( - this->data_count + (with_interp ? input_key_elements.size() : 0)); - - // loop over every grid cell contained in work package - for (int i = 0; i < length; i++) { - // point to current grid cell - auto &key_vector = dht_results.keys[i]; - - // overwrite input with data from DHT, IF value is found in DHT - int res = - DHT_read(this->dht_object, key_vector.data(), bucket_writer.data()); - - switch (res) { - case DHT_SUCCESS: - work_package.output[i] = - (with_interp - ? inputAndRatesToOutput(bucket_writer, work_package.input[i]) - : bucket_writer); - work_package.mapping[i] = CHEM_DHT; - this->dht_hits++; - break; - case DHT_READ_MISS: - break; + auto tot_h = std::find(key_names.begin(), key_names.end(), "H"); + if (tot_h != key_names.end()) + { + this->dht_prop_type_vector[tot_h - key_names.begin()] = DHT_TYPE_TOTAL; + } + auto tot_o = std::find(key_names.begin(), key_names.end(), "O"); + if (tot_o != key_names.end()) + { + this->dht_prop_type_vector[tot_o - key_names.begin()] = DHT_TYPE_TOTAL; + } + auto charge = std::find(key_names.begin(), key_names.end(), "Charge"); + if (charge != key_names.end()) + { + this->dht_prop_type_vector[charge - key_names.begin()] = DHT_TYPE_CHARGE; } } - return dht_results; -} + DHT_Wrapper::~DHT_Wrapper() + { + // free DHT + DHT_free(dht_object, NULL, NULL); + } + auto DHT_Wrapper::checkDHT(WorkPackage &work_package) + -> const DHT_ResultObject & + { -void DHT_Wrapper::fillDHT(const WorkPackage &work_package) { + const auto length = work_package.size; - const 
auto length = work_package.size; + std::vector bucket_writer( + this->data_count + (with_interp ? input_key_elements.size() : 0)); - // loop over every grid cell contained in work package - dht_results.locations.resize(length); - dht_results.filledDHT = std::vector(length, false); - for (int i = 0; i < length; i++) { - // If true grid cell was simulated, needs to be inserted into dht - if (work_package.mapping[i] != CHEM_PQC) { - continue; - } + // loop over every grid cell contained in work package + for (int i = 0; i < length; i++) + { + // point to current grid cell + auto &key_vector = dht_results.keys[i]; - if (work_package.input[i][0] != 2) { - continue; - } + // overwrite input with data from DHT, IF value is found in DHT + int res = + DHT_read(this->dht_object, key_vector.data(), bucket_writer.data()); - // check if calcite or dolomite is absent and present, resp.n and vice - // versa in input/output. If this is the case -> Do not write to DHT! - // HACK: hardcoded, should be fixed! - if (hooks.dht_fill.isValid()) { - NamedVector old_values(output_names, work_package.input[i]); - NamedVector new_values(output_names, work_package.output[i]); - - if (Rcpp::as(hooks.dht_fill(old_values, new_values))) { - continue; + switch (res) + { + case DHT_SUCCESS: + work_package.output[i] = + (with_interp + ? inputAndRatesToOutput(bucket_writer, work_package.input[i]) + : bucket_writer); + work_package.mapping[i] = CHEM_DHT; + this->dht_hits++; + break; + case DHT_READ_MISS: + break; } } - uint32_t proc, index; - auto &key = dht_results.keys[i]; - const auto data = - (with_interp ? outputToInputAndRates(work_package.input[i], - work_package.output[i]) - : work_package.output[i]); - // void *data = (void *)&(work_package[i * this->data_count]); - // fuzz data (round, logarithm etc.) 
+ return dht_results; + } - // insert simulated data with fuzzed key into DHT - int res = DHT_write(this->dht_object, key.data(), - const_cast(data.data()), &proc, &index); + void DHT_Wrapper::fillDHT(const WorkPackage &work_package) + { - dht_results.locations[i] = {proc, index}; + const auto length = work_package.size; - // if data was successfully written ... - if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) { - dht_evictions++; + // loop over every grid cell contained in work package + dht_results.locations.resize(length); + dht_results.filledDHT = std::vector(length, false); + for (int i = 0; i < length; i++) + { + // If true grid cell was simulated, needs to be inserted into dht + if (work_package.mapping[i] != CHEM_PQC) + { + continue; + } + + if (work_package.input[i][1] != 2) + { + continue; + } + + // check if calcite or dolomite is absent and present, resp.n and vice + // versa in input/output. If this is the case -> Do not write to DHT! + // HACK: hardcoded, should be fixed! + if (hooks.dht_fill.isValid()) + { + NamedVector old_values(output_names, work_package.input[i]); + NamedVector new_values(output_names, work_package.output[i]); + + if (Rcpp::as(hooks.dht_fill(old_values, new_values))) + { + continue; + } + } + + uint32_t proc, index; + auto &key = dht_results.keys[i]; + const auto data = + (with_interp ? outputToInputAndRates(work_package.input[i], + work_package.output[i]) + : work_package.output[i]); + // void *data = (void *)&(work_package[i * this->data_count]); + // fuzz data (round, logarithm etc.) + + // insert simulated data with fuzzed key into DHT + int res = DHT_write(this->dht_object, key.data(), + const_cast(data.data()), &proc, &index); + + dht_results.locations[i] = {proc, index}; + + // if data was successfully written ... 
+ if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) + { + dht_evictions++; + } + + dht_results.filledDHT[i] = true; + } + } + + inline std::vector + DHT_Wrapper::outputToInputAndRates(const std::vector &old_results, + const std::vector &new_results) + { + const int prefix_size = this->input_key_elements.size(); + + std::vector output(prefix_size + this->data_count); + std::copy(new_results.begin(), new_results.end(), + output.begin() + prefix_size); + + for (int i = 0; i < prefix_size; i++) + { + const int data_elem_i = input_key_elements[i]; + output[i] = old_results[data_elem_i]; + output[prefix_size + data_elem_i] -= old_results[data_elem_i]; } - dht_results.filledDHT[i] = true; - } -} - -inline std::vector -DHT_Wrapper::outputToInputAndRates(const std::vector &old_results, - const std::vector &new_results) { - const int prefix_size = this->input_key_elements.size(); - - std::vector output(prefix_size + this->data_count); - std::copy(new_results.begin(), new_results.end(), - output.begin() + prefix_size); - - for (int i = 0; i < prefix_size; i++) { - const int data_elem_i = input_key_elements[i]; - output[i] = old_results[data_elem_i]; - output[prefix_size + data_elem_i] -= old_results[data_elem_i]; + return output; } - return output; -} + inline std::vector + DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data, + const std::vector &input_values) + { + const int prefix_size = this->input_key_elements.size(); -inline std::vector -DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data, - const std::vector &input_values) { - const int prefix_size = this->input_key_elements.size(); + std::vector output(input_values); - std::vector output(input_values); + for (int i = 0; i < prefix_size; i++) + { + const int data_elem_i = input_key_elements[i]; + output[data_elem_i] += dht_data[i]; + } - for (int i = 0; i < prefix_size; i++) { - const int data_elem_i = input_key_elements[i]; - output[data_elem_i] += dht_data[i]; + return output; 
} - return output; -} + inline std::vector + DHT_Wrapper::outputToRates(const std::vector &old_results, + const std::vector &new_results) + { + std::vector output(new_results); -inline std::vector -DHT_Wrapper::outputToRates(const std::vector &old_results, - const std::vector &new_results) { - std::vector output(new_results); + for (const auto &data_elem_i : input_key_elements) + { + output[data_elem_i] -= old_results[data_elem_i]; + } - for (const auto &data_elem_i : input_key_elements) { - output[data_elem_i] -= old_results[data_elem_i]; + return output; } - return output; -} + inline std::vector + DHT_Wrapper::ratesToOutput(const std::vector &dht_data, + const std::vector &input_values) + { + std::vector output(input_values); -inline std::vector -DHT_Wrapper::ratesToOutput(const std::vector &dht_data, - const std::vector &input_values) { - std::vector output(input_values); + for (const auto &data_elem_i : input_key_elements) + { + output[data_elem_i] += dht_data[data_elem_i]; + } - for (const auto &data_elem_i : input_key_elements) { - output[data_elem_i] += dht_data[data_elem_i]; + return output; } - return output; -} + // void DHT_Wrapper::resultsToWP(std::vector &work_package) { + // for (int i = 0; i < dht_results.length; i++) { + // if (!dht_results.needPhreeqc[i]) { + // std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), + // work_package.begin() + (data_count * i)); + // } + // } + // } -// void DHT_Wrapper::resultsToWP(std::vector &work_package) { -// for (int i = 0; i < dht_results.length; i++) { -// if (!dht_results.needPhreeqc[i]) { -// std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), -// work_package.begin() + (data_count * i)); -// } -// } -// } - -int DHT_Wrapper::tableToFile(const char *filename) { - int res = DHT_to_file(dht_object, filename); - return res; -} - -int DHT_Wrapper::fileToTable(const char *filename) { - int res = DHT_from_file(dht_object, filename); - if (res != DHT_SUCCESS) + int 
DHT_Wrapper::tableToFile(const char *filename) + { + int res = DHT_to_file(dht_object, filename); return res; + } + + int DHT_Wrapper::fileToTable(const char *filename) + { + int res = DHT_from_file(dht_object, filename); + if (res != DHT_SUCCESS) + return res; #ifdef DHT_STATISTICS - DHT_print_statistics(dht_object); + DHT_print_statistics(dht_object); #endif - return DHT_SUCCESS; -} - -void DHT_Wrapper::printStatistics() { - int res; - - res = DHT_print_statistics(dht_object); - - if (res != DHT_SUCCESS) { - // MPI ERROR ... WHAT TO DO NOW? - // RUNNING CIRCLES WHILE SCREAMING + return DHT_SUCCESS; } -} -LookupKey DHT_Wrapper::fuzzForDHT_R(const std::vector &cell, - double dt) { - const auto c_zero_val = std::pow(10, AQUEOUS_EXP); + void DHT_Wrapper::printStatistics() + { + int res; - NamedVector input_nv(this->output_names, cell); + res = DHT_print_statistics(dht_object); - const std::vector eval_vec = - Rcpp::as>(hooks.dht_fuzz(input_nv)); - assert(eval_vec.size() == this->key_count); - LookupKey vecFuzz(this->key_count + 1 + has_het_ids, {.0}); + if (res != DHT_SUCCESS) + { + // MPI ERROR ... WHAT TO DO NOW? 
+ // RUNNING CIRCLES WHILE SCREAMING + } + } - DHT_Rounder rounder; + LookupKey DHT_Wrapper::fuzzForDHT_R(const std::vector &cell, + double dt) + { + const auto c_zero_val = std::pow(10, AQUEOUS_EXP); - int totals_i = 0; - // introduce fuzzing to allow more hits in DHT - // loop over every variable of grid cell - for (std::uint32_t i = 0; i < eval_vec.size(); i++) { - double curr_key = eval_vec[i]; - if (curr_key != 0) { - if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { - curr_key -= base_totals[totals_i++]; + NamedVector input_nv(this->output_names, cell); + + const std::vector eval_vec = + Rcpp::as>(hooks.dht_fuzz(input_nv)); + assert(eval_vec.size() == this->key_count); + LookupKey vecFuzz(this->key_count + 1 + has_het_ids, {.0}); + + DHT_Rounder rounder; + + int totals_i = 0; + // introduce fuzzing to allow more hits in DHT + // loop over every variable of grid cell + for (std::uint32_t i = 0; i < eval_vec.size(); i++) + { + double curr_key = eval_vec[i]; + if (curr_key != 0) + { + if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) + { + curr_key -= base_totals[totals_i++]; + } + vecFuzz[i] = + rounder.round(curr_key, dht_signif_vector[i], + this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); } - vecFuzz[i] = - rounder.round(curr_key, dht_signif_vector[i], - this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); } - } - // add timestep to the end of the key as double value - vecFuzz[this->key_count].fp_element = dt; - if (has_het_ids) { - vecFuzz[this->key_count + 1].fp_element = cell[0]; + // add timestep to the end of the key as double value + vecFuzz[this->key_count].fp_element = dt; + if (has_het_ids) + { + vecFuzz[this->key_count + 1].fp_element = cell[0]; + } + + return vecFuzz; } - return vecFuzz; -} + LookupKey DHT_Wrapper::fuzzForDHT(const std::vector &cell, double dt) + { + const auto c_zero_val = std::pow(10, AQUEOUS_EXP); -LookupKey DHT_Wrapper::fuzzForDHT(const std::vector &cell, double dt) { - const auto c_zero_val = std::pow(10, AQUEOUS_EXP); + 
LookupKey vecFuzz(this->key_count + 1 + has_het_ids, {.0}); + DHT_Rounder rounder; - LookupKey vecFuzz(this->key_count + 1 + has_het_ids, {.0}); - DHT_Rounder rounder; - - int totals_i = 0; - // introduce fuzzing to allow more hits in DHT - // loop over every variable of grid cell - for (std::uint32_t i = 0; i < input_key_elements.size(); i++) { - if (input_key_elements[i] == DHT_KEY_INPUT_CUSTOM) { - continue; - } - double curr_key = cell[input_key_elements[i]]; - if (curr_key != 0) { - if (curr_key < c_zero_val && - this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) { + int totals_i = 0; + // introduce fuzzing to allow more hits in DHT + // loop over every variable of grid cell + for (std::uint32_t i = 0; i < input_key_elements.size(); i++) + { + if (input_key_elements[i] == DHT_KEY_INPUT_CUSTOM) + { continue; } - if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { - curr_key -= base_totals[totals_i++]; + double curr_key = cell[input_key_elements[i]]; + if (curr_key != 0) + { + if (curr_key < c_zero_val && + this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) + { + continue; + } + if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) + { + curr_key -= base_totals[totals_i++]; + } + vecFuzz[i] = + rounder.round(curr_key, dht_signif_vector[i], + this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); } - vecFuzz[i] = - rounder.round(curr_key, dht_signif_vector[i], - this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); } - } - // add timestep to the end of the key as double value - vecFuzz[this->key_count].fp_element = dt; - if (has_het_ids) { - vecFuzz[this->key_count + 1].fp_element = cell[0]; + // add timestep to the end of the key as double value + vecFuzz[this->key_count].fp_element = dt; + if (has_het_ids) + { + vecFuzz[this->key_count + 1].fp_element = cell[0]; + } + + return vecFuzz; } - return vecFuzz; -} + void poet::DHT_Wrapper::SetSignifVector(std::vector signif_vec) + { + if (signif_vec.size() != this->key_count) + { + throw std::runtime_error( + "Significant 
vector size mismatches count of key elements."); + } -void poet::DHT_Wrapper::SetSignifVector(std::vector signif_vec) { - if (signif_vec.size() != this->key_count) { - throw std::runtime_error( - "Significant vector size mismatches count of key elements."); + this->dht_signif_vector = signif_vec; } - - this->dht_signif_vector = signif_vec; -} } // namespace poet diff --git a/src/Chemistry/SurrogateModels/InterpolationModule.cpp b/src/Chemistry/SurrogateModels/InterpolationModule.cpp index db730d00d..e3b93c599 100644 --- a/src/Chemistry/SurrogateModels/InterpolationModule.cpp +++ b/src/Chemistry/SurrogateModels/InterpolationModule.cpp @@ -25,152 +25,175 @@ #include #include -extern "C" { +extern "C" +{ #include "DHT.h" } -namespace poet { +namespace poet +{ -InterpolationModule::InterpolationModule( - std::uint32_t entries_per_bucket, std::uint64_t size_per_process, - std::uint32_t min_entries_needed, DHT_Wrapper &dht, - const NamedVector &interp_key_signifs, - const std::vector &dht_key_indices, - const std::vector &_out_names, - const InitialList::ChemistryHookFunctions &_hooks) - : dht_instance(dht), key_signifs(interp_key_signifs), - key_indices(dht_key_indices), min_entries_needed(min_entries_needed), - dht_names(dht.getKeySpecies().getNames()), out_names(_out_names), - hooks(_hooks) { + InterpolationModule::InterpolationModule( + std::uint32_t entries_per_bucket, std::uint64_t size_per_process, + std::uint32_t min_entries_needed, DHT_Wrapper &dht, + const NamedVector &interp_key_signifs, + const std::vector &dht_key_indices, + const std::vector &_out_names, + const InitialList::ChemistryHookFunctions &_hooks) + : dht_instance(dht), key_signifs(interp_key_signifs), + key_indices(dht_key_indices), min_entries_needed(min_entries_needed), + dht_names(dht.getKeySpecies().getNames()), out_names(_out_names), + hooks(_hooks) + { - initPHT(this->key_signifs.size(), entries_per_bucket, size_per_process, - dht.getCommunicator()); + initPHT(this->key_signifs.size(), 
entries_per_bucket, size_per_process, + dht.getCommunicator()); - pht->setSourceDHT(dht.getDHT()); -} + pht->setSourceDHT(dht.getDHT()); + } -void InterpolationModule::initPHT(std::uint32_t key_count, - std::uint32_t entries_per_bucket, - std::uint32_t size_per_process, - MPI_Comm communicator) { - uint32_t key_size = key_count * sizeof(Lookup_Keyelement) + sizeof(double); - uint32_t data_size = sizeof(DHT_Location); + void InterpolationModule::initPHT(std::uint32_t key_count, + std::uint32_t entries_per_bucket, + std::uint32_t size_per_process, + MPI_Comm communicator) + { + uint32_t key_size = key_count * sizeof(Lookup_Keyelement) + sizeof(double); + uint32_t data_size = sizeof(DHT_Location); - pht = std::make_unique( - key_size, data_size, entries_per_bucket, size_per_process, communicator); -} + pht = std::make_unique( + key_size, data_size, entries_per_bucket, size_per_process, communicator); + } -void InterpolationModule::writePairs() { - const auto in = this->dht_instance.getDHTResults(); - for (int i = 0; i < in.filledDHT.size(); i++) { - if (in.filledDHT[i]) { - const auto coarse_key = roundKey(in.keys[i]); - pht->writeLocationToPHT(coarse_key, in.locations[i]); + void InterpolationModule::writePairs() + { + const auto in = this->dht_instance.getDHTResults(); + for (int i = 0; i < in.filledDHT.size(); i++) + { + if (in.filledDHT[i]) + { + const auto coarse_key = roundKey(in.keys[i]); + pht->writeLocationToPHT(coarse_key, in.locations[i]); + } } } -} -void InterpolationModule::tryInterpolation(WorkPackage &work_package) { - interp_result.status.resize(work_package.size, NOT_NEEDED); + void InterpolationModule::tryInterpolation(WorkPackage &work_package) + { + interp_result.status.resize(work_package.size, NOT_NEEDED); - const auto dht_results = this->dht_instance.getDHTResults(); + const auto dht_results = this->dht_instance.getDHTResults(); - for (int wp_i = 0; wp_i < work_package.size; wp_i++) { - if (work_package.input[wp_i][0] != 2) { - 
interp_result.status[wp_i] = INSUFFICIENT_DATA; - continue; - } - - if (work_package.mapping[wp_i] != CHEM_PQC) { - interp_result.status[wp_i] = NOT_NEEDED; - continue; - } - - const auto rounded_key = roundKey(dht_results.keys[wp_i]); - - auto pht_result = - pht->query(rounded_key, this->min_entries_needed, - dht_instance.getInputCount(), dht_instance.getOutputCount()); - - if (pht_result.size < this->min_entries_needed) { - interp_result.status[wp_i] = INSUFFICIENT_DATA; - continue; - } - - if (hooks.interp_pre.isValid()) { - NamedVector nv_in(this->out_names, work_package.input[wp_i]); - - std::vector rm_indices = Rcpp::as>( - hooks.interp_pre(nv_in, pht_result.in_values)); - - pht_result.size -= rm_indices.size(); - - if (pht_result.size < this->min_entries_needed) { + for (int wp_i = 0; wp_i < work_package.size; wp_i++) + { + if (work_package.input[wp_i][1] != 2) + { interp_result.status[wp_i] = INSUFFICIENT_DATA; continue; } - for (const auto &index : rm_indices) { - pht_result.in_values.erase( - std::next(pht_result.in_values.begin(), index - 1)); - pht_result.out_values.erase( - std::next(pht_result.out_values.begin(), index - 1)); + if (work_package.mapping[wp_i] != CHEM_PQC) + { + interp_result.status[wp_i] = NOT_NEEDED; + continue; } - } -#ifdef POET_PHT_ADD - this->pht->incrementReadCounter(roundKey(rounded_key)); -#endif + const auto rounded_key = roundKey(dht_results.keys[wp_i]); - const int cell_id = static_cast(work_package.input[wp_i][0]); + auto pht_result = + pht->query(rounded_key, this->min_entries_needed, + dht_instance.getInputCount(), dht_instance.getOutputCount()); - if (!to_calc_cache.contains(cell_id)) { - const std::vector &to_calc = dht_instance.getKeyElements(); - std::vector keep_indices; + if (pht_result.size < this->min_entries_needed) + { + interp_result.status[wp_i] = INSUFFICIENT_DATA; + continue; + } - for (std::size_t i = 0; i < to_calc.size(); i++) { - if (!std::isnan(work_package.input[wp_i][to_calc[i]])) { - 
keep_indices.push_back(to_calc[i]); + if (hooks.interp_pre.isValid()) + { + NamedVector nv_in(this->out_names, work_package.input[wp_i]); + + std::vector rm_indices = Rcpp::as>( + hooks.interp_pre(nv_in, pht_result.in_values)); + + pht_result.size -= rm_indices.size(); + + if (pht_result.size < this->min_entries_needed) + { + interp_result.status[wp_i] = INSUFFICIENT_DATA; + continue; + } + + for (const auto &index : rm_indices) + { + pht_result.in_values.erase( + std::next(pht_result.in_values.begin(), index - 1)); + pht_result.out_values.erase( + std::next(pht_result.out_values.begin(), index - 1)); } } - to_calc_cache[cell_id] = keep_indices; +#ifdef POET_PHT_ADD + this->pht->incrementReadCounter(roundKey(rounded_key)); +#endif + + const int cell_id = static_cast(work_package.input[wp_i][1]); + + if (!to_calc_cache.contains(cell_id)) + { + const std::vector &to_calc = dht_instance.getKeyElements(); + std::vector keep_indices; + + for (std::size_t i = 0; i < to_calc.size(); i++) + { + if (!std::isnan(work_package.input[wp_i][to_calc[i]])) + { + keep_indices.push_back(to_calc[i]); + } + } + + to_calc_cache[cell_id] = keep_indices; + } + + double start_fc = MPI_Wtime(); + + work_package.output[wp_i] = + f_interpolate(to_calc_cache[cell_id], work_package.input[wp_i], + pht_result.in_values, pht_result.out_values); + + if (hooks.interp_post.isValid()) + { + NamedVector nv_result(this->out_names, work_package.output[wp_i]); + if (Rcpp::as(hooks.interp_post(nv_result))) + { + interp_result.status[wp_i] = INSUFFICIENT_DATA; + continue; + } + } + + // interp_result.results[i][0] = mean_water; + this->interp_t += MPI_Wtime() - start_fc; + + this->interpolations++; + + work_package.mapping[wp_i] = CHEM_INTERP; + interp_result.status[wp_i] = RES_OK; } + } - double start_fc = MPI_Wtime(); - - work_package.output[wp_i] = - f_interpolate(to_calc_cache[cell_id], work_package.input[wp_i], - pht_result.in_values, pht_result.out_values); - - if (hooks.interp_post.isValid()) { - 
NamedVector nv_result(this->out_names, work_package.output[wp_i]); - if (Rcpp::as(hooks.interp_post(nv_result))) { - interp_result.status[wp_i] = INSUFFICIENT_DATA; - continue; + void InterpolationModule::resultsToWP(std::vector &work_package) const + { + for (uint32_t i = 0; i < interp_result.status.size(); i++) + { + if (interp_result.status[i] == RES_OK) + { + const std::size_t length = + interp_result.results[i].end() - interp_result.results[i].begin(); + std::copy(interp_result.results[i].begin(), + interp_result.results[i].end(), + work_package.begin() + (length * i)); } } - - // interp_result.results[i][0] = mean_water; - this->interp_t += MPI_Wtime() - start_fc; - - this->interpolations++; - - work_package.mapping[wp_i] = CHEM_INTERP; - interp_result.status[wp_i] = RES_OK; } -} - -void InterpolationModule::resultsToWP(std::vector &work_package) const { - for (uint32_t i = 0; i < interp_result.status.size(); i++) { - if (interp_result.status[i] == RES_OK) { - const std::size_t length = - interp_result.results[i].end() - interp_result.results[i].begin(); - std::copy(interp_result.results[i].begin(), - interp_result.results[i].end(), - work_package.begin() + (length * i)); - } - } -} } // namespace poet diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index b354f986d..4406ec65d 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -69,9 +69,14 @@ namespace poet } case CHEM_INTERP: { - int interp_flag; + int interp_flag = 0; + int dht_fill_flag = 0; + ChemBCast(&interp_flag, 1, MPI_INT); + ChemBCast(&dht_fill_flag, 1, MPI_INT); + this->interp_enabled = (interp_flag == 1); + this->dht_fill_during_rollback = (dht_fill_flag == 1); break; } case CHEM_WORK_LOOP: @@ -150,13 +155,14 @@ namespace poet double dht_get_start, dht_get_end; double phreeqc_time_start, phreeqc_time_end; double dht_fill_start, dht_fill_end; + double ctrl_time_c, ctrl_time_d; uint32_t iteration; double dt; double 
current_sim_time; uint32_t wp_start_index; int count = double_count; - bool control_iteration_active = false; + bool control_logic_enabled = false; std::vector mpi_buffer(count); /* receive */ @@ -183,7 +189,7 @@ namespace poet // current work package start location in field wp_start_index = mpi_buffer[count + 4]; - control_iteration_active = (mpi_buffer[count + 5] == 1); + control_logic_enabled = (mpi_buffer[count + 5] == 1); for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { @@ -229,7 +235,7 @@ namespace poet poet::WorkPackage s_curr_wp_control = s_curr_wp; - if (control_iteration_active) + if (control_logic_enabled) { for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++) { @@ -240,12 +246,15 @@ namespace poet phreeqc_time_start = MPI_Wtime(); - WorkerRunWorkPackage(control_iteration_active ? s_curr_wp_control : s_curr_wp, current_sim_time, dt); + WorkerRunWorkPackage(control_logic_enabled ? s_curr_wp_control : s_curr_wp, current_sim_time, dt); phreeqc_time_end = MPI_Wtime(); - if (control_iteration_active) - { + if (control_logic_enabled) + { + /* start time measurement for copying control workpackage */ + ctrl_time_c = MPI_Wtime(); + std::size_t sur_wp_offset = s_curr_wp.size * this->prop_count; mpi_buffer.resize(count + sur_wp_offset); @@ -275,6 +284,10 @@ namespace poet } count += sur_wp_offset; + + /* end time measurement for copying control workpackage */ + ctrl_time_d = MPI_Wtime(); + timings.ctrl_t += ctrl_time_d - ctrl_time_c; } else { @@ -288,14 +301,14 @@ namespace poet /* send results to master */ MPI_Request send_req; - int mpi_tag = control_iteration_active ? LOOP_CTRL : LOOP_WORK; + int mpi_tag = control_logic_enabled ? LOOP_CTRL : LOOP_WORK; MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, mpi_tag, MPI_COMM_WORLD, &send_req); - if (dht_enabled || interp_enabled) + if (dht_enabled || interp_enabled || dht_fill_during_rollback) { /* write results to DHT */ dht_fill_start = MPI_Wtime(); - dht->fillDHT(control_iteration_active ? 
s_curr_wp_control : s_curr_wp); + dht->fillDHT(control_logic_enabled ? s_curr_wp_control : s_curr_wp); dht_fill_end = MPI_Wtime(); if (interp_enabled) @@ -306,7 +319,6 @@ namespace poet } timings.phreeqc_t += phreeqc_time_end - phreeqc_time_start; - MPI_Wait(&send_req, MPI_STATUS_IGNORE); } @@ -460,6 +472,12 @@ namespace poet this->group_comm); break; } + case WORKER_CTRL_ITER: + { + MPI_Gather(&timings.ctrl_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; + } case WORKER_DHT_GET: { MPI_Gather(&timings.dht_get, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, diff --git a/src/IO/StatsIO.cpp b/src/IO/StatsIO.cpp index 4312a46dd..5155ffd1f 100644 --- a/src/IO/StatsIO.cpp +++ b/src/IO/StatsIO.cpp @@ -2,10 +2,11 @@ #include #include #include +#include // for std::setw and std::setprecision namespace poet { - void writeStatsToCSV(const std::vector &all_stats, + void writeStatsToCSV(const std::vector &all_stats, const std::vector &species_names, const std::string &filename) { @@ -17,21 +18,32 @@ namespace poet } // header - out << "Iteration, Species, MAPE, RRSME \n"; + out << std::left << std::setw(15) << "Iteration" + << std::setw(15) << "Rollback" + << std::setw(15) << "Species" + << std::setw(15) << "MAPE" + << std::setw(15) << "RRSME" << "\n"; + out << std::string(75, '-') << "\n"; // separator line + + // data rows for (size_t i = 0; i < all_stats.size(); ++i) { for (size_t j = 0; j < species_names.size(); ++j) { - out << all_stats[i].iteration << ",\t" - << species_names[j] << ",\t" - << all_stats[i].mape[j] << ",\t" - << all_stats[i].rrsme[j] << "\n"; + out << std::left + << std::setw(15) << all_stats[i].iteration + << std::setw(15) << all_stats[i].rollback_count + << std::setw(15) << species_names[j] + << std::setw(15) << all_stats[i].mape[j] + << std::setw(15) << all_stats[i].rrmse[j] + << "\n"; } - out << std::endl; + out << "\n"; // blank line between iterations } out.close(); std::cout << "Stats written to " << filename << "\n"; } -} // 
namespace poet \ No newline at end of file +} + // namespace poet \ No newline at end of file diff --git a/src/IO/StatsIO.hpp b/src/IO/StatsIO.hpp index a865cc64a..a7fd1c606 100644 --- a/src/IO/StatsIO.hpp +++ b/src/IO/StatsIO.hpp @@ -3,7 +3,7 @@ namespace poet { - void writeStatsToCSV(const std::vector &all_stats, + void writeStatsToCSV(const std::vector &all_stats, const std::vector &species_names, const std::string &filename); } // namespace poet diff --git a/src/poet.cpp b/src/poet.cpp index 0f558b5d7..4cc308788 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -253,7 +253,6 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) try { - Rcpp::List init_params_(ReadRObj_R(init_file)); params.init_params = init_params_; @@ -266,14 +265,12 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) params.timesteps = Rcpp::as>(global_rt_setup->operator[]("timesteps")); - params.control_iteration = - Rcpp::as(global_rt_setup->operator[]("control_iteration")); - params.species_epsilon = - Rcpp::as>(global_rt_setup->operator[]("species_epsilon")); - params.penalty_iteration = - Rcpp::as(global_rt_setup->operator[]("penalty_iteration")); - params.max_penalty_iteration = - Rcpp::as(global_rt_setup->operator[]("max_penalty_iteration")); + params.control_interval = + Rcpp::as(global_rt_setup->operator[]("control_interval")); + params.checkpoint_interval = + Rcpp::as(global_rt_setup->operator[]("checkpoint_interval")); + params.mape_threshold = + Rcpp::as>(global_rt_setup->operator[]("mape_threshold")); } catch (const std::exception &e) { @@ -304,53 +301,38 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) *global_rt_setup = R["setup"]; } -bool checkAndRollback(ChemistryModule &chem, RuntimeParameters ¶ms, uint32_t &iter) +bool triggerRollbackIfExceeded(ChemistryModule &chem, RuntimeParameters ¶ms, uint32_t ¤t_iteration) { - const std::vector &latest_mape = chem.error_stats_history.back().mape; + const std::vector 
&mape_values = chem.error_history.back().mape; - for (uint32_t j = 0; j < params.species_epsilon.size(); j++) + for (uint32_t i = 0; i < params.mape_threshold.size(); i++) { - if (params.species_epsilon[j] < latest_mape[j] && latest_mape[j] != 0) + // Skip if no meaningful MAPE value + if(mape_values[i] == 0){ + continue; + } + if (mape_values[i] > params.mape_threshold[i]) { - uint32_t rollback_iter = iter - (iter % params.control_iteration); + uint32_t rollback_iteration = ((current_iteration - 1) / params.checkpoint_interval) * params.checkpoint_interval; - std::cout << chem.getField().GetProps()[j] << " with a MAPE value of " << latest_mape[j] << " exceeds epsilon of " - << params.species_epsilon[j] << "! " << std::endl; + MSG("[THRESHOLD EXCEEDED] " + chem.getField().GetProps()[i] + " has MAPE = " + + std::to_string(mape_values[i]) + " exceeding threshold = " + std::to_string(params.mape_threshold[i]) + + " → rolling back to iteration " + std::to_string(rollback_iteration)); Checkpoint_s checkpoint_read{.field = chem.getField()}; - read_checkpoint("checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read); - iter = checkpoint_read.iteration; + read_checkpoint("checkpoint" + std::to_string(rollback_iteration) + ".hdf5", checkpoint_read); + current_iteration = checkpoint_read.iteration; + // Rollback happend return true; - } + } } - MSG("All spezies are below their threshold values"); + + MSG("All species are within their error thresholds."); return false; } -void updatePenaltyLogic(RuntimeParameters ¶ms, bool roolback_happend) -{ - if (roolback_happend) - { - params.rollback_simulation = true; - params.penalty_counter = params.penalty_iteration; - std::cout << "Penalty counter reset to: " << params.penalty_counter << std::endl; - MSG("Rollback! 
Penalty phase started for " + std::to_string(params.penalty_iteration) + " iterations."); - } - else - { - if (params.rollback_simulation && params.penalty_counter == 0) - { - params.rollback_simulation = false; - MSG("Penalty phase ended. Interpolation re-enabled."); - } - else if (!params.rollback_simulation) - { - params.penalty_iteration = std::min(params.penalty_iteration *= 2, params.max_penalty_iteration); - MSG("Stable surrogate phase detected. Penalty iteration doubled to " + std::to_string(params.penalty_iteration) + " iterations."); - } - } -} + static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, DiffusionModule &diffusion, @@ -367,21 +349,25 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, } R["TMP_PROPS"] = Rcpp::wrap(chem.getField().GetProps()); - params.next_penalty_check = params.penalty_iteration; - /* SIMULATION LOOP */ double dSimTime{0}; + double chkTime = 0.0; + for (uint32_t iter = 1; iter < maxiter + 1; iter++) { - // Penalty countdown - if (params.rollback_simulation && params.penalty_counter > 0) - { - params.penalty_counter--; - std::cout << "Penalty counter: " << params.penalty_counter << std::endl; + // Rollback countdowm + if (params.rollback_enabled) { + if (params.sur_disabled_counter > 0) { + --params.sur_disabled_counter; + MSG("Rollback counter: " + std::to_string(params.sur_disabled_counter)); + } else { + params.rollback_enabled = false; + } } - params.control_iteration_active = (iter % params.control_iteration == 0 /* && iter != 0 */); + params.global_iter = iter; + params.control_interval_enabled = (iter % params.control_interval == 0); double start_t = MPI_Wtime(); @@ -495,20 +481,27 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); - if (iter % params.control_iteration == 0) - { - writeStatsToCSV(chem.error_stats_history, chem.getField().GetProps(), "stats_overview"); 
+ double chk_start = MPI_Wtime(); + + if(iter % params.checkpoint_interval == 0){ + MSG("Writing checkpoint of iteration " + std::to_string(iter)); write_checkpoint("checkpoint" + std::to_string(iter) + ".hdf5", {.field = chem.getField(), .iteration = iter}); } - if (iter == params.next_penalty_check) + if (params.control_interval_enabled && !params.rollback_enabled) { - bool roolback_happend = checkAndRollback(chem, params, iter); - updatePenaltyLogic(params, roolback_happend); + writeStatsToCSV(chem.error_history, chem.getField().GetProps(), "stats_overview"); - params.next_penalty_check = iter + params.penalty_iteration; + if(triggerRollbackIfExceeded(chem, params, iter)){ + params.rollback_enabled = true; + params.rollback_counter ++; + params.sur_disabled_counter = params.control_interval; + MSG("Interpolation disabled for the next " + std::to_string(params.control_interval) + "."); + } } + double chk_end = MPI_Wtime(); + chkTime += chk_end - chk_start; // MSG(); } // END SIMULATION LOOP @@ -526,6 +519,13 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, Rcpp::List diffusion_profiling; diffusion_profiling["simtime"] = diffusion.getTransportTime(); + Rcpp::List ctrl_profiling; + ctrl_profiling["checkpointing_time"] = chkTime; + ctrl_profiling["ctrl_logic_master"] = chem.GetMasterCtrlLogicTime(); + ctrl_profiling["bcast_ctrl_logic_master"] = chem.GetMasterCtrlBcastTime(); + ctrl_profiling["recv_ctrl_logic_maser"] = chem.GetMasterRecvCtrlLogicTime(); + ctrl_profiling["ctrl_logic_worker"] = Rcpp::wrap(chem.GetWorkerControlTimings()); + if (params.use_dht) { chem_profiling["dht_hits"] = Rcpp::wrap(chem.GetWorkerDHTHits()); @@ -554,6 +554,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, profiling["simtime"] = dSimTime; profiling["chemistry"] = chem_profiling; profiling["diffusion"] = diffusion_profiling; + profiling["ctrl_logic"] = ctrl_profiling; + chem.MasterLoopBreak(); diff --git a/src/poet.hpp.in 
b/src/poet.hpp.in index a5b82c150..6f9f0fabf 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -38,10 +38,10 @@ static const inline std::string ai_surrogate_r_library = R"(@R_AI_SURROGATE_LIB@)"; static const inline std::string r_runtime_parameters = "mysetup"; -struct RuntimeParameters { +struct RuntimeParameters +{ std::string out_dir; std::vector timesteps; - std::vector species_epsilon; Rcpp::List init_params; @@ -52,13 +52,15 @@ struct RuntimeParameters { bool print_progress = false; - std::uint32_t penalty_iteration = 0; - std::uint32_t max_penalty_iteration = 0; - std::uint32_t penalty_counter = 0; - std::uint32_t next_penalty_check = 0; - bool rollback_simulation = false; - bool control_iteration_active = false; - std::uint32_t control_iteration = 1; + bool rollback_enabled = false; + bool control_interval_enabled = false; + std::uint32_t global_iter = 0; + std::uint32_t sur_disabled_counter = 0; + std::uint32_t rollback_counter = 0; + std::uint32_t checkpoint_interval = 0; + std::uint32_t control_interval = 0; + std::vector mape_threshold; + std::vector rrmse_threshold; static constexpr std::uint32_t WORK_PACKAGE_SIZE_DEFAULT = 32; std::uint32_t work_package_size = WORK_PACKAGE_SIZE_DEFAULT; From abb756d118c005f5e5acca7436c145bc8ff5debc Mon Sep 17 00:00:00 2001 From: rastogi Date: Wed, 15 Oct 2025 11:32:29 +0200 Subject: [PATCH 15/19] Added rrmse threshold values --- src/poet.cpp | 51 ++++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/poet.cpp b/src/poet.cpp index 4cc308788..b40eeedac 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -271,6 +271,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) Rcpp::as(global_rt_setup->operator[]("checkpoint_interval")); params.mape_threshold = Rcpp::as>(global_rt_setup->operator[]("mape_threshold")); + params.rrmse_threshold = + Rcpp::as>(global_rt_setup->operator[]("rrmse_threshold")); } catch (const std::exception &e) { @@ 
-303,37 +305,40 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) bool triggerRollbackIfExceeded(ChemistryModule &chem, RuntimeParameters ¶ms, uint32_t ¤t_iteration) { - const std::vector &mape_values = chem.error_history.back().mape; + const auto &mape = chem.error_history.back().mape; + const auto &rrmse = chem.error_history.back().rrmse; + const auto &props = chem.getField().GetProps(); - for (uint32_t i = 0; i < params.mape_threshold.size(); i++) - { - // Skip if no meaningful MAPE value - if(mape_values[i] == 0){ - continue; - } - if (mape_values[i] > params.mape_threshold[i]) + for (uint32_t i = 0; i < params.mape_threshold.size(); ++i) { - uint32_t rollback_iteration = ((current_iteration - 1) / params.checkpoint_interval) * params.checkpoint_interval; + // Skip invalid entries + if ((mape[i] == 0 && rrmse[i] == 0)) + continue; - MSG("[THRESHOLD EXCEEDED] " + chem.getField().GetProps()[i] + " has MAPE = " + - std::to_string(mape_values[i]) + " exceeding threshold = " + std::to_string(params.mape_threshold[i]) + - " → rolling back to iteration " + std::to_string(rollback_iteration)); + bool mape_exceeded = mape[i] > params.mape_threshold[i]; + bool rrmse_exceeded = rrmse[i] > params.rrmse_threshold[i]; - Checkpoint_s checkpoint_read{.field = chem.getField()}; - read_checkpoint("checkpoint" + std::to_string(rollback_iteration) + ".hdf5", checkpoint_read); - current_iteration = checkpoint_read.iteration; + if (mape_exceeded || rrmse_exceeded) + { + uint32_t rollback_iter = ((current_iteration - 1) / params.checkpoint_interval) * params.checkpoint_interval; + std::string metric = mape_exceeded ? "MAPE" : "RRMSE"; + double value = mape_exceeded ? mape[i] : rrmse[i]; + double threshold = mape_exceeded ? 
params.mape_threshold[i] : params.rrmse_threshold[i]; - // Rollback happend - return true; + MSG("[THRESHOLD EXCEEDED] " + props[i] + " has " + metric + " = " + + std::to_string(value) + " exceeding threshold = " + std::to_string(threshold) + + " → rolling back to iteration " + std::to_string(rollback_iter)); + + Checkpoint_s checkpoint_read{.field = chem.getField()}; + read_checkpoint("checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read); + current_iteration = checkpoint_read.iteration; + return true; // rollback happened + } } - } - - MSG("All species are within their error thresholds."); - return false; + MSG("All species are within their MAPE and RRMSE thresholds."); + return false; } - - static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, DiffusionModule &diffusion, ChemistryModule &chem) From 354ce2e1bbeedabd1a8a43c3f05d7ace20051914 Mon Sep 17 00:00:00 2001 From: rastogi Date: Fri, 17 Oct 2025 15:37:49 +0200 Subject: [PATCH 16/19] Modified IO functions --- src/IO/HDF5Functions.hpp | 4 ++-- src/IO/StatsIO.cpp | 12 ++++++++---- src/IO/StatsIO.hpp | 1 + src/IO/checkpoint.cpp | 22 ++++++++++++++++++---- src/poet.cpp | 6 +++--- 5 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/IO/HDF5Functions.hpp b/src/IO/HDF5Functions.hpp index 87687f2b9..2eec6d2b7 100644 --- a/src/IO/HDF5Functions.hpp +++ b/src/IO/HDF5Functions.hpp @@ -3,7 +3,7 @@ #include #include "Datatypes.hpp" -int write_checkpoint(const std::string &file_path, struct Checkpoint_s &&checkpoint); -int read_checkpoint(const std::string &file_path, struct Checkpoint_s &checkpoint); +int write_checkpoint(const std::string &dir_path, const std::string &file_name, struct Checkpoint_s &&checkpoint); +int read_checkpoint(const std::string &dir_path, const std::string &file_name, struct Checkpoint_s &checkpoint); diff --git a/src/IO/StatsIO.cpp b/src/IO/StatsIO.cpp index 5155ffd1f..8e3c2978c 100644 --- a/src/IO/StatsIO.cpp +++ b/src/IO/StatsIO.cpp @@ -2,15 
+2,19 @@ #include #include #include -#include // for std::setw and std::setprecision +#include +#include namespace poet { void writeStatsToCSV(const std::vector &all_stats, const std::vector &species_names, + const std::string &out_dir, const std::string &filename) { - std::ofstream out(filename); + std::filesystem::path full_path = std::filesystem::path(out_dir) / filename; + + std::ofstream out(full_path); if (!out.is_open()) { std::cerr << "Could not open " << filename << " !" << std::endl; @@ -24,7 +28,7 @@ namespace poet << std::setw(15) << "MAPE" << std::setw(15) << "RRSME" << "\n"; - out << std::string(75, '-') << "\n"; // separator line + out << std::string(75, '-') << "\n"; // data rows for (size_t i = 0; i < all_stats.size(); ++i) @@ -39,7 +43,7 @@ namespace poet << std::setw(15) << all_stats[i].rrmse[j] << "\n"; } - out << "\n"; // blank line between iterations + out << "\n"; } out.close(); diff --git a/src/IO/StatsIO.hpp b/src/IO/StatsIO.hpp index a7fd1c606..cb432f939 100644 --- a/src/IO/StatsIO.hpp +++ b/src/IO/StatsIO.hpp @@ -5,5 +5,6 @@ namespace poet { void writeStatsToCSV(const std::vector &all_stats, const std::vector &species_names, + const std::string &out_dir, const std::string &filename); } // namespace poet diff --git a/src/IO/checkpoint.cpp b/src/IO/checkpoint.cpp index f197c22f5..e053700a0 100644 --- a/src/IO/checkpoint.cpp +++ b/src/IO/checkpoint.cpp @@ -1,13 +1,20 @@ #include "IO/Datatypes.hpp" #include #include +#include -int write_checkpoint(const std::string &file_path, struct Checkpoint_s &&checkpoint){ +namespace fs = std::filesystem; +int write_checkpoint(const std::string &dir_path, const std::string &file_name, struct Checkpoint_s &&checkpoint){ + + if (!fs::exists(dir_path)) { + std::cerr << "Directory does not exist: " << dir_path << std::endl; + return -1; + } + fs::path file_path = fs::path(dir_path) / file_name; // TODO: errorhandling H5Easy::File file(file_path, H5Easy::File::Overwrite); - H5Easy::dump(file, 
"/MetaParam/Iterations", checkpoint.iteration); H5Easy::dump(file, "/Grid/Names", checkpoint.field.GetProps()); H5Easy::dump(file, "/Grid/Chemistry", checkpoint.field.As2DVector()); @@ -15,9 +22,16 @@ int write_checkpoint(const std::string &file_path, struct Checkpoint_s &&checkpo return 0; } -int read_checkpoint(const std::string &file_path, struct Checkpoint_s &checkpoint){ +int read_checkpoint(const std::string &dir_path, const std::string &file_name, struct Checkpoint_s &checkpoint){ + + fs::path file_path = fs::path(dir_path) / file_name; - H5Easy::File file(file_path, H5Easy::File::ReadOnly); + if (!fs::exists(file_path)) { + std::cerr << "File does not exist: " << file_path << std::endl; + return -1; + } + + H5Easy::File file(file_path, H5Easy::File::ReadOnly); checkpoint.iteration = H5Easy::load(file, "/MetaParam/Iterations"); diff --git a/src/poet.cpp b/src/poet.cpp index b40eeedac..4b920aa02 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -330,7 +330,7 @@ bool triggerRollbackIfExceeded(ChemistryModule &chem, RuntimeParameters ¶ms, " → rolling back to iteration " + std::to_string(rollback_iter)); Checkpoint_s checkpoint_read{.field = chem.getField()}; - read_checkpoint("checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read); + read_checkpoint(params.out_dir, "checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read); current_iteration = checkpoint_read.iteration; return true; // rollback happened } @@ -490,13 +490,13 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, if(iter % params.checkpoint_interval == 0){ MSG("Writing checkpoint of iteration " + std::to_string(iter)); - write_checkpoint("checkpoint" + std::to_string(iter) + ".hdf5", + write_checkpoint(params.out_dir, "checkpoint" + std::to_string(iter) + ".hdf5", {.field = chem.getField(), .iteration = iter}); } if (params.control_interval_enabled && !params.rollback_enabled) { - writeStatsToCSV(chem.error_history, chem.getField().GetProps(), 
"stats_overview"); + writeStatsToCSV(chem.error_history, chem.getField().GetProps(), params.out_dir,"stats_overview"); if(triggerRollbackIfExceeded(chem, params, iter)){ params.rollback_enabled = true; From 71269166eac7874c5d9a7f373c29c4b2bfd51a72 Mon Sep 17 00:00:00 2001 From: rastogi Date: Sun, 19 Oct 2025 11:49:52 +0200 Subject: [PATCH 17/19] migrate: separate control logic from ChemistryModule into dedicated ControlModule --- src/CMakeLists.txt | 1 + src/Chemistry/ChemistryModule.hpp | 849 ++++++++++++++---------------- src/Chemistry/MasterFunctions.cpp | 128 ++--- src/Chemistry/WorkerFunctions.cpp | 2 +- src/Control/ControlModule.cpp | 131 +++++ src/Control/ControlModule.hpp | 110 ++++ src/IO/StatsIO.cpp | 2 +- src/IO/StatsIO.hpp | 4 +- src/poet.cpp | 227 +++----- src/poet.hpp.in | 3 +- 10 files changed, 771 insertions(+), 686 deletions(-) create mode 100644 src/Control/ControlModule.cpp create mode 100644 src/Control/ControlModule.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a9849a768..940848898 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,6 +33,7 @@ add_library(POETLib Chemistry/SurrogateModels/HashFunctions.cpp Chemistry/SurrogateModels/InterpolationModule.cpp Chemistry/SurrogateModels/ProximityHashTable.cpp + Control/ControlModule.cpp ) set(POET_TUG_APPROACH "Implicit" CACHE STRING "tug numerical approach to use") diff --git a/src/Chemistry/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp index c6f57bbec..73be52c60 100644 --- a/src/Chemistry/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -4,17 +4,14 @@ #include "DataStructures/Field.hpp" #include "DataStructures/NamedVector.hpp" - #include "ChemistryDefs.hpp" - +#include "Control/ControlModule.hpp" #include "Init/InitialList.hpp" #include "NameDouble.h" #include "SurrogateModels/DHT_Wrapper.hpp" #include "SurrogateModels/Interpolation.hpp" - -#include "poet.hpp" - #include "PhreeqcRunner.hpp" + #include #include #include @@ -23,454 +20,412 @@ 
#include #include -namespace poet -{ +namespace poet { + class ControlModule; +/** + * \brief Wrapper around PhreeqcRM to provide POET specific parallelization with + * easy access. + */ +class ChemistryModule { +public: /** - * \brief Wrapper around PhreeqcRM to provide POET specific parallelization with - * easy access. + * Creates a new instance of Chemistry module with given grid cell count, work + * package size and communicator. + * + * This constructor shall only be called by the master. To create workers, see + * ChemistryModule::createWorker . + * + * When the use of parallelization is intended, the nxyz value shall be set to + * 1 to save memory and only one node is needed for initialization. + * + * \param nxyz Count of grid cells to allocate and initialize for each + * process. For parellel use set to 1 at the master. + * \param wp_size Count of grid cells to fill each work package at maximum. + * \param communicator MPI communicator to distribute work in. */ - class ChemistryModule - { - public: - /** - * Creates a new instance of Chemistry module with given grid cell count, work - * package size and communicator. - * - * This constructor shall only be called by the master. To create workers, see - * ChemistryModule::createWorker . - * - * When the use of parallelization is intended, the nxyz value shall be set to - * 1 to save memory and only one node is needed for initialization. - * - * \param nxyz Count of grid cells to allocate and initialize for each - * process. For parellel use set to 1 at the master. - * \param wp_size Count of grid cells to fill each work package at maximum. - * \param communicator MPI communicator to distribute work in. - */ - ChemistryModule(uint32_t wp_size, - const InitialList::ChemistryInit chem_params, - MPI_Comm communicator); - - /** - * Deconstructor, which frees DHT data structure if used. - */ - ~ChemistryModule(); - - void masterSetField(Field field); - /** - * Run the chemical simulation with parameters set. 
- */ - void simulate(double dt); - - /** - * Returns all known species names, including not only aqueous species, but - * also equilibrium, exchange, surface and kinetic reactants. - */ - // auto GetPropNames() const { return this->prop_names; } - - /** - * Return the accumulated runtime in seconds for chemical simulation. - */ - auto GetChemistryTime() const { return this->chem_t; } - - void setFilePadding(std::uint32_t maxiter) - { - this->file_pad = - static_cast(std::ceil(std::log10(maxiter + 1))); - } - - struct SurrogateSetup - { - std::vector prop_names; - std::array base_totals; - bool has_het_ids; - - bool dht_enabled; - std::uint32_t dht_size_mb; - int dht_snaps; - std::string dht_out_dir; - - bool interp_enabled; - std::uint32_t interp_bucket_size; - std::uint32_t interp_size_mb; - std::uint32_t interp_min_entries; - bool ai_surrogate_enabled; - }; - - void masterEnableSurrogates(const SurrogateSetup &setup) - { - // FIXME: This is a hack to get the prop_names and prop_count from the setup - this->prop_names = setup.prop_names; - this->prop_count = setup.prop_names.size(); - - this->dht_enabled = setup.dht_enabled; - this->interp_enabled = setup.interp_enabled; - this->ai_surrogate_enabled = setup.ai_surrogate_enabled; - - this->base_totals = setup.base_totals; - - if (this->dht_enabled || this->interp_enabled) - { - this->initializeDHT(setup.dht_size_mb, this->params.dht_species, - setup.has_het_ids); - - if (setup.dht_snaps != DHT_SNAPS_DISABLED) - { - this->setDHTSnapshots(setup.dht_snaps, setup.dht_out_dir); - } - } - - if (this->interp_enabled) - { - this->initializeInterp(setup.interp_bucket_size, setup.interp_size_mb, - setup.interp_min_entries, - this->params.interp_species); - } - } - - /** - * Intended to alias input parameters for grid initialization with a single - * value per species. - */ - using SingleCMap = std::unordered_map; - - /** - * Intended to alias input parameters for grid initialization with mutlitple - * values per species. 
- */ - using VectorCMap = std::unordered_map>; - - /** - * Enumerating DHT file options - */ - enum - { - DHT_SNAPS_DISABLED = 0, //!< disabled file output - DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation - DHT_SNAPS_ITEREND //!< output snapshots after each iteration - }; - - /** - * **Only called by workers!** Start the worker listening loop. - */ - void WorkerLoop(); - - /** - * **Called by master** Advise the workers to break the loop. - */ - void MasterLoopBreak(); - - /** - * **Master only** Return count of grid cells. - */ - auto GetNCells() const { return this->n_cells; } - /** - * **Master only** Return work package size. - */ - auto GetWPSize() const { return this->wp_size; } - /** - * **Master only** Return the time in seconds the master spent waiting for any - * free worker. - */ - auto GetMasterIdleTime() const { return this->idle_t; } - /** - * **Master only** Return the time in seconds the master spent in sequential - * part of the simulation, including times for shuffling/unshuffling field - * etc. - */ - auto GetMasterSequentialTime() const { return this->seq_t; } - /** - * **Master only** Return the time in seconds the master spent in the - * send/receive loop. - */ - auto GetMasterLoopTime() const { return this->send_recv_t; } - - - auto GetMasterCtrlLogicTime() const { return this->ctrl_t; } - - auto GetMasterCtrlBcastTime() const { return this->bcast_ctrl_t; } - - auto GetMasterRecvCtrlLogicTime() const { return this->recv_ctrl_t; } - - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to run Phreeqc simulation. - * - * \return Vector of all accumulated Phreeqc timings. - */ - std::vector GetWorkerPhreeqcTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to get values from the DHT. - * - * \return Vector of all accumulated DHT get times. 
- */ - std::vector GetWorkerDHTGetTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to write values to the DHT. - * - * \return Vector of all accumulated DHT fill times. - */ - std::vector GetWorkerDHTFillTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers waiting for work packages from the master. - * - * \return Vector of all accumulated waiting times. - */ - std::vector GetWorkerIdleTimings() const; - - std::vector GetWorkerControlTimings() const; - - /** - * **Master only** Collect and return DHT hits of all workers. - * - * \return Vector of all count of DHT hits. - */ - std::vector GetWorkerDHTHits() const; - - /** - * **Master only** Collect and return DHT evictions of all workers. - * - * \return Vector of all count of DHT evictions. - */ - std::vector GetWorkerDHTEvictions() const; - - /** - * **Master only** Returns the current state of the chemical field. - * - * \return Reference to the chemical field. - */ - Field &getField() { return this->chem_field; } - - /** - * **Master only** Enable/disable progress bar. - * - * \param enabled True if print progressbar, false if not. 
- */ - void setProgressBarPrintout(bool enabled) - { - this->print_progessbar = enabled; - }; - - /** - * **Master only** Set the ai surrogate validity vector from R - */ - void set_ai_surrogate_validity_vector(std::vector r_vector); - - std::vector GetWorkerInterpolationCalls() const; - - std::vector GetWorkerInterpolationWriteTimings() const; - std::vector GetWorkerInterpolationReadTimings() const; - std::vector GetWorkerInterpolationGatherTimings() const; - std::vector GetWorkerInterpolationFunctionCallTimings() const; - - std::vector GetWorkerPHTCacheHits() const; - - std::vector ai_surrogate_validity_vector; - - RuntimeParameters *runtime_params = nullptr; - - struct SimulationErrorStats - { - std::vector mape; - std::vector rrmse; - uint32_t iteration; // iterations in simulation after rollbacks - uint32_t rollback_count; - - SimulationErrorStats(size_t species_count, uint32_t iter, uint32_t counter) - : mape(species_count, 0.0), - rrmse(species_count, 0.0), - iteration(iter), - rollback_count(counter){} - }; - - std::vector error_history; - - static void computeSpeciesErrors(const std::vector &reference_values, - const std::vector &surrogate_values, - uint32_t size_per_prop, - uint32_t species_count, - SimulationErrorStats &species_error_stats); - - protected: - void initializeDHT(uint32_t size_mb, - const NamedVector &key_species, - bool has_het_ids); - void setDHTSnapshots(int type, const std::string &out_dir); - void setDHTReadFile(const std::string &input_file); - - void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, - std::uint32_t min_entries, - const NamedVector &key_species); - - enum - { - CHEM_FIELD_INIT, - CHEM_DHT_ENABLE, - CHEM_DHT_SIGNIF_VEC, - CHEM_DHT_SNAPS, - CHEM_DHT_READ_FILE, - CHEM_INTERP, - CHEM_IP_ENABLE, - CHEM_IP_MIN_ENTRIES, - CHEM_IP_SIGNIF_VEC, - CHEM_WORK_LOOP, - CHEM_PERF, - CHEM_BREAK_MAIN_LOOP, - CHEM_AI_BCAST_VALIDITY - }; - - enum - { - LOOP_WORK, - LOOP_END, - LOOP_CTRL - }; - - enum - { - 
WORKER_PHREEQC, - WORKER_CTRL_ITER, - WORKER_DHT_GET, - WORKER_DHT_FILL, - WORKER_IDLE, - WORKER_IP_WRITE, - WORKER_IP_READ, - WORKER_IP_GATHER, - WORKER_IP_FC, - WORKER_DHT_HITS, - WORKER_DHT_EVICTIONS, - WORKER_PHT_CACHE_HITS, - WORKER_IP_CALLS - }; - - std::vector interp_calls; - std::vector dht_hits; - std::vector dht_evictions; - - struct worker_s - { - double phreeqc_t = 0.; - double dht_get = 0.; - double dht_fill = 0.; - double idle_t = 0.; - double ctrl_t = 0.; - }; - - struct worker_info_s - { - char has_work = 0; - double *send_addr; - double *surrogate_addr; - }; - - using worker_list_t = std::vector; - using workpointer_t = std::vector::iterator; - - void MasterRunParallel(double dt); - void MasterRunSequential(); - - void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, workpointer_t &sur_pointer, - int &pkg_to_send, int &count_pkgs, int &free_workers, - double dt, uint32_t iteration, uint32_t control_iteration, - const std::vector &wp_sizes_vector); - void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, - int &free_workers); - - std::vector MasterGatherWorkerTimings(int type) const; - std::vector MasterGatherWorkerMetrics(int type) const; - - void WorkerProcessPkgs(struct worker_s &timings, uint32_t &iteration); - - void WorkerDoWork(MPI_Status &probe_status, int double_count, - struct worker_s &timings); - void WorkerPostIter(MPI_Status &prope_status, uint32_t iteration); - void WorkerPostSim(uint32_t iteration); - - void WorkerWriteDHTDump(uint32_t iteration); - void WorkerReadDHTDump(const std::string &dht_input_file); - - void WorkerPerfToMaster(int type, const struct worker_s &timings); - void WorkerMetricsToMaster(int type); - - void WorkerRunWorkPackage(WorkPackage &work_package, double dSimTime, - double dTimestep); - - std::vector CalculateWPSizesVector(uint32_t n_cells, - uint32_t wp_size) const; - std::vector shuffleField(const std::vector &in_field, - uint32_t size_per_prop, uint32_t prop_count, - 
uint32_t wp_count); - void unshuffleField(const std::vector &in_buffer, - uint32_t size_per_prop, uint32_t prop_count, - uint32_t wp_count, std::vector &out_field); - std::vector - parseDHTSpeciesVec(const NamedVector &key_species, - const std::vector &to_compare) const; - - void BCastStringVec(std::vector &io); - - int comm_size, comm_rank; - MPI_Comm group_comm; - - bool is_sequential; - bool is_master; - - uint32_t wp_size; - bool dht_enabled{false}; - int dht_snaps_type{DHT_SNAPS_DISABLED}; - std::string dht_file_out_dir; - - poet::DHT_Wrapper *dht = nullptr; - - bool dht_fill_during_rollback{false}; - bool interp_enabled{false}; - std::unique_ptr interp; - - bool ai_surrogate_enabled{false}; - - static constexpr uint32_t BUFFER_OFFSET = 6; - - inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const - { - MPI_Bcast(buf, count, datatype, 0, this->group_comm); - } - - inline void PropagateFunctionType(int &type) const - { - ChemBCast(&type, 1, MPI_INT); - } - double simtime = 0.; - double idle_t = 0.; - double seq_t = 0.; - double send_recv_t = 0.; - - double ctrl_t = 0.; - double bcast_ctrl_t = 0.; - double recv_ctrl_t = 0.; - - std::array base_totals{0}; - - bool print_progessbar{false}; - - std::uint8_t file_pad{1}; - - double chem_t{0.}; - - uint32_t n_cells = 0; - uint32_t prop_count = 0; + ChemistryModule(uint32_t wp_size, + const InitialList::ChemistryInit chem_params, + MPI_Comm communicator); + + /** + * Deconstructor, which frees DHT data structure if used. + */ + ~ChemistryModule(); + + void masterSetField(Field field); + /** + * Run the chemical simulation with parameters set. + */ + void simulate(double dt); + + /** + * Returns all known species names, including not only aqueous species, but + * also equilibrium, exchange, surface and kinetic reactants. + */ + // auto GetPropNames() const { return this->prop_names; } + + /** + * Return the accumulated runtime in seconds for chemical simulation. 
+ */ + auto GetChemistryTime() const { return this->chem_t; } + + void setFilePadding(std::uint32_t maxiter) { + this->file_pad = + static_cast(std::ceil(std::log10(maxiter + 1))); + } + + struct SurrogateSetup { std::vector prop_names; + std::array base_totals; + bool has_het_ids; - Field chem_field; + bool dht_enabled; + std::uint32_t dht_size_mb; + int dht_snaps; + std::string dht_out_dir; - const InitialList::ChemistryInit params; - - std::unique_ptr pqc_runner; - - std::vector sur_shuffled; + bool interp_enabled; + std::uint32_t interp_bucket_size; + std::uint32_t interp_size_mb; + std::uint32_t interp_min_entries; + bool ai_surrogate_enabled; }; + + void masterEnableSurrogates(const SurrogateSetup &setup) { + // FIXME: This is a hack to get the prop_names and prop_count from the setup + this->prop_names = setup.prop_names; + this->prop_count = setup.prop_names.size(); + + this->dht_enabled = setup.dht_enabled; + this->interp_enabled = setup.interp_enabled; + this->ai_surrogate_enabled = setup.ai_surrogate_enabled; + + this->base_totals = setup.base_totals; + + if (this->dht_enabled || this->interp_enabled) { + this->initializeDHT(setup.dht_size_mb, this->params.dht_species, + setup.has_het_ids); + + if (setup.dht_snaps != DHT_SNAPS_DISABLED) { + this->setDHTSnapshots(setup.dht_snaps, setup.dht_out_dir); + } + } + + if (this->interp_enabled) { + this->initializeInterp(setup.interp_bucket_size, setup.interp_size_mb, + setup.interp_min_entries, + this->params.interp_species); + } + } + + /** + * Intended to alias input parameters for grid initialization with a single + * value per species. + */ + using SingleCMap = std::unordered_map; + + /** + * Intended to alias input parameters for grid initialization with mutlitple + * values per species. 
+ */ + using VectorCMap = std::unordered_map>; + + /** + * Enumerating DHT file options + */ + enum { + DHT_SNAPS_DISABLED = 0, //!< disabled file output + DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation + DHT_SNAPS_ITEREND //!< output snapshots after each iteration + }; + + /** + * **Only called by workers!** Start the worker listening loop. + */ + void WorkerLoop(); + + /** + * **Called by master** Advise the workers to break the loop. + */ + void MasterLoopBreak(); + + /** + * **Master only** Return count of grid cells. + */ + auto GetNCells() const { return this->n_cells; } + /** + * **Master only** Return work package size. + */ + auto GetWPSize() const { return this->wp_size; } + /** + * **Master only** Return the time in seconds the master spent waiting for any + * free worker. + */ + auto GetMasterIdleTime() const { return this->idle_t; } + /** + * **Master only** Return the time in seconds the master spent in sequential + * part of the simulation, including times for shuffling/unshuffling field + * etc. + */ + auto GetMasterSequentialTime() const { return this->seq_t; } + /** + * **Master only** Return the time in seconds the master spent in the + * send/receive loop. + */ + auto GetMasterLoopTime() const { return this->send_recv_t; } + + auto GetMasterCtrlLogicTime() const { return this->ctrl_t; } + + auto GetMasterCtrlBcastTime() const { return this->bcast_ctrl_t; } + + auto GetMasterRecvCtrlLogicTime() const { return this->recv_ctrl_t; } + + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to run Phreeqc simulation. + * + * \return Vector of all accumulated Phreeqc timings. + */ + std::vector GetWorkerPhreeqcTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to get values from the DHT. + * + * \return Vector of all accumulated DHT get times. 
+ */ + std::vector GetWorkerDHTGetTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to write values to the DHT. + * + * \return Vector of all accumulated DHT fill times. + */ + std::vector GetWorkerDHTFillTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers waiting for work packages from the master. + * + * \return Vector of all accumulated waiting times. + */ + std::vector GetWorkerIdleTimings() const; + + std::vector GetWorkerControlTimings() const; + + /** + * **Master only** Collect and return DHT hits of all workers. + * + * \return Vector of all count of DHT hits. + */ + std::vector GetWorkerDHTHits() const; + + /** + * **Master only** Collect and return DHT evictions of all workers. + * + * \return Vector of all count of DHT evictions. + */ + std::vector GetWorkerDHTEvictions() const; + + /** + * **Master only** Returns the current state of the chemical field. + * + * \return Reference to the chemical field. + */ + Field &getField() { return this->chem_field; } + + /** + * **Master only** Enable/disable progress bar. + * + * \param enabled True if print progressbar, false if not. 
+ */ + void setProgressBarPrintout(bool enabled) { + this->print_progessbar = enabled; + }; + + /** + * **Master only** Set the ai surrogate validity vector from R + */ + void set_ai_surrogate_validity_vector(std::vector r_vector); + + std::vector GetWorkerInterpolationCalls() const; + + std::vector GetWorkerInterpolationWriteTimings() const; + std::vector GetWorkerInterpolationReadTimings() const; + std::vector GetWorkerInterpolationGatherTimings() const; + std::vector GetWorkerInterpolationFunctionCallTimings() const; + + std::vector GetWorkerPHTCacheHits() const; + + std::vector ai_surrogate_validity_vector; + +protected: + void initializeDHT(uint32_t size_mb, + const NamedVector &key_species, + bool has_het_ids); + void setDHTSnapshots(int type, const std::string &out_dir); + void setDHTReadFile(const std::string &input_file); + + void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, + std::uint32_t min_entries, + const NamedVector &key_species); + + enum { + CHEM_FIELD_INIT, + CHEM_DHT_ENABLE, + CHEM_DHT_SIGNIF_VEC, + CHEM_DHT_SNAPS, + CHEM_DHT_READ_FILE, + CHEM_IP, // Control Flag + CHEM_IP_ENABLE, + CHEM_IP_MIN_ENTRIES, + CHEM_IP_SIGNIF_VEC, + CHEM_WORK_LOOP, + CHEM_PERF, + CHEM_BREAK_MAIN_LOOP, + CHEM_AI_BCAST_VALIDITY + }; + + enum { LOOP_WORK, LOOP_END, LOOP_CTRL }; + + enum { + WORKER_PHREEQC, + WORKER_CTRL_ITER, + WORKER_DHT_GET, + WORKER_DHT_FILL, + WORKER_IDLE, + WORKER_IP_WRITE, + WORKER_IP_READ, + WORKER_IP_GATHER, + WORKER_IP_FC, + WORKER_DHT_HITS, + WORKER_DHT_EVICTIONS, + WORKER_PHT_CACHE_HITS, + WORKER_IP_CALLS + }; + + std::vector interp_calls; + std::vector dht_hits; + std::vector dht_evictions; + + struct worker_s { + double phreeqc_t = 0.; + double dht_get = 0.; + double dht_fill = 0.; + double idle_t = 0.; + double ctrl_t = 0.; + }; + + struct worker_info_s { + char has_work = 0; + double *send_addr; + double *surrogate_addr; + }; + + using worker_list_t = std::vector; + using workpointer_t = std::vector::iterator; + + 
void MasterRunParallel(double dt); + void MasterRunSequential(); + + void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, + workpointer_t &sur_pointer, int &pkg_to_send, + int &count_pkgs, int &free_workers, double dt, + uint32_t iteration, uint32_t control_iteration, + const std::vector &wp_sizes_vector); + void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, + int &free_workers); + + std::vector MasterGatherWorkerTimings(int type) const; + std::vector MasterGatherWorkerMetrics(int type) const; + + void WorkerProcessPkgs(struct worker_s &timings, uint32_t &iteration); + + void WorkerDoWork(MPI_Status &probe_status, int double_count, + struct worker_s &timings); + void WorkerPostIter(MPI_Status &prope_status, uint32_t iteration); + void WorkerPostSim(uint32_t iteration); + + void WorkerWriteDHTDump(uint32_t iteration); + void WorkerReadDHTDump(const std::string &dht_input_file); + + void WorkerPerfToMaster(int type, const struct worker_s &timings); + void WorkerMetricsToMaster(int type); + + void WorkerRunWorkPackage(WorkPackage &work_package, double dSimTime, + double dTimestep); + + std::vector CalculateWPSizesVector(uint32_t n_cells, + uint32_t wp_size) const; + std::vector shuffleField(const std::vector &in_field, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count); + void unshuffleField(const std::vector &in_buffer, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count, std::vector &out_field); + std::vector + parseDHTSpeciesVec(const NamedVector &key_species, + const std::vector &to_compare) const; + + void BCastStringVec(std::vector &io); + + int comm_size, comm_rank; + MPI_Comm group_comm; + + bool is_sequential; + bool is_master; + + uint32_t wp_size; + bool dht_enabled{false}; + int dht_snaps_type{DHT_SNAPS_DISABLED}; + std::string dht_file_out_dir; + + poet::DHT_Wrapper *dht = nullptr; + + bool dht_fill_during_rollback{false}; + bool interp_enabled{false}; + std::unique_ptr interp; + + 
bool ai_surrogate_enabled{false}; + + static constexpr uint32_t BUFFER_OFFSET = 6; + + inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const { + MPI_Bcast(buf, count, datatype, 0, this->group_comm); + } + + inline void PropagateFunctionType(int &type) const { + ChemBCast(&type, 1, MPI_INT); + } + double simtime = 0.; + double idle_t = 0.; + double seq_t = 0.; + double send_recv_t = 0.; + + double ctrl_t = 0.; + double bcast_ctrl_t = 0.; + double recv_ctrl_t = 0.; + + std::array base_totals{0}; + + bool print_progessbar{false}; + + std::uint8_t file_pad{1}; + + double chem_t{0.}; + + uint32_t n_cells = 0; + uint32_t prop_count = 0; + std::vector prop_names; + + Field chem_field; + + const InitialList::ChemistryInit params; + + std::unique_ptr pqc_runner; + + std::unique_ptr ctrl_module; + + //std::vector sur_shuffled; +}; } // namespace poet #endif // CHEMISTRYMODULE_H_ diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index c2710bf8b..4c75fb3cd 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -166,39 +165,6 @@ std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { return ret; } -void poet::ChemistryModule::computeSpeciesErrors(const std::vector &reference_values, - const std::vector &surrogate_values, - uint32_t size_per_prop, - uint32_t species_count, - SimulationErrorStats &species_error_stats) { - for (uint32_t i = 0; i < species_count; ++i) { - double err_sum = 0.0; - double sqr_err_sum = 0.0; - uint32_t base_idx = i * size_per_prop; - - for (uint32_t j = 0; j < size_per_prop; ++j) { - const double ref_value = reference_values[base_idx + j]; - const double sur_value = surrogate_values[base_idx + j]; - - if (ref_value == 0.0) { - if (sur_value != 0.0) { - err_sum += 1.0; - sqr_err_sum += 1.0; - } - // Both zero: skip - } else { - double alpha = 1.0 - (sur_value / ref_value); - err_sum += 
std::abs(alpha); - sqr_err_sum += alpha * alpha; - } - } - - species_error_stats.mape[i] = 100.0 * (err_sum / size_per_prop); - species_error_stats.rrmse[i] = - (size_per_prop > 0) ? std::sqrt(sqr_err_sum / size_per_prop) : 0.0; - } -} - inline std::vector shuffleVector(const std::vector &in_vector, uint32_t size_per_prop, uint32_t wp_count) { @@ -269,8 +235,8 @@ inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { inline void poet::ChemistryModule::MasterSendPkgs( worker_list_t &w_list, workpointer_t &work_pointer, workpointer_t &sur_pointer, int &pkg_to_send, int &count_pkgs, - int &free_workers, double dt, uint32_t iteration, - uint32_t control_interval, const std::vector &wp_sizes_vector) { + int &free_workers, double dt, uint32_t iteration, uint32_t control_interval, + const std::vector &wp_sizes_vector) { /* declare variables */ int local_work_package_size; @@ -335,7 +301,7 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, int need_to_receive = 1; double idle_a, idle_b; int p, size; - double recv_a, recv_b; + double recv_a, recv_b; MPI_Status probe_status; // master_recv_a = MPI_Wtime(); @@ -461,28 +427,9 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { /* start time measurement of broadcasting interpolation status */ ctrl_bcast_a = MPI_Wtime(); - - ftype = CHEM_INTERP; + ftype = CHEM_IP; PropagateFunctionType(ftype); - - int interp_flag = 0; - int dht_fill_flag = 0; - - if(this->runtime_params->rollback_enabled){ - this->interp_enabled = false; - this->dht_fill_during_rollback = true; - interp_flag = 0; - dht_fill_flag = 1; - } - else { - this->interp_enabled = true; - this->dht_fill_during_rollback = false; - interp_flag = 1; - dht_fill_flag = 0; - } - ChemBCast(&interp_flag, 1, MPI_INT); - ChemBCast(&dht_fill_flag, 1, MPI_INT); - + ctrl_module->BCastControlFlags(); /* end time measurement of broadcasting interpolation status */ ctrl_bcast_b = MPI_Wtime(); this->bcast_ctrl_t += ctrl_bcast_b - 
ctrl_bcast_a; @@ -494,11 +441,12 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { static uint32_t iteration = 0; - uint32_t control_logic_enabled = this->runtime_params->control_interval_enabled ? 1 : 0; + uint32_t control_logic_enabled = + ctrl_module->control_interval_enabled ? 1 : 0; if (control_logic_enabled) { - sur_shuffled.clear(); - sur_shuffled.reserve(this->n_cells * this->prop_count); + ctrl_module->sur_shuffled.clear(); + ctrl_module->sur_shuffled.reserve(this->n_cells * this->prop_count); } /* start time measurement of sequential part */ @@ -511,14 +459,14 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { shuffleField(chem_field.AsVector(), this->n_cells, this->prop_count, wp_sizes_vector.size()); - this->sur_shuffled.resize(mpi_buffer.size()); + ctrl_module->sur_shuffled.resize(mpi_buffer.size()); /* setup local variables */ pkg_to_send = wp_sizes_vector.size(); pkg_to_recv = wp_sizes_vector.size(); workpointer_t work_pointer = mpi_buffer.begin(); - workpointer_t sur_pointer = sur_shuffled.begin(); + workpointer_t sur_pointer = ctrl_module->sur_shuffled.begin(); worker_list_t worker_list(this->comm_size - 1); free_workers = this->comm_size - 1; @@ -552,43 +500,37 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { // Just to complete the progressbar std::cout << std::endl; - /* stop time measurement of chemistry time needed for send/recv loop */ - worker_chemistry_b = MPI_Wtime(); - this->send_recv_t += worker_chemistry_b - worker_chemistry_a; + /* stop time measurement of chemistry time needed for send/recv loop */ + worker_chemistry_b = MPI_Wtime(); + this->send_recv_t += worker_chemistry_b - worker_chemistry_a; - /* start time measurement of sequential part */ - seq_c = MPI_Wtime(); + /* start time measurement of sequential part */ + seq_c = MPI_Wtime(); - /* unshuffle grid */ - // grid.importAndUnshuffle(mpi_buffer); - std::vector out_vec{mpi_buffer}; - unshuffleField(mpi_buffer, this->n_cells, this->prop_count, 
- wp_sizes_vector.size(), out_vec); - chem_field = out_vec; + /* unshuffle grid */ + // grid.importAndUnshuffle(mpi_buffer); + std::vector out_vec{mpi_buffer}; + unshuffleField(mpi_buffer, this->n_cells, this->prop_count, + wp_sizes_vector.size(), out_vec); + chem_field = out_vec; - /* do master stuff */ + /* do master stuff */ - /* start time measurement of control logic */ - ctrl_a = MPI_Wtime(); + /* start time measurement of control logic */ + ctrl_a = MPI_Wtime(); - if (control_logic_enabled && !this->runtime_params->rollback_enabled) { + if (control_logic_enabled && !ctrl_module->rollback_enabled) { + std::cout << "[Master] Control logic enabled for this iteration." << std::endl; + std::vector sur_unshuffled{ctrl_module->sur_shuffled}; + unshuffleField(ctrl_module->sur_shuffled, this->n_cells, this->prop_count, + wp_sizes_vector.size(), sur_unshuffled); - std::vector sur_unshuffled{sur_shuffled};; - - unshuffleField(sur_shuffled, this->n_cells, this->prop_count, - wp_sizes_vector.size(), sur_unshuffled); - - SimulationErrorStats stats(this->prop_count, this->runtime_params->global_iter, this->runtime_params->rollback_counter); - - computeSpeciesErrors(out_vec, sur_unshuffled, this->n_cells, this->prop_count, stats); - - error_history.push_back(stats); - } - - /* end time measurement of control logic */ - ctrl_b = MPI_Wtime(); - this->ctrl_t += ctrl_b - ctrl_a; + ctrl_module->computeSpeciesErrors(out_vec, sur_unshuffled, this->n_cells); + } + /* end time measurement of control logic */ + ctrl_b = MPI_Wtime(); + this->ctrl_t += ctrl_b - ctrl_a; /* start time measurement of master chemistry */ sim_e_chemistry = MPI_Wtime(); diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index 4406ec65d..8cf15fe92 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -67,7 +67,7 @@ namespace poet MPI_INT, 0, this->group_comm); break; } - case CHEM_INTERP: + case CHEM_IP: { int interp_flag = 0; int 
dht_fill_flag = 0; diff --git a/src/Control/ControlModule.cpp b/src/Control/ControlModule.cpp new file mode 100644 index 000000000..a5a71d577 --- /dev/null +++ b/src/Control/ControlModule.cpp @@ -0,0 +1,131 @@ +#include "ControlModule.hpp" +#include "IO/Datatypes.hpp" +#include "IO/HDF5Functions.hpp" +#include "IO/StatsIO.hpp" +#include + +bool poet::ControlModule::isControlIteration(uint32_t iter) { + control_interval_enabled = (iter % control_interval == 0); + if (control_interval_enabled) { + MSG("[Control] Control interval triggered at iteration " + + std::to_string(iter)); + } + return control_interval_enabled; +} + +void poet::ControlModule::beginIteration() { + if (rollback_enabled) { + if (sur_disabled_counter > 0) { + sur_disabled_counter--; + MSG("Rollback counter: " + std::to_string(sur_disabled_counter)); + } else { + rollback_enabled = false; + } + } +} + +void poet::ControlModule::endIteration(uint32_t iter) { + /* Writing a checkpointing */ + if (checkpoint_interval > 0 && iter % checkpoint_interval == 0) { + MSG("Writing checkpoint of iteration " + std::to_string(iter)); + write_checkpoint(out_dir, "checkpoint" + std::to_string(iter) + ".hdf5", + {.field = chem->getField(), .iteration = iter}); + } + + /* Control Logic*/ + if (control_interval_enabled && !rollback_enabled) { + writeStatsToCSV(error_history, species_names, out_dir, + "stats_overview"); + + if (triggerRollbackIfExceeded(*chem, *params, iter)) { + rollback_enabled = true; + rollback_counter++; + sur_disabled_counter = control_interval; + MSG("Interpolation disabled for the next " + + std::to_string(control_interval) + "."); + } + } +} + +void poet::ControlModule::BCastControlFlags() { + int interp_flag = rollback_enabled ? 0 : 1; + int dht_fill_flag = rollback_enabled ? 
1 : 0; + chem->ChemBCast(&interp_flag, 1, MPI_INT); + chem->ChemBCast(&dht_fill_flag, 1, MPI_INT); +} + +bool poet::ControlModule::triggerRollbackIfExceeded(ChemistryModule &chem, + RuntimeParameters ¶ms, + uint32_t &iter) { + + if (error_history.empty()) { + MSG("No error history yet; skipping rollback check."); + return false; + } + + const auto &mape = chem.error_history.back().mape; + const auto &props = chem.getField().GetProps(); + + for (uint32_t i = 0; i < params.mape_threshold.size(); ++i) { + // Skip invalid entries + if (mape[i] == 0) { + continue; + } + bool mape_exceeded = mape[i] > params.mape_threshold[i]; + + if (mape_exceeded) { + uint32_t rollback_iter = ((iter - 1) / params.checkpoint_interval) * + params.checkpoint_interval; + + MSG("[THRESHOLD EXCEEDED] " + props[i] + + " has MAPE = " + std::to_string(mape[i]) + + " exceeding threshold = " + std::to_string(params.mape_threshold[i]) + + " → rolling back to iteration " + std::to_string(rollback_iter)); + + Checkpoint_s checkpoint_read{.field = chem.getField()}; + read_checkpoint(params.out_dir, + "checkpoint" + std::to_string(rollback_iter) + ".hdf5", + checkpoint_read); + iter = checkpoint_read.iteration; + return true; + } + } + MSG("All species are within their MAPE and RRMSE thresholds."); + return false; +} + +void poet::ControlModule::computeSpeciesErrors( + const std::vector &reference_values, + const std::vector &surrogate_values, uint32_t size_per_prop) { + + SimulationErrorStats species_error_stats(species_count, params->global_iter, + rollback_counter); + + for (uint32_t i = 0; i < species_count; ++i) { + double err_sum = 0.0; + double sqr_err_sum = 0.0; + uint32_t base_idx = i * size_per_prop; + + for (uint32_t j = 0; j < size_per_prop; ++j) { + const double ref_value = reference_values[base_idx + j]; + const double sur_value = surrogate_values[base_idx + j]; + + if (ref_value == 0.0) { + if (sur_value != 0.0) { + err_sum += 1.0; + sqr_err_sum += 1.0; + } + // Both zero: skip + } else 
{ + double alpha = 1.0 - (sur_value / ref_value); + err_sum += std::abs(alpha); + sqr_err_sum += alpha * alpha; + } + } + + species_error_stats.mape[i] = 100.0 * (err_sum / size_per_prop); + species_error_stats.rrmse[i] = + (size_per_prop > 0) ? std::sqrt(sqr_err_sum / size_per_prop) : 0.0; + } + error_history.push_back(species_error_stats); +} diff --git a/src/Control/ControlModule.hpp b/src/Control/ControlModule.hpp new file mode 100644 index 000000000..6bd848d07 --- /dev/null +++ b/src/Control/ControlModule.hpp @@ -0,0 +1,110 @@ +#ifndef CONTROLMODULE_H_ +#define CONTROLMODULE_H_ + +#include "Base/Macros.hpp" +#include "Chemistry/ChemistryModule.hpp" +#include "poet.hpp" + +#include +#include +#include + +namespace poet { + +class ChemistryModule; + +class ControlModule { + +public: + ControlModule(RuntimeParameters *run_params, ChemistryModule *chem_module) + : params(run_params), chem(chem_module) {}; + + /* Control configuration*/ + std::vector species_names; + uint32_t species_count = 0; + std::string out_dir; + + bool rollback_enabled = false; + bool control_interval_enabled = false; + + std::uint32_t global_iter = 0; + std::uint32_t sur_disabled_counter = 0; + std::uint32_t rollback_counter = 0; + std::uint32_t checkpoint_interval = 0; + std::uint32_t control_interval = 0; + + std::vector mape_threshold; + std::vector rrmse_threshold; + + double ctrl_t = 0.; + double bcast_ctrl_t = 0.; + double recv_ctrl_t = 0.; + + /* Buffer for shuffled surrogate data */ + std::vector sur_shuffled; + + bool isControlIteration(uint32_t iter); + + void beginIteration(); + + void endIteration(uint32_t iter); + + void BCastControlFlags(); + + bool triggerRollbackIfExceeded(ChemistryModule &chem, + RuntimeParameters ¶ms, uint32_t &iter); + + struct SimulationErrorStats { + std::vector mape; + std::vector rrmse; + uint32_t iteration; // iterations in simulation after rollbacks + uint32_t rollback_count; + + SimulationErrorStats(size_t species_count, uint32_t iter, uint32_t 
counter) + : mape(species_count, 0.0), rrmse(species_count, 0.0), iteration(iter), + rollback_count(counter) {} + }; + + static void computeSpeciesErrors(const std::vector &reference_values, + const std::vector &surrogate_values, + uint32_t size_per_prop); + + std::vector error_history; + + struct ControlSetup { + std::string out_dir; + std::uint32_t checkpoint_interval; + std::uint32_t control_interval; + std::uint32_t species_count; + + std::vector species_names; + std::vector mape_threshold; + std::vector rrmse_threshold; + }; + + void enableControlLogic(const ControlSetup &setup) { + out_dir = setup.out_dir; + checkpoint_interval = setup.checkpoint_interval; + control_interval = setup.control_interval; + species_count = setup.species_count; + + species_names = setup.species_names; + mape_threshold = setup.mape_threshold; + rrmse_threshold = setup.rrmse_threshold; + } + + /* Profiling getters */ + auto GetMasterCtrlLogicTime() const { return this->ctrl_t; } + + auto GetMasterCtrlBcastTime() const { return this->bcast_ctrl_t; } + + auto GetMasterRecvCtrlLogicTime() const { return this->recv_ctrl_t; } + +private: + RuntimeParameters *params; + ChemistryModule *chem; +}; + +} // namespace poet + +#endif // CONTROLMODULE_H_ \ No newline at end of file diff --git a/src/IO/StatsIO.cpp b/src/IO/StatsIO.cpp index 8e3c2978c..0b82a191a 100644 --- a/src/IO/StatsIO.cpp +++ b/src/IO/StatsIO.cpp @@ -7,7 +7,7 @@ namespace poet { - void writeStatsToCSV(const std::vector &all_stats, + void writeStatsToCSV(const std::vector &all_stats, const std::vector &species_names, const std::string &out_dir, const std::string &filename) diff --git a/src/IO/StatsIO.hpp b/src/IO/StatsIO.hpp index cb432f939..e208d4bbb 100644 --- a/src/IO/StatsIO.hpp +++ b/src/IO/StatsIO.hpp @@ -1,9 +1,9 @@ #include -#include "Chemistry/ChemistryModule.hpp" +#include "Control/ControlModule.hpp" namespace poet { - void writeStatsToCSV(const std::vector &all_stats, + void writeStatsToCSV(const std::vector 
&all_stats, const std::vector &species_names, const std::string &out_dir, const std::string &filename); diff --git a/src/poet.cpp b/src/poet.cpp index 4b920aa02..48260f3c7 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -25,10 +25,8 @@ #include "Base/RInsidePOET.hpp" #include "CLI/CLI.hpp" #include "Chemistry/ChemistryModule.hpp" +#include "Control/ControlManager.hpp" #include "DataStructures/Field.hpp" -#include "IO/Datatypes.hpp" -#include "IO/HDF5Functions.hpp" -#include "IO/StatsIO.hpp" #include "Init/InitialList.hpp" #include "Transport/DiffusionModule.hpp" @@ -68,8 +66,7 @@ static poet::DEFunc ReadRObj_R; static poet::DEFunc SaveRObj_R; static poet::DEFunc source_R; -static void init_global_functions(RInside &R) -{ +static void init_global_functions(RInside &R) { R.parseEval(kin_r_library); master_init_R = DEFunc("master_init"); master_iteration_end_R = DEFunc("master_iteration_end"); @@ -92,15 +89,9 @@ static void init_global_functions(RInside &R) // R.parseEval("mysetup$state_C <- TMP"); // } -enum ParseRet -{ - PARSER_OK, - PARSER_ERROR, - PARSER_HELP -}; +enum ParseRet { PARSER_OK, PARSER_ERROR, PARSER_HELP }; -int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) -{ +int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { CLI::App app{"POET - Potsdam rEactive Transport simulator"}; @@ -182,12 +173,9 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) "Output directory of the simulation") ->required(); - try - { + try { app.parse(argc, argv); - } - catch (const CLI::ParseError &e) - { + } catch (const CLI::ParseError &e) { app.exit(e); return -1; } @@ -199,16 +187,14 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) if (params.as_qs) params.out_ext = "qs"; - if (MY_RANK == 0) - { + if (MY_RANK == 0) { // MSG("Complete results storage is " + BOOL_PRINT(simparams.store_result)); MSG("Output format/extension is " + params.out_ext); MSG("Work Package Size: " + 
std::to_string(params.work_package_size)); MSG("DHT is " + BOOL_PRINT(params.use_dht)); MSG("AI Surrogate is " + BOOL_PRINT(params.use_ai_surrogate)); - if (params.use_dht) - { + if (params.use_dht) { // MSG("DHT strategy is " + std::to_string(simparams.dht_strategy)); // MDL: these should be outdated (?) // MSG("DHT key default digits (ignored if 'signif_vector' is " @@ -222,8 +208,7 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) // MSG("DHT load file is " + chem_params.dht_file); } - if (params.use_interp) - { + if (params.use_interp) { MSG("PHT interpolation enabled: " + BOOL_PRINT(params.use_interp)); MSG("PHT interp-size = " + std::to_string(params.interp_size)); MSG("PHT interp-min = " + std::to_string(params.interp_min_entries)); @@ -251,8 +236,7 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) // // log before rounding? // R["dht_log"] = simparams.dht_log; - try - { + try { Rcpp::List init_params_(ReadRObj_R(init_file)); params.init_params = init_params_; @@ -269,13 +253,11 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) Rcpp::as(global_rt_setup->operator[]("control_interval")); params.checkpoint_interval = Rcpp::as(global_rt_setup->operator[]("checkpoint_interval")); - params.mape_threshold = - Rcpp::as>(global_rt_setup->operator[]("mape_threshold")); - params.rrmse_threshold = - Rcpp::as>(global_rt_setup->operator[]("rrmse_threshold")); - } - catch (const std::exception &e) - { + params.mape_threshold = Rcpp::as>( + global_rt_setup->operator[]("mape_threshold")); + params.rrmse_threshold = Rcpp::as>( + global_rt_setup->operator[]("rrmse_threshold")); + } catch (const std::exception &e) { ERRMSG("Error while parsing R scripts: " + std::string(e.what())); return ParseRet::PARSER_ERROR; } @@ -285,8 +267,7 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) // HACK: this is a step back as the order and also the count of fields is // predefined, but it will change in the future 
-void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) -{ +void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) { R["TMP"] = Rcpp::wrap(trans.AsVector()); R["TMP_PROPS"] = Rcpp::wrap(trans.GetProps()); R.parseEval(std::string("state_T <- setNames(data.frame(matrix(TMP, nrow=" + @@ -303,53 +284,15 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) *global_rt_setup = R["setup"]; } -bool triggerRollbackIfExceeded(ChemistryModule &chem, RuntimeParameters ¶ms, uint32_t ¤t_iteration) -{ - const auto &mape = chem.error_history.back().mape; - const auto &rrmse = chem.error_history.back().rrmse; - const auto &props = chem.getField().GetProps(); - - for (uint32_t i = 0; i < params.mape_threshold.size(); ++i) - { - // Skip invalid entries - if ((mape[i] == 0 && rrmse[i] == 0)) - continue; - - bool mape_exceeded = mape[i] > params.mape_threshold[i]; - bool rrmse_exceeded = rrmse[i] > params.rrmse_threshold[i]; - - if (mape_exceeded || rrmse_exceeded) - { - uint32_t rollback_iter = ((current_iteration - 1) / params.checkpoint_interval) * params.checkpoint_interval; - std::string metric = mape_exceeded ? "MAPE" : "RRMSE"; - double value = mape_exceeded ? mape[i] : rrmse[i]; - double threshold = mape_exceeded ? 
params.mape_threshold[i] : params.rrmse_threshold[i]; - - MSG("[THRESHOLD EXCEEDED] " + props[i] + " has " + metric + " = " + - std::to_string(value) + " exceeding threshold = " + std::to_string(threshold) + - " → rolling back to iteration " + std::to_string(rollback_iter)); - - Checkpoint_s checkpoint_read{.field = chem.getField()}; - read_checkpoint(params.out_dir, "checkpoint" + std::to_string(rollback_iter) + ".hdf5", checkpoint_read); - current_iteration = checkpoint_read.iteration; - return true; // rollback happened - } - } - MSG("All species are within their MAPE and RRMSE thresholds."); - return false; -} - static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, DiffusionModule &diffusion, - ChemistryModule &chem) -{ + ChemistryModule &chem, ControlModule &control) { /* Iteration Count is dynamic, retrieving value from R (is only needed by * master for the following loop) */ uint32_t maxiter = params.timesteps.size(); - if (params.print_progress) - { + if (params.print_progress) { chem.setProgressBarPrintout(true); } R["TMP_PROPS"] = Rcpp::wrap(chem.getField().GetProps()); @@ -359,20 +302,24 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, double dSimTime{0}; double chkTime = 0.0; - for (uint32_t iter = 1; iter < maxiter + 1; iter++) - { + for (uint32_t iter = 1; iter < maxiter + 1; iter++) { // Rollback countdowm - if (params.rollback_enabled) { - if (params.sur_disabled_counter > 0) { + + /* + if (params.rollback_enabled) { + if (params.sur_disabled_counter > 0) { --params.sur_disabled_counter; MSG("Rollback counter: " + std::to_string(params.sur_disabled_counter)); - } else { + } else { params.rollback_enabled = false; } } + */ + control.beginIteration(iter); - params.global_iter = iter; - params.control_interval_enabled = (iter % params.control_interval == 0); + // params.global_iter = iter; + control.isControlIteration(iter); + // params.control_interval_enabled = (iter % params.control_interval == 0); double 
start_t = MPI_Wtime(); @@ -389,13 +336,12 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, /* run transport */ diffusion.simulate(dt); - chem.runtime_params = ¶ms; + // chem.runtime_params = ¶ms; chem.getField().update(diffusion.getField()); // MSG("Chemistry start"); - if (params.use_ai_surrogate) - { + if (params.use_ai_surrogate) { double ai_start_t = MPI_Wtime(); // Save current values from the tug field as predictor for the ai step R["TMP"] = Rcpp::wrap(chem.getField().AsVector()); @@ -446,8 +392,7 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, chem.simulate(dt); /* AI surrogate iterative training*/ - if (params.use_ai_surrogate) - { + if (params.use_ai_surrogate) { double ai_start_t = MPI_Wtime(); R["TMP"] = Rcpp::wrap(chem.getField().AsVector()); @@ -487,25 +432,32 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, std::to_string(maxiter)); double chk_start = MPI_Wtime(); + control.endIteration(iter) + /* + if (iter % params.checkpoint_interval == 0) { + MSG("Writing checkpoint of iteration " + std::to_string(iter)); + write_checkpoint(params.out_dir, + "checkpoint" + std::to_string(iter) + ".hdf5", + {.field = chem.getField(), .iteration = iter}); + } - if(iter % params.checkpoint_interval == 0){ - MSG("Writing checkpoint of iteration " + std::to_string(iter)); - write_checkpoint(params.out_dir, "checkpoint" + std::to_string(iter) + ".hdf5", - {.field = chem.getField(), .iteration = iter}); - } - if (params.control_interval_enabled && !params.rollback_enabled) - { - writeStatsToCSV(chem.error_history, chem.getField().GetProps(), params.out_dir,"stats_overview"); + if (params.control_interval_enabled && !params.rollback_enabled) { + writeStatsToCSV(chem.error_history, chem.getField().GetProps(), + params.out_dir, "stats_overview"); - if(triggerRollbackIfExceeded(chem, params, iter)){ - params.rollback_enabled = true; - params.rollback_counter ++; - params.sur_disabled_counter = 
params.control_interval; - MSG("Interpolation disabled for the next " + std::to_string(params.control_interval) + "."); - } - } - double chk_end = MPI_Wtime(); + if (triggerRollbackIfExceeded(chem, params, iter)) { + params.rollback_enabled = true; + params.rollback_counter++; + params.sur_disabled_counter = params.control_interval; + MSG("Interpolation disabled for the next " + + std::to_string(params.control_interval) + "."); + } + } + + */ + + double chk_end = MPI_Wtime(); chkTime += chk_end - chk_start; // MSG(); @@ -529,10 +481,10 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, ctrl_profiling["ctrl_logic_master"] = chem.GetMasterCtrlLogicTime(); ctrl_profiling["bcast_ctrl_logic_master"] = chem.GetMasterCtrlBcastTime(); ctrl_profiling["recv_ctrl_logic_maser"] = chem.GetMasterRecvCtrlLogicTime(); - ctrl_profiling["ctrl_logic_worker"] = Rcpp::wrap(chem.GetWorkerControlTimings()); + ctrl_profiling["ctrl_logic_worker"] = + Rcpp::wrap(chem.GetWorkerControlTimings()); - if (params.use_dht) - { + if (params.use_dht) { chem_profiling["dht_hits"] = Rcpp::wrap(chem.GetWorkerDHTHits()); chem_profiling["dht_evictions"] = Rcpp::wrap(chem.GetWorkerDHTEvictions()); chem_profiling["dht_get_time"] = Rcpp::wrap(chem.GetWorkerDHTGetTimings()); @@ -540,8 +492,7 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, Rcpp::wrap(chem.GetWorkerDHTFillTimings()); } - if (params.use_interp) - { + if (params.use_interp) { chem_profiling["interp_w"] = Rcpp::wrap(chem.GetWorkerInterpolationWriteTimings()); chem_profiling["interp_r"] = @@ -561,15 +512,13 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, profiling["diffusion"] = diffusion_profiling; profiling["ctrl_logic"] = ctrl_profiling; - chem.MasterLoopBreak(); return profiling; } std::vector getSpeciesNames(const Field &&field, int root, - MPI_Comm comm) -{ + MPI_Comm comm) { std::uint32_t n_elements; std::uint32_t n_string_size; @@ -579,13 +528,11 @@ std::vector 
getSpeciesNames(const Field &&field, int root, const bool is_master = root == rank; // first, the master sends all the species names iterative - if (is_master) - { + if (is_master) { n_elements = field.GetProps().size(); MPI_Bcast(&n_elements, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); - for (std::uint32_t i = 0; i < n_elements; i++) - { + for (std::uint32_t i = 0; i < n_elements; i++) { n_string_size = field.GetProps()[i].size(); MPI_Bcast(&n_string_size, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); MPI_Bcast(const_cast(field.GetProps()[i].c_str()), n_string_size, @@ -600,8 +547,7 @@ std::vector getSpeciesNames(const Field &&field, int root, std::vector species_names_out(n_elements); - for (std::uint32_t i = 0; i < n_elements; i++) - { + for (std::uint32_t i = 0; i < n_elements; i++) { MPI_Bcast(&n_string_size, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); char recv_buf[n_string_size]; @@ -614,8 +560,7 @@ std::vector getSpeciesNames(const Field &&field, int root, return species_names_out; } -std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) -{ +std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) { std::array base_totals; int rank; @@ -623,8 +568,7 @@ std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) const bool is_master = root == rank; - if (is_master) - { + if (is_master) { const auto h_col = field["H"]; const auto o_col = field["O"]; @@ -639,8 +583,7 @@ std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) return base_totals; } -bool getHasID(Field &&field, int root, MPI_Comm comm) -{ +bool getHasID(Field &&field, int root, MPI_Comm comm) { bool has_id; int rank; @@ -648,8 +591,7 @@ bool getHasID(Field &&field, int root, MPI_Comm comm) const bool is_master = root == rank; - if (is_master) - { + if (is_master) { const auto ID_field = field["ID"]; std::set unique_IDs(ID_field.begin(), ID_field.end()); @@ -666,8 +608,7 @@ bool getHasID(Field &&field, int root, MPI_Comm comm) return has_id; } -int main(int argc, char *argv[]) 
-{ +int main(int argc, char *argv[]) { int world_size; MPI_Init(&argc, &argv); @@ -678,8 +619,7 @@ int main(int argc, char *argv[]) RInsidePOET &R = RInsidePOET::getInstance(); - if (MY_RANK == 0) - { + if (MY_RANK == 0) { MSG("Running POET version " + std::string(poet_version)); } @@ -687,8 +627,7 @@ int main(int argc, char *argv[]) RuntimeParameters run_params; - if (parseInitValues(argc, argv, run_params) != 0) - { + if (parseInitValues(argc, argv, run_params) != 0) { MPI_Finalize(); return 0; } @@ -713,6 +652,7 @@ int main(int argc, char *argv[]) ChemistryModule chemistry(run_params.work_package_size, init_list.getChemistryInit(), MPI_COMM_WORLD); + ControlModule control(&run_params, &chemistry); const ChemistryModule::SurrogateSetup surr_setup = { getSpeciesNames(init_list.getInitialGrid(), 0, MPI_COMM_WORLD), @@ -730,12 +670,21 @@ int main(int argc, char *argv[]) chemistry.masterEnableSurrogates(surr_setup); - if (MY_RANK > 0) - { + const ControlModule::ControlSetup ctrl_setup = { + run_params.out_dir, // added + run_params.checkpoint_interval, + run_params.control_interval, + run_params.species_count, + run_params.species_names, + run_params.mape_threshold, + run_params.rrmse_threshold}; + + control.enableControlLogic(ctrl_setup); + + + if (MY_RANK > 0) { chemistry.WorkerLoop(); - } - else - { + } else { // R.parseEvalQ("mysetup <- setup"); // // if (MY_RANK == 0) { // get timestep vector from // // grid_init function ... 
// @@ -749,8 +698,7 @@ int main(int argc, char *argv[]) R["out_ext"] = run_params.out_ext; R["out_dir"] = run_params.out_dir; - if (run_params.use_ai_surrogate) - { + if (run_params.use_ai_surrogate) { /* Incorporate ai surrogate from R */ R.parseEvalQ(ai_surrogate_r_library); /* Use dht species for model input and output */ @@ -799,8 +747,7 @@ int main(int argc, char *argv[]) MPI_Finalize(); - if (MY_RANK == 0) - { + if (MY_RANK == 0) { MSG("done, bye!"); } diff --git a/src/poet.hpp.in b/src/poet.hpp.in index 6f9f0fabf..aea51966e 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -38,8 +38,7 @@ static const inline std::string ai_surrogate_r_library = R"(@R_AI_SURROGATE_LIB@)"; static const inline std::string r_runtime_parameters = "mysetup"; -struct RuntimeParameters -{ +struct RuntimeParameters { std::string out_dir; std::vector timesteps; From dc940b2f886ed8dedcca8121286ae30897ad555d Mon Sep 17 00:00:00 2001 From: rastogi Date: Thu, 23 Oct 2025 23:16:58 +0200 Subject: [PATCH 18/19] Wp data not being shuffled correctly. 
--- src/Chemistry/ChemistryModule.hpp | 33 +- src/Chemistry/MasterFunctions.cpp | 137 ++--- src/Chemistry/WorkerFunctions.cpp | 981 ++++++++++++++---------------- src/Control/ControlModule.cpp | 53 +- src/Control/ControlModule.hpp | 96 +-- src/poet.cpp | 34 +- src/poet.hpp.in | 6 - 7 files changed, 632 insertions(+), 708 deletions(-) diff --git a/src/Chemistry/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp index 73be52c60..8a96084c5 100644 --- a/src/Chemistry/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -2,15 +2,16 @@ #ifndef CHEMISTRYMODULE_H_ #define CHEMISTRYMODULE_H_ +#include "ChemistryDefs.hpp" #include "DataStructures/Field.hpp" #include "DataStructures/NamedVector.hpp" #include "ChemistryDefs.hpp" #include "Control/ControlModule.hpp" #include "Init/InitialList.hpp" #include "NameDouble.h" +#include "PhreeqcRunner.hpp" #include "SurrogateModels/DHT_Wrapper.hpp" #include "SurrogateModels/Interpolation.hpp" -#include "PhreeqcRunner.hpp" #include #include @@ -174,12 +175,6 @@ public: */ auto GetMasterLoopTime() const { return this->send_recv_t; } - auto GetMasterCtrlLogicTime() const { return this->ctrl_t; } - - auto GetMasterCtrlBcastTime() const { return this->bcast_ctrl_t; } - - auto GetMasterRecvCtrlLogicTime() const { return this->recv_ctrl_t; } - /** * **Master only** Collect and return all accumulated timings recorded by * workers to run Phreeqc simulation. 
@@ -257,6 +252,8 @@ public: std::vector ai_surrogate_validity_vector; + void setControlModule(poet::ControlModule *ctrl) { control_module = ctrl; } + protected: void initializeDHT(uint32_t size_mb, const NamedVector &key_species, @@ -274,7 +271,8 @@ protected: CHEM_DHT_SIGNIF_VEC, CHEM_DHT_SNAPS, CHEM_DHT_READ_FILE, - CHEM_IP, // Control Flag + //CHEM_IP, // Control flag + CHEM_CTRL, // Control flag CHEM_IP_ENABLE, CHEM_IP_MIN_ENTRIES, CHEM_IP_SIGNIF_VEC, @@ -329,7 +327,7 @@ protected: void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, workpointer_t &sur_pointer, int &pkg_to_send, int &count_pkgs, int &free_workers, double dt, - uint32_t iteration, uint32_t control_iteration, + uint32_t iteration, const std::vector &wp_sizes_vector); void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, int &free_workers); @@ -367,6 +365,10 @@ protected: void BCastStringVec(std::vector &io); + int packResultsIntoBuffer(std::vector &mpi_buffer, int base_count, + const WorkPackage &wp, + const WorkPackage &wp_control); + int comm_size, comm_rank; MPI_Comm group_comm; @@ -380,13 +382,12 @@ protected: poet::DHT_Wrapper *dht = nullptr; - bool dht_fill_during_rollback{false}; bool interp_enabled{false}; std::unique_ptr interp; bool ai_surrogate_enabled{false}; - static constexpr uint32_t BUFFER_OFFSET = 6; + static constexpr uint32_t BUFFER_OFFSET = 5; inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const { MPI_Bcast(buf, count, datatype, 0, this->group_comm); @@ -400,10 +401,6 @@ protected: double seq_t = 0.; double send_recv_t = 0.; - double ctrl_t = 0.; - double bcast_ctrl_t = 0.; - double recv_ctrl_t = 0.; - std::array base_totals{0}; bool print_progessbar{false}; @@ -422,9 +419,11 @@ protected: std::unique_ptr pqc_runner; - std::unique_ptr ctrl_module; + poet::ControlModule *control_module = nullptr; - //std::vector sur_shuffled; + bool control_enabled{false}; + + // std::vector sur_shuffled; }; } // namespace poet diff 
--git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index 4c75fb3cd..0bed2310f 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -235,7 +235,7 @@ inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { inline void poet::ChemistryModule::MasterSendPkgs( worker_list_t &w_list, workpointer_t &work_pointer, workpointer_t &sur_pointer, int &pkg_to_send, int &count_pkgs, - int &free_workers, double dt, uint32_t iteration, uint32_t control_interval, + int &free_workers, double dt, uint32_t iteration, const std::vector &wp_sizes_vector) { /* declare variables */ int local_work_package_size; @@ -276,14 +276,23 @@ inline void poet::ChemistryModule::MasterSendPkgs( std::accumulate(wp_sizes_vector.begin(), std::next(wp_sizes_vector.begin(), count_pkgs), 0); send_buffer[end_of_wp + 4] = wp_start_index; - // whether this iteration is a control iteration - send_buffer[end_of_wp + 5] = control_interval; /* ATTENTION Worker p has rank p+1 */ // MPI_Send(send_buffer, end_of_wp + BUFFER_OFFSET, MPI_DOUBLE, p + 1, // LOOP_WORK, this->group_comm); MPI_Send(send_buffer.data(), send_buffer.size(), MPI_DOUBLE, p + 1, LOOP_WORK, this->group_comm); + + /* ---- DEBUG LOG (Sender side) ---- */ + std::cout << "[DEBUG][rank=" << p+1 + << "] sending WP " << (count_pkgs - 1) + << " to worker rank " << (p + 1) + << " | len=" << send_buffer.size() + << " | start index=" << wp_start_index + << " | second element=" << send_buffer[1] + << " | pkg size=" << local_work_package_size + << std::endl; + /* -------------------------------- */ /* Mark that worker has work to do */ w_list[p].has_work = 1; @@ -301,8 +310,9 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, int need_to_receive = 1; double idle_a, idle_b; int p, size; - double recv_a, recv_b; + std::vector recv_buffer; + recv_buffer.reserve(wp_size * prop_count * 2); MPI_Status probe_status; // master_recv_a = MPI_Wtime(); /* start to 
loop as long there are packages to recv and the need to receive @@ -320,41 +330,48 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, idle_b = MPI_Wtime(); this->idle_t += idle_b - idle_a; } - + if (!need_to_receive) { + continue; + } /* if need_to_receive was set to true above, so there is a message to * receive */ - if (need_to_receive) { - p = probe_status.MPI_SOURCE; - if (probe_status.MPI_TAG == LOOP_WORK) { - MPI_Get_count(&probe_status, MPI_DOUBLE, &size); - MPI_Recv(w_list[p - 1].send_addr, size, MPI_DOUBLE, p, LOOP_WORK, - this->group_comm, MPI_STATUS_IGNORE); - w_list[p - 1].has_work = 0; - pkg_to_recv -= 1; - free_workers++; - } - if (probe_status.MPI_TAG == LOOP_CTRL) { - recv_a = MPI_Wtime(); - MPI_Get_count(&probe_status, MPI_DOUBLE, &size); + p = probe_status.MPI_SOURCE; + bool handled = false; - // layout of buffer is [phreeqc][surrogate] - std::vector recv_buffer(size); + switch (probe_status.MPI_TAG) { + case LOOP_WORK: { + MPI_Get_count(&probe_status, MPI_DOUBLE, &size); + MPI_Recv(w_list[p - 1].send_addr, size, MPI_DOUBLE, p, LOOP_WORK, + this->group_comm, MPI_STATUS_IGNORE); + handled = true; + break; + } + case LOOP_CTRL: { + /* layout of buffer is [phreeqc][surrogate] */ + MPI_Get_count(&probe_status, MPI_DOUBLE, &size); + recv_buffer.resize(size); + MPI_Recv(recv_buffer.data(), size, MPI_DOUBLE, p, LOOP_CTRL, + this->group_comm, MPI_STATUS_IGNORE); - MPI_Recv(recv_buffer.data(), size, MPI_DOUBLE, p, LOOP_CTRL, - this->group_comm, MPI_STATUS_IGNORE); + int half = size / 2; + std::copy(recv_buffer.begin(), recv_buffer.begin() + half, + w_list[p - 1].send_addr); - std::copy(recv_buffer.begin(), recv_buffer.begin() + (size / 2), - w_list[p - 1].send_addr); + std::copy(recv_buffer.begin() + (size / 2), recv_buffer.begin() + size, + w_list[p - 1].surrogate_addr); - std::copy(recv_buffer.begin() + (size / 2), recv_buffer.begin() + size, - w_list[p - 1].surrogate_addr); - - w_list[p - 1].has_work = 0; - pkg_to_recv -= 1; - 
free_workers++; - recv_b = MPI_Wtime(); - this->recv_ctrl_t += recv_b - recv_a; - } + handled = true; + break; + } + default: { + throw std::runtime_error("Master received unknown MPI tag: " + + std::to_string(probe_status.MPI_TAG)); + } + } + if (handled) { + w_list[p - 1].has_work = 0; + pkg_to_recv -= 1; + free_workers++; } } } @@ -408,10 +425,6 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { int i_pkgs; int ftype; - double ctrl_a, ctrl_b; - double worker_ctrl_a, worker_ctrl_b; - double ctrl_bcast_a, ctrl_bcast_b; - const std::vector wp_sizes_vector = CalculateWPSizesVector(this->n_cells, this->wp_size); @@ -425,15 +438,18 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { MPI_INT); } - /* start time measurement of broadcasting interpolation status */ - ctrl_bcast_a = MPI_Wtime(); + uint32_t control_flag = control_module->GetControlIntervalEnabled(); + if (control_flag) { + ftype = CHEM_CTRL; + PropagateFunctionType(ftype); + ChemBCast(&control_flag, 1, MPI_INT); + } + + /* ftype = CHEM_IP; PropagateFunctionType(ftype); ctrl_module->BCastControlFlags(); - /* end time measurement of broadcasting interpolation status */ - ctrl_bcast_b = MPI_Wtime(); - this->bcast_ctrl_t += ctrl_bcast_b - ctrl_bcast_a; - +*/ ftype = CHEM_WORK_LOOP; PropagateFunctionType(ftype); @@ -441,32 +457,23 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { static uint32_t iteration = 0; - uint32_t control_logic_enabled = - ctrl_module->control_interval_enabled ? 
1 : 0; - - if (control_logic_enabled) { - ctrl_module->sur_shuffled.clear(); - ctrl_module->sur_shuffled.reserve(this->n_cells * this->prop_count); - } - /* start time measurement of sequential part */ seq_a = MPI_Wtime(); /* shuffle grid */ // grid.shuffleAndExport(mpi_buffer); - std::vector mpi_buffer = shuffleField(chem_field.AsVector(), this->n_cells, this->prop_count, wp_sizes_vector.size()); - ctrl_module->sur_shuffled.resize(mpi_buffer.size()); + std::vector mpi_surr_buffer{mpi_buffer}; /* setup local variables */ pkg_to_send = wp_sizes_vector.size(); pkg_to_recv = wp_sizes_vector.size(); workpointer_t work_pointer = mpi_buffer.begin(); - workpointer_t sur_pointer = ctrl_module->sur_shuffled.begin(); + workpointer_t sur_pointer = mpi_surr_buffer.begin(); worker_list_t worker_list(this->comm_size - 1); free_workers = this->comm_size - 1; @@ -490,8 +497,7 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { if (pkg_to_send > 0) { // send packages to all free workers ... MasterSendPkgs(worker_list, work_pointer, sur_pointer, pkg_to_send, - i_pkgs, free_workers, dt, iteration, control_logic_enabled, - wp_sizes_vector); + i_pkgs, free_workers, dt, iteration, wp_sizes_vector); } // ... and try to receive them from workers who has finished their work MasterRecvPkgs(worker_list, pkg_to_recv, pkg_to_send > 0, free_workers); @@ -516,22 +522,17 @@ void poet::ChemistryModule::MasterRunParallel(double dt) { /* do master stuff */ - /* start time measurement of control logic */ - ctrl_a = MPI_Wtime(); - - if (control_logic_enabled && !ctrl_module->rollback_enabled) { - std::cout << "[Master] Control logic enabled for this iteration." << std::endl; - std::vector sur_unshuffled{ctrl_module->sur_shuffled}; - unshuffleField(ctrl_module->sur_shuffled, this->n_cells, this->prop_count, + if (control_flag) { + std::cout << "[Master] Control logic enabled for this iteration." 
+ << std::endl; + std::vector sur_unshuffled{mpi_surr_buffer}; + unshuffleField(mpi_surr_buffer, this->n_cells, this->prop_count, wp_sizes_vector.size(), sur_unshuffled); - ctrl_module->computeSpeciesErrors(out_vec, sur_unshuffled, this->n_cells); + control_module->computeSpeciesErrors(out_vec, sur_unshuffled, + this->n_cells); } - /* end time measurement of control logic */ - ctrl_b = MPI_Wtime(); - this->ctrl_t += ctrl_b - ctrl_a; - /* start time measurement of master chemistry */ sim_e_chemistry = MPI_Wtime(); diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index 8cf15fe92..6b9b73ceb 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -9,574 +9,487 @@ #include #include #include -#include #include +#include #include #include #include -namespace poet -{ +namespace poet { - inline std::string get_string(int root, MPI_Comm communicator) - { - int count; - MPI_Bcast(&count, 1, MPI_INT, root, communicator); +inline std::string get_string(int root, MPI_Comm communicator) { + int count; + MPI_Bcast(&count, 1, MPI_INT, root, communicator); - char *buffer = new char[count + 1]; - MPI_Bcast(buffer, count, MPI_CHAR, root, communicator); + char *buffer = new char[count + 1]; + MPI_Bcast(buffer, count, MPI_CHAR, root, communicator); - buffer[count] = '\0'; + buffer[count] = '\0'; - std::string ret_str(buffer); - delete[] buffer; + std::string ret_str(buffer); + delete[] buffer; - return ret_str; + return ret_str; +} + +void poet::ChemistryModule::WorkerLoop() { + struct worker_s timings; + + // HACK: defining the worker iteration count here, which will increment after + // each CHEM_ITER_END message + uint32_t iteration = 1; + bool loop = true; + while (loop) { + int func_type; + PropagateFunctionType(func_type); + + switch (func_type) { + case CHEM_FIELD_INIT: { + ChemBCast(&this->prop_count, 1, MPI_UINT32_T); + if (this->ai_surrogate_enabled) { + this->ai_surrogate_validity_vector.resize( + 
this->n_cells); // resize statt reserve? + } + break; + } + case CHEM_AI_BCAST_VALIDITY: { + // Receive the index vector of valid ai surrogate predictions + MPI_Bcast(&this->ai_surrogate_validity_vector.front(), this->n_cells, + MPI_INT, 0, this->group_comm); + break; + } + case CHEM_CTRL: { + int control_flag = 0; + ChemBCast(&control_flag, 1, MPI_INT); + this->control_enabled = (control_flag == 1); + break; + } + case CHEM_WORK_LOOP: { + WorkerProcessPkgs(timings, iteration); + break; + } + case CHEM_PERF: { + int type; + ChemBCast(&type, 1, MPI_INT); + if (type < WORKER_DHT_HITS) { + WorkerPerfToMaster(type, timings); + break; + } + WorkerMetricsToMaster(type); + break; + } + case CHEM_BREAK_MAIN_LOOP: { + WorkerPostSim(iteration); + loop = false; + break; + } + default: { + throw std::runtime_error("Worker received unknown tag from master."); + } + } + } +} + +void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings, + uint32_t &iteration) { + MPI_Status probe_status; + bool loop = true; + + MPI_Barrier(this->group_comm); + + while (loop) { + double idle_a = MPI_Wtime(); + MPI_Probe(0, MPI_ANY_TAG, this->group_comm, &probe_status); + double idle_b = MPI_Wtime(); + + switch (probe_status.MPI_TAG) { + case LOOP_WORK: { + timings.idle_t += idle_b - idle_a; + int count; + MPI_Get_count(&probe_status, MPI_DOUBLE, &count); + + WorkerDoWork(probe_status, count, timings); + break; + } + case LOOP_END: { + WorkerPostIter(probe_status, iteration); + iteration++; + loop = false; + break; + } + } + } +} + +void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, + int double_count, + struct worker_s &timings) { + static int counter = 1; + + double dht_get_start, dht_get_end; + double phreeqc_time_start, phreeqc_time_end; + double dht_fill_start, dht_fill_end; + + uint32_t iteration; + double dt; + double current_sim_time; + uint32_t wp_start_index; + int count = double_count; + std::vector mpi_buffer(count); + + /* receive */ + 
MPI_Recv(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, this->group_comm, + MPI_STATUS_IGNORE); + + /* decrement count of work_package by BUFFER_OFFSET */ + count -= BUFFER_OFFSET; + + /* check for changes on all additional variables given by the 'header' of + * mpi_buffer */ + + // work_package_size + poet::WorkPackage s_curr_wp(mpi_buffer[count]); + + // current iteration of simulation + iteration = mpi_buffer[count + 1]; + + // current timestep size + dt = mpi_buffer[count + 2]; + + // current simulation time ('age' of simulation) + current_sim_time = mpi_buffer[count + 3]; + + // current work package start location in field + wp_start_index = mpi_buffer[count + 4]; + + std::cout << "[DEBUG][rank=" << this->comm_rank << "] WP " << counter + << " len=" << count << " | second element: " << mpi_buffer[1] + << " | iteration=" << iteration << " | dt=" << dt + << " | simtime=" << current_sim_time + << " | start_index=" << wp_start_index << std::endl; + + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { + s_curr_wp.input[wp_i] = + std::vector(mpi_buffer.begin() + this->prop_count * wp_i, + mpi_buffer.begin() + this->prop_count * (wp_i + 1)); } - void poet::ChemistryModule::WorkerLoop() - { - struct worker_s timings; - - // HACK: defining the worker iteration count here, which will increment after - // each CHEM_ITER_END message - uint32_t iteration = 1; - bool loop = true; - - while (loop) - { - int func_type; - PropagateFunctionType(func_type); - - switch (func_type) - { - case CHEM_FIELD_INIT: - { - ChemBCast(&this->prop_count, 1, MPI_UINT32_T); - if (this->ai_surrogate_enabled) - { - this->ai_surrogate_validity_vector.resize( - this->n_cells); // resize statt reserve? 
- } - break; - } - case CHEM_AI_BCAST_VALIDITY: - { - // Receive the index vector of valid ai surrogate predictions - MPI_Bcast(&this->ai_surrogate_validity_vector.front(), this->n_cells, - MPI_INT, 0, this->group_comm); - break; - } - case CHEM_IP: - { - int interp_flag = 0; - int dht_fill_flag = 0; - - ChemBCast(&interp_flag, 1, MPI_INT); - ChemBCast(&dht_fill_flag, 1, MPI_INT); - - this->interp_enabled = (interp_flag == 1); - this->dht_fill_during_rollback = (dht_fill_flag == 1); - break; - } - case CHEM_WORK_LOOP: - { - WorkerProcessPkgs(timings, iteration); - break; - } - case CHEM_PERF: - { - int type; - ChemBCast(&type, 1, MPI_INT); - if (type < WORKER_DHT_HITS) - { - WorkerPerfToMaster(type, timings); - break; - } - WorkerMetricsToMaster(type); - break; - } - case CHEM_BREAK_MAIN_LOOP: - { - WorkerPostSim(iteration); - loop = false; - break; - } - default: - { - throw std::runtime_error("Worker received unknown tag from master."); - } - } - } - } - - void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings, - uint32_t &iteration) - { - MPI_Status probe_status; - bool loop = true; - - MPI_Barrier(this->group_comm); - - while (loop) - { - double idle_a = MPI_Wtime(); - MPI_Probe(0, MPI_ANY_TAG, this->group_comm, &probe_status); - double idle_b = MPI_Wtime(); - - switch (probe_status.MPI_TAG) - { - case LOOP_WORK: - { - timings.idle_t += idle_b - idle_a; - int count; - MPI_Get_count(&probe_status, MPI_DOUBLE, &count); - - WorkerDoWork(probe_status, count, timings); - break; - } - case LOOP_END: - { - WorkerPostIter(probe_status, iteration); - iteration++; - loop = false; - break; - } - } - } - } - - void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, - int double_count, - struct worker_s &timings) - { - static int counter = 1; - - double dht_get_start, dht_get_end; - double phreeqc_time_start, phreeqc_time_end; - double dht_fill_start, dht_fill_end; - double ctrl_time_c, ctrl_time_d; - - uint32_t iteration; - double dt; - double 
current_sim_time; - uint32_t wp_start_index; - int count = double_count; - bool control_logic_enabled = false; - std::vector mpi_buffer(count); - - /* receive */ - MPI_Recv(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, this->group_comm, - MPI_STATUS_IGNORE); - - /* decrement count of work_package by BUFFER_OFFSET */ - count -= BUFFER_OFFSET; - /* check for changes on all additional variables given by the 'header' of - * mpi_buffer */ - - // work_package_size - poet::WorkPackage s_curr_wp(mpi_buffer[count]); - - // current iteration of simulation - iteration = mpi_buffer[count + 1]; - - // current timestep size - dt = mpi_buffer[count + 2]; - - // current simulation time ('age' of simulation) - current_sim_time = mpi_buffer[count + 3]; - - // current work package start location in field - wp_start_index = mpi_buffer[count + 4]; - - control_logic_enabled = (mpi_buffer[count + 5] == 1); - - for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) - { - s_curr_wp.input[wp_i] = - std::vector(mpi_buffer.begin() + this->prop_count * wp_i, - mpi_buffer.begin() + this->prop_count * (wp_i + 1)); - } - - // std::cout << this->comm_rank << ":" << counter++ << std::endl; - if (dht_enabled || interp_enabled) - { - dht->prepareKeys(s_curr_wp.input, dt); - } - - if (dht_enabled) - { - /* check for values in DHT */ - dht_get_start = MPI_Wtime(); - dht->checkDHT(s_curr_wp); - dht_get_end = MPI_Wtime(); - timings.dht_get += dht_get_end - dht_get_start; - } - - if (interp_enabled) - { - interp->tryInterpolation(s_curr_wp); - } - - if (this->ai_surrogate_enabled) - { - // Map valid predictions from the ai surrogate in the workpackage - for (int i = 0; i < s_curr_wp.size; i++) - { - if (this->ai_surrogate_validity_vector[wp_start_index + i] == 1) - { - s_curr_wp.mapping[i] = CHEM_AISURR; - } - } - } - - /* if control iteration: create copy surrogate results (output and mappings) and then set them to zero, - give this to phreeqc */ - - poet::WorkPackage s_curr_wp_control = 
s_curr_wp; - - if (control_logic_enabled) - { - for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++) - { - s_curr_wp_control.output[wp_i] = std::vector(this->prop_count, 0.0); - s_curr_wp_control.mapping[wp_i] = 0; - } - } - - phreeqc_time_start = MPI_Wtime(); - - WorkerRunWorkPackage(control_logic_enabled ? s_curr_wp_control : s_curr_wp, current_sim_time, dt); - - phreeqc_time_end = MPI_Wtime(); - - if (control_logic_enabled) - { - /* start time measurement for copying control workpackage */ - ctrl_time_c = MPI_Wtime(); - - std::size_t sur_wp_offset = s_curr_wp.size * this->prop_count; - - mpi_buffer.resize(count + sur_wp_offset); - - for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++) - { - std::copy(s_curr_wp_control.output[wp_i].begin(), s_curr_wp_control.output[wp_i].end(), - mpi_buffer.begin() + this->prop_count * wp_i); - } - - // s_curr_wp only contains the interpolated data - // copy surrogate output after the the pqc output, mpi_buffer[pqc][interp] - - for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) - { - if (s_curr_wp.mapping[wp_i] != CHEM_PQC) // only copy if surrogate was used - { - std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), - mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i); - } else - { - // if pqc was used, copy pqc results again - std::copy(s_curr_wp_control.output[wp_i].begin(), s_curr_wp_control.output[wp_i].end(), - mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i); - } - - } - - count += sur_wp_offset; - - /* end time measurement for copying control workpackage */ - ctrl_time_d = MPI_Wtime(); - timings.ctrl_t += ctrl_time_d - ctrl_time_c; - } - else - { - for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) - { - std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), - mpi_buffer.begin() + this->prop_count * wp_i); - } - } - - /* send results to master */ - MPI_Request send_req; - - int mpi_tag = control_logic_enabled ? 
LOOP_CTRL : LOOP_WORK; - MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, mpi_tag, MPI_COMM_WORLD, &send_req); - - if (dht_enabled || interp_enabled || dht_fill_during_rollback) - { - /* write results to DHT */ - dht_fill_start = MPI_Wtime(); - dht->fillDHT(control_logic_enabled ? s_curr_wp_control : s_curr_wp); - dht_fill_end = MPI_Wtime(); - - if (interp_enabled) - { - interp->writePairs(); - } - timings.dht_fill += dht_fill_end - dht_fill_start; - } - - timings.phreeqc_t += phreeqc_time_end - phreeqc_time_start; - MPI_Wait(&send_req, MPI_STATUS_IGNORE); + // std::cout << this->comm_rank << ":" << counter++ << std::endl; + if (dht_enabled || interp_enabled) { + dht->prepareKeys(s_curr_wp.input, dt); } - void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, - uint32_t iteration) - { - MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm, - MPI_STATUS_IGNORE); - - if (this->dht_enabled) - { - dht_hits.push_back(dht->getHits()); - dht_evictions.push_back(dht->getEvictions()); - dht->resetCounter(); - - if (this->dht_snaps_type == DHT_SNAPS_ITEREND) - { - WorkerWriteDHTDump(iteration); - } - } - - if (this->interp_enabled) - { - std::stringstream out; - interp_calls.push_back(interp->getInterpolationCount()); - interp->resetCounter(); - interp->writePHTStats(); - if (this->dht_snaps_type == DHT_SNAPS_ITEREND) - { - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') - << std::setw(this->file_pad) << iteration << ".pht"; - interp->dumpPHTState(out.str()); - } - - const auto max_mean_idx = - DHT_get_used_idx_factor(this->interp->getDHTObject(), 1); - - if (max_mean_idx >= 2) - { - DHT_flush(this->interp->getDHTObject()); - DHT_flush(this->dht->getDHT()); - if (this->comm_rank == 2) - { - std::cout << "Flushed both DHT and PHT!\n\n"; - } - } - } - - RInsidePOET::getInstance().parseEvalQ("gc()"); + if (dht_enabled) { + /* check for values in DHT */ + dht_get_start = MPI_Wtime(); + dht->checkDHT(s_curr_wp); + dht_get_end = MPI_Wtime(); + 
timings.dht_get += dht_get_end - dht_get_start; } - void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) - { - if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) - { + if (interp_enabled) { + interp->tryInterpolation(s_curr_wp); + } + + if (this->ai_surrogate_enabled) { + // Map valid predictions from the ai surrogate in the workpackage + for (int i = 0; i < s_curr_wp.size; i++) { + if (this->ai_surrogate_validity_vector[wp_start_index + i] == 1) { + s_curr_wp.mapping[i] = CHEM_AISURR; + } + } + } + + /* if control iteration: create copy surrogate results (output and mappings) + and then set them to zero, give this to phreeqc */ + + poet::WorkPackage s_curr_wp_control = s_curr_wp; + + if (control_enabled) { + for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++) { + s_curr_wp_control.output[wp_i] = std::vector(prop_count, 0.0); + s_curr_wp_control.mapping[wp_i] = 0; + } + } + + phreeqc_time_start = MPI_Wtime(); + + WorkerRunWorkPackage(control_enabled ? s_curr_wp_control : s_curr_wp, + current_sim_time, dt); + + phreeqc_time_end = MPI_Wtime(); + + count = + packResultsIntoBuffer(mpi_buffer, count, s_curr_wp, s_curr_wp_control); + + /* send results to master */ + MPI_Request send_req; + + int mpi_tag = control_enabled ? LOOP_CTRL : LOOP_WORK; + MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, mpi_tag, MPI_COMM_WORLD, + &send_req); + + if (dht_enabled || interp_enabled) { + /* write results to DHT */ + dht_fill_start = MPI_Wtime(); + dht->fillDHT(control_enabled ? 
s_curr_wp_control : s_curr_wp); + dht_fill_end = MPI_Wtime(); + + if (interp_enabled) { + interp->writePairs(); + } + timings.dht_fill += dht_fill_end - dht_fill_start; + } + + timings.phreeqc_t += phreeqc_time_end - phreeqc_time_start; + MPI_Wait(&send_req, MPI_STATUS_IGNORE); +} + +int poet::ChemistryModule::packResultsIntoBuffer( + std::vector &mpi_buffer, int base_count, const WorkPackage &wp, + const WorkPackage &wp_control) { + if (control_enabled) { + std::size_t wp_offset = wp_control.size * prop_count; + mpi_buffer.resize(base_count + wp_offset); + + /* copy pqc outputs first */ + for (std::size_t wp_i = 0; wp_i < wp_control.size; wp_i++) { + std::copy(wp_control.output[wp_i].begin(), wp_control.output[wp_i].end(), + mpi_buffer.begin() + prop_count * wp_i); + } + + /* copy surrogate output, only if it contains interpolated data, after the + * the pqc output, layout = mpi_buffer[pqc][interp] */ + for (std::size_t wp_i = 0; wp_i < wp.size; wp_i++) { + const auto &wp_copy = wp.mapping[wp_i] != CHEM_PQC + ? 
wp.output[wp_i] + : wp_control.output[wp_i]; + + std::copy(wp_copy.begin(), wp_copy.end(), + mpi_buffer.begin() + wp_offset + prop_count * wp_i); + } + return base_count + static_cast(wp_offset); + + } else { + for (std::size_t wp_i = 0; wp_i < wp.size; wp_i++) { + std::copy(wp.output[wp_i].begin(), wp.output[wp_i].end(), + mpi_buffer.begin() + prop_count * wp_i); + } + return base_count; + } +} + +void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, + uint32_t iteration) { + MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm, + MPI_STATUS_IGNORE); + + if (this->dht_enabled) { + dht_hits.push_back(dht->getHits()); + dht_evictions.push_back(dht->getEvictions()); + dht->resetCounter(); + + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { WorkerWriteDHTDump(iteration); } - if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) - { - std::stringstream out; + } + + if (this->interp_enabled) { + std::stringstream out; + interp_calls.push_back(interp->getInterpolationCount()); + interp->resetCounter(); + interp->writePHTStats(); + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { out << this->dht_file_out_dir << "/iter_" << std::setfill('0') << std::setw(this->file_pad) << iteration << ".pht"; interp->dumpPHTState(out.str()); } + + const auto max_mean_idx = + DHT_get_used_idx_factor(this->interp->getDHTObject(), 1); + + if (max_mean_idx >= 2) { + DHT_flush(this->interp->getDHTObject()); + DHT_flush(this->dht->getDHT()); + if (this->comm_rank == 2) { + std::cout << "Flushed both DHT and PHT!\n\n"; + } + } } - void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) - { + RInsidePOET::getInstance().parseEvalQ("gc()"); +} + +void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { + if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { + WorkerWriteDHTDump(iteration); + } + if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { std::stringstream out; out << this->dht_file_out_dir << 
"/iter_" << std::setfill('0') - << std::setw(this->file_pad) << iteration << ".dht"; - int res = dht->tableToFile(out.str().c_str()); - if (res != DHT_SUCCESS && this->comm_rank == 2) - std::cerr - << "CPP: Worker: Error in writing current state of DHT to file.\n"; - else if (this->comm_rank == 2) - std::cout << "CPP: Worker: Successfully written DHT to file " << out.str() - << "\n"; + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } +} + +void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) { + std::stringstream out; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".dht"; + int res = dht->tableToFile(out.str().c_str()); + if (res != DHT_SUCCESS && this->comm_rank == 2) + std::cerr + << "CPP: Worker: Error in writing current state of DHT to file.\n"; + else if (this->comm_rank == 2) + std::cout << "CPP: Worker: Successfully written DHT to file " << out.str() + << "\n"; +} + +void poet::ChemistryModule::WorkerReadDHTDump( + const std::string &dht_input_file) { + int res = dht->fileToTable((char *)dht_input_file.c_str()); + if (res != DHT_SUCCESS) { + if (res == DHT_WRONG_FILE) { + if (this->comm_rank == 1) + std::cerr + << "CPP: Worker: Wrong file layout! Continue with empty DHT ...\n"; + } else { + if (this->comm_rank == 1) + std::cerr << "CPP: Worker: Error in loading current state of DHT from " + "file. 
Continue with empty DHT ...\n"; + } + } else { + if (this->comm_rank == 2) + std::cout << "CPP: Worker: Successfully loaded state of DHT from file " + << dht_input_file << "\n"; + } +} + +void poet::ChemistryModule::WorkerRunWorkPackage(WorkPackage &work_package, + double dSimTime, + double dTimestep) { + + std::vector> inout_chem = work_package.input; + std::vector to_ignore; + + for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) { + if (work_package.mapping[wp_id] != CHEM_PQC) { + to_ignore.push_back(wp_id); + } + + // HACK: remove the first element (cell_id) before sending to phreeqc + inout_chem[wp_id].erase(inout_chem[wp_id].begin(), + inout_chem[wp_id].begin() + 1); } - void poet::ChemistryModule::WorkerReadDHTDump( - const std::string &dht_input_file) - { - int res = dht->fileToTable((char *)dht_input_file.c_str()); - if (res != DHT_SUCCESS) - { - if (res == DHT_WRONG_FILE) - { - if (this->comm_rank == 1) - std::cerr - << "CPP: Worker: Wrong file layout! Continue with empty DHT ...\n"; - } - else - { - if (this->comm_rank == 1) - std::cerr << "CPP: Worker: Error in loading current state of DHT from " - "file. 
Continue with empty DHT ...\n"; - } - } - else - { - if (this->comm_rank == 2) - std::cout << "CPP: Worker: Successfully loaded state of DHT from file " - << dht_input_file << "\n"; + this->pqc_runner->run(inout_chem, dTimestep, to_ignore); + + for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) { + if (work_package.mapping[wp_id] == CHEM_PQC) { + // HACK: as we removed the first element (cell_id) before sending to + // phreeqc, copy back with an offset of 1 + work_package.output[wp_id] = work_package.input[wp_id]; + std::copy(inout_chem[wp_id].begin(), inout_chem[wp_id].end(), + work_package.output[wp_id].begin() + 1); } } +} - void poet::ChemistryModule::WorkerRunWorkPackage(WorkPackage &work_package, - double dSimTime, - double dTimestep) - { - - std::vector> inout_chem = work_package.input; - std::vector to_ignore; - - for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) - { - if (work_package.mapping[wp_id] != CHEM_PQC) - { - to_ignore.push_back(wp_id); - } - - // HACK: remove the first element (cell_id) before sending to phreeqc - inout_chem[wp_id].erase( - inout_chem[wp_id].begin(), inout_chem[wp_id].begin() + 1); +void poet::ChemistryModule::WorkerPerfToMaster(int type, + const struct worker_s &timings) { + switch (type) { + case WORKER_PHREEQC: { + MPI_Gather(&timings.phreeqc_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; } - - this->pqc_runner->run(inout_chem, dTimestep, to_ignore); - - for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) - { - if (work_package.mapping[wp_id] == CHEM_PQC) - { - // HACK: as we removed the first element (cell_id) before sending to phreeqc, - // copy back with an offset of 1 - work_package.output[wp_id] = work_package.input[wp_id]; - std::copy(inout_chem[wp_id].begin(), inout_chem[wp_id].end(), - work_package.output[wp_id].begin() + 1); - } - } + case WORKER_CTRL_ITER: { + MPI_Gather(&timings.ctrl_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + 
break; } - - void poet::ChemistryModule::WorkerPerfToMaster(int type, - const struct worker_s &timings) - { - switch (type) - { - case WORKER_PHREEQC: - { - MPI_Gather(&timings.phreeqc_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_CTRL_ITER: - { - MPI_Gather(&timings.ctrl_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_DHT_GET: - { - MPI_Gather(&timings.dht_get, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_DHT_FILL: - { - MPI_Gather(&timings.dht_fill, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_IDLE: - { - MPI_Gather(&timings.idle_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, - this->group_comm); - break; - } - case WORKER_IP_WRITE: - { - double val = interp->getPHTWriteTime(); - MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); - break; - } - case WORKER_IP_READ: - { - double val = interp->getPHTReadTime(); - MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); - break; - } - case WORKER_IP_GATHER: - { - double val = interp->getDHTGatherTime(); - MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); - break; - } - case WORKER_IP_FC: - { - double val = interp->getInterpolationTime(); - MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); - break; - } - default: - { - throw std::runtime_error("Unknown perf type in master's message."); - } - } + case WORKER_DHT_GET: { + MPI_Gather(&timings.dht_get, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; } - - void poet::ChemistryModule::WorkerMetricsToMaster(int type) - { - MPI_Comm worker_comm = dht->getCommunicator(); - int worker_rank; - MPI_Comm_rank(worker_comm, &worker_rank); - - MPI_Comm &group_comm = this->group_comm; - - auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm]( - std::vector &send_buffer, int tag) - { - std::vector 
to_master(send_buffer.size()); - MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(), - MPI_UINT32_T, MPI_SUM, 0, worker_comm); - - if (worker_rank == 0) - { - MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag, - group_comm); - } - }; - - switch (type) - { - case WORKER_DHT_HITS: - { - reduce_and_send(dht_hits, WORKER_DHT_HITS); - break; - } - case WORKER_DHT_EVICTIONS: - { - reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS); - break; - } - case WORKER_IP_CALLS: - { - reduce_and_send(interp_calls, WORKER_IP_CALLS); - return; - } - case WORKER_PHT_CACHE_HITS: - { - std::vector input = this->interp->getPHTLocalCacheHits(); - reduce_and_send(input, WORKER_PHT_CACHE_HITS); - return; - } - default: - { - throw std::runtime_error("Unknown perf type in master's message."); - } - } + case WORKER_DHT_FILL: { + MPI_Gather(&timings.dht_fill, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; } + case WORKER_IDLE: { + MPI_Gather(&timings.idle_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, + this->group_comm); + break; + } + case WORKER_IP_WRITE: { + double val = interp->getPHTWriteTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_READ: { + double val = interp->getPHTReadTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_GATHER: { + double val = interp->getDHTGatherTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_FC: { + double val = interp->getInterpolationTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + default: { + throw std::runtime_error("Unknown perf type in master's message."); + } + } +} + +void poet::ChemistryModule::WorkerMetricsToMaster(int type) { + MPI_Comm worker_comm = dht->getCommunicator(); + int worker_rank; + MPI_Comm_rank(worker_comm, &worker_rank); + + MPI_Comm 
&group_comm = this->group_comm; + + auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm]( + std::vector &send_buffer, int tag) { + std::vector to_master(send_buffer.size()); + MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(), + MPI_UINT32_T, MPI_SUM, 0, worker_comm); + + if (worker_rank == 0) { + MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag, + group_comm); + } + }; + + switch (type) { + case WORKER_DHT_HITS: { + reduce_and_send(dht_hits, WORKER_DHT_HITS); + break; + } + case WORKER_DHT_EVICTIONS: { + reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS); + break; + } + case WORKER_IP_CALLS: { + reduce_and_send(interp_calls, WORKER_IP_CALLS); + return; + } + case WORKER_PHT_CACHE_HITS: { + std::vector input = this->interp->getPHTLocalCacheHits(); + reduce_and_send(input, WORKER_PHT_CACHE_HITS); + return; + } + default: { + throw std::runtime_error("Unknown perf type in master's message."); + } + } +} } // namespace poet diff --git a/src/Control/ControlModule.cpp b/src/Control/ControlModule.cpp index a5a71d577..66b3a419c 100644 --- a/src/Control/ControlModule.cpp +++ b/src/Control/ControlModule.cpp @@ -4,15 +4,23 @@ #include "IO/StatsIO.hpp" #include -bool poet::ControlModule::isControlIteration(uint32_t iter) { +void poet::ControlModule::updateControlIteration(const uint32_t iter) { + + global_iteration = iter; + + if (control_interval == 0) { + control_interval_enabled = false; + return; + } + control_interval_enabled = (iter % control_interval == 0); if (control_interval_enabled) { - MSG("[Control] Control interval triggered at iteration " + + MSG("[Control] Control interval enabled at iteration " + std::to_string(iter)); } - return control_interval_enabled; } +/* void poet::ControlModule::beginIteration() { if (rollback_enabled) { if (sur_disabled_counter > 0) { @@ -23,19 +31,23 @@ void poet::ControlModule::beginIteration() { } } } +*/ -void poet::ControlModule::endIteration(uint32_t iter) { +void 
poet::ControlModule::endIteration(const uint32_t iter) { + + if (!control_interval_enabled) { + return; + } /* Writing a checkpointing */ - if (checkpoint_interval > 0 && iter % checkpoint_interval == 0) { + /* Control Logic*/ + if (control_interval_enabled && + checkpoint_interval > 0 /*&& !rollback_enabled*/) { MSG("Writing checkpoint of iteration " + std::to_string(iter)); write_checkpoint(out_dir, "checkpoint" + std::to_string(iter) + ".hdf5", {.field = chem->getField(), .iteration = iter}); - } + writeStatsToCSV(error_history, species_names, out_dir, "stats_overview"); - /* Control Logic*/ - if (control_interval_enabled && !rollback_enabled) { - writeStatsToCSV(error_history, species_names, out_dir, - "stats_overview"); + /* if (triggerRollbackIfExceeded(*chem, *params, iter)) { rollback_enabled = true; @@ -44,9 +56,12 @@ void poet::ControlModule::endIteration(uint32_t iter) { MSG("Interpolation disabled for the next " + std::to_string(control_interval) + "."); } + + */ } } +/* void poet::ControlModule::BCastControlFlags() { int interp_flag = rollback_enabled ? 0 : 1; int dht_fill_flag = rollback_enabled ? 
1 : 0; @@ -54,6 +69,9 @@ void poet::ControlModule::BCastControlFlags() { chem->ChemBCast(&dht_fill_flag, 1, MPI_INT); } +*/ + +/* bool poet::ControlModule::triggerRollbackIfExceeded(ChemistryModule &chem, RuntimeParameters &params, uint32_t &iter) { @@ -91,17 +109,20 @@ bool poet::ControlModule::triggerRollbackIfExceeded(ChemistryModule &chem, } } MSG("All species are within their MAPE and RRMSE thresholds."); - return false; -} + return + false; +} +*/ void poet::ControlModule::computeSpeciesErrors( const std::vector &reference_values, - const std::vector &surrogate_values, uint32_t size_per_prop) { + const std::vector &surrogate_values, const uint32_t size_per_prop) { - SimulationErrorStats species_error_stats(species_count, params->global_iter, - rollback_counter); + SimulationErrorStats species_error_stats(this->species_names.size(), + global_iteration, + /*rollback_counter*/ 0); - for (uint32_t i = 0; i < species_count; ++i) { + for (uint32_t i = 0; i < this->species_names.size(); ++i) { double err_sum = 0.0; double sqr_err_sum = 0.0; uint32_t base_idx = i * size_per_prop; diff --git a/src/Control/ControlModule.hpp b/src/Control/ControlModule.hpp index 6bd848d07..14620c2e4 100644 --- a/src/Control/ControlModule.hpp +++ b/src/Control/ControlModule.hpp @@ -16,43 +16,24 @@ class ChemistryModule; class ControlModule { public: - ControlModule(RuntimeParameters *run_params, ChemistryModule *chem_module) - : params(run_params), chem(chem_module) {}; - /* Control configuration*/ - std::vector species_names; - uint32_t species_count = 0; - std::string out_dir; - bool rollback_enabled = false; - bool control_interval_enabled = false; + // std::uint32_t global_iter = 0; + // std::uint32_t sur_disabled_counter = 0; + // std::uint32_t rollback_counter = 0; - std::uint32_t global_iter = 0; - std::uint32_t sur_disabled_counter = 0; - std::uint32_t rollback_counter = 0; - std::uint32_t checkpoint_interval = 0; - std::uint32_t control_interval = 0; + void 
updateControlIteration(const uint32_t iter); - std::vector mape_threshold; - std::vector rrmse_threshold; + auto GetGlobalIteration() const noexcept { return global_iteration; } - double ctrl_t = 0.; - double bcast_ctrl_t = 0.; - double recv_ctrl_t = 0.; + // void beginIteration(); - /* Buffer for shuffled surrogate data */ - std::vector sur_shuffled; + void endIteration(const uint32_t iter); - bool isControlIteration(uint32_t iter); + // void BCastControlFlags(); - void beginIteration(); - - void endIteration(uint32_t iter); - - void BCastControlFlags(); - - bool triggerRollbackIfExceeded(ChemistryModule &chem, - RuntimeParameters &params, uint32_t &iter); + //bool triggerRollbackIfExceeded(ChemistryModule &chem, + // RuntimeParameters &params, uint32_t &iter); struct SimulationErrorStats { std::vector mape; @@ -60,14 +41,14 @@ public: uint32_t iteration; // iterations in simulation after rollbacks uint32_t rollback_count; - SimulationErrorStats(size_t species_count, uint32_t iter, uint32_t counter) + SimulationErrorStats(uint32_t species_count, uint32_t iter, uint32_t counter) : mape(species_count, 0.0), rrmse(species_count, 0.0), iteration(iter), rollback_count(counter) {} }; - static void computeSpeciesErrors(const std::vector &reference_values, + void computeSpeciesErrors(const std::vector &reference_values, const std::vector &surrogate_values, - uint32_t size_per_prop); + const uint32_t size_per_prop); std::vector error_history; @@ -75,34 +56,53 @@ public: std::string out_dir; std::uint32_t checkpoint_interval; std::uint32_t control_interval; - std::uint32_t species_count; - std::vector species_names; std::vector mape_threshold; - std::vector rrmse_threshold; }; void enableControlLogic(const ControlSetup &setup) { - out_dir = setup.out_dir; - checkpoint_interval = setup.checkpoint_interval; - control_interval = setup.control_interval; - species_count = setup.species_count; - - species_names = setup.species_names; - mape_threshold = setup.mape_threshold; - 
rrmse_threshold = setup.rrmse_threshold; + this->out_dir = setup.out_dir; + this->checkpoint_interval = setup.checkpoint_interval; + this->control_interval = setup.control_interval; + this->species_names = setup.species_names; + this->mape_threshold = setup.mape_threshold; } + bool GetControlIntervalEnabled() const { + return this->control_interval_enabled; + } + + auto GetControlInterval() const { return this->control_interval; } + + std::vector GetMapeThreshold() const { return this->mape_threshold; } + /* Profiling getters */ - auto GetMasterCtrlLogicTime() const { return this->ctrl_t; } + auto GetMasterCtrlLogicTime() const { return this->ctrl_time; } - auto GetMasterCtrlBcastTime() const { return this->bcast_ctrl_t; } + auto GetMasterCtrlBcastTime() const { return this->bcast_ctrl_time; } - auto GetMasterRecvCtrlLogicTime() const { return this->recv_ctrl_t; } + auto GetMasterRecvCtrlLogicTime() const { return this->recv_ctrl_time; } private: - RuntimeParameters *params; - ChemistryModule *chem; + bool rollback_enabled = false; + bool control_interval_enabled = false; + + poet::ChemistryModule *chem = nullptr; + + std::uint32_t checkpoint_interval = 0; + std::uint32_t control_interval = 0; + std::uint32_t global_iteration = 0; + std::vector mape_threshold; + + std::vector species_names; + std::string out_dir; + + double ctrl_time = 0.0; + double bcast_ctrl_time = 0.0; + double recv_ctrl_time = 0.0; + + /* Buffer for shuffled surrogate data */ + std::vector sur_shuffled; }; } // namespace poet diff --git a/src/poet.cpp b/src/poet.cpp index 48260f3c7..3525ced8d 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -25,7 +25,7 @@ #include "Base/RInsidePOET.hpp" #include "CLI/CLI.hpp" #include "Chemistry/ChemistryModule.hpp" -#include "Control/ControlManager.hpp" +#include "Control/ControlModule.hpp" #include "DataStructures/Field.hpp" #include "Init/InitialList.hpp" #include "Transport/DiffusionModule.hpp" @@ -255,8 +255,6 @@ int parseInitValues(int argc, char **argv, 
RuntimeParameters &params) { Rcpp::as(global_rt_setup->operator[]("checkpoint_interval")); params.mape_threshold = Rcpp::as>( global_rt_setup->operator[]("mape_threshold")); - params.rrmse_threshold = Rcpp::as>( - global_rt_setup->operator[]("rrmse_threshold")); } catch (const std::exception &e) { ERRMSG("Error while parsing R scripts: " + std::string(e.what())); return ParseRet::PARSER_ERROR; @@ -300,7 +298,6 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters &params, /* SIMULATION LOOP */ double dSimTime{0}; - double chkTime = 0.0; for (uint32_t iter = 1; iter < maxiter + 1; iter++) { // Rollback countdowm @@ -315,10 +312,10 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters &params, } } */ - control.beginIteration(iter); + //control.beginIteration(iter); // params.global_iter = iter; - control.isControlIteration(iter); + control.updateControlIteration(iter); // params.control_interval_enabled = (iter % params.control_interval == 0); double start_t = MPI_Wtime(); @@ -431,8 +428,7 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters &params, MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); - double chk_start = MPI_Wtime(); - control.endIteration(iter) + control.endIteration(iter); /* if (iter % params.checkpoint_interval == 0) { MSG("Writing checkpoint of iteration " + std::to_string(iter)); @@ -457,8 +453,7 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters &params, */ - double chk_end = MPI_Wtime(); - chkTime += chk_end - chk_start; + // MSG(); } // END SIMULATION LOOP @@ -476,13 +471,14 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters &params, Rcpp::List diffusion_profiling; diffusion_profiling["simtime"] = diffusion.getTransportTime(); - Rcpp::List ctrl_profiling; + /*Rcpp::List ctrl_profiling; ctrl_profiling["checkpointing_time"] = chkTime; ctrl_profiling["ctrl_logic_master"] = chem.GetMasterCtrlLogicTime(); ctrl_profiling["bcast_ctrl_logic_master"] = 
chem.GetMasterCtrlBcastTime(); ctrl_profiling["recv_ctrl_logic_maser"] = chem.GetMasterRecvCtrlLogicTime(); ctrl_profiling["ctrl_logic_worker"] = Rcpp::wrap(chem.GetWorkerControlTimings()); + */ if (params.use_dht) { chem_profiling["dht_hits"] = Rcpp::wrap(chem.GetWorkerDHTHits()); @@ -510,7 +506,7 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters &params, profiling["simtime"] = dSimTime; profiling["chemistry"] = chem_profiling; profiling["diffusion"] = diffusion_profiling; - profiling["ctrl_logic"] = ctrl_profiling; + //profiling["ctrl_logic"] = ctrl_profiling; chem.MasterLoopBreak(); @@ -652,7 +648,10 @@ int main(int argc, char *argv[]) { ChemistryModule chemistry(run_params.work_package_size, init_list.getChemistryInit(), MPI_COMM_WORLD); - ControlModule control(&run_params, &chemistry); + + ControlModule control; + + chemistry.setControlModule(&control); const ChemistryModule::SurrogateSetup surr_setup = { getSpeciesNames(init_list.getInitialGrid(), 0, MPI_COMM_WORLD), @@ -674,14 +673,11 @@ int main(int argc, char *argv[]) { run_params.out_dir, // added run_params.checkpoint_interval, run_params.control_interval, - run_params.species_count, - run_params.species_names, - run_params.mape_threshold, - run_params.rrmse_threshold}; + getSpeciesNames(init_list.getInitialGrid(), 0, MPI_COMM_WORLD), + run_params.mape_threshold}; control.enableControlLogic(ctrl_setup); - if (MY_RANK > 0) { chemistry.WorkerLoop(); } else { @@ -725,7 +721,7 @@ int main(int argc, char *argv[]) { chemistry.masterSetField(init_list.getInitialGrid()); - Rcpp::List profiling = RunMasterLoop(R, run_params, diffusion, chemistry, control); MSG("finished simulation loop"); diff --git a/src/poet.hpp.in b/src/poet.hpp.in index aea51966e..b5f807c1c 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -51,15 +51,9 @@ struct RuntimeParameters { bool print_progress = false; - bool rollback_enabled = false; - 
bool control_interval_enabled = false; - std::uint32_t global_iter = 0; - std::uint32_t sur_disabled_counter = 0; - std::uint32_t rollback_counter = 0; std::uint32_t checkpoint_interval = 0; std::uint32_t control_interval = 0; std::vector mape_threshold; - std::vector rrmse_threshold; static constexpr std::uint32_t WORK_PACKAGE_SIZE_DEFAULT = 32; std::uint32_t work_package_size = WORK_PACKAGE_SIZE_DEFAULT; From 29e66202f73e4fbcd26cd9872bf0c613f4a0e550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Fri, 24 Oct 2025 12:44:31 +0200 Subject: [PATCH 19/19] chore(gitignore): ignore common local installation paths --- .gitignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0e1d0720f..2e30f79e2 100644 --- a/.gitignore +++ b/.gitignore @@ -143,4 +143,10 @@ build/ /.cache/ .vscode -.codechecker \ No newline at end of file +.codechecker + +# Prevent upload of local installations +bin/ +share/ +lib/ +include/