From 858da525d78524f6769750986fab7add65664313 Mon Sep 17 00:00:00 2001 From: rastogi Date: Mon, 1 Sep 2025 12:39:01 +0200 Subject: [PATCH] computeStats not working correctly, Unit Tests added --- CMakeLists.txt | 4 +- src/CMakeLists.txt | 1 + src/Chemistry/ChemistryModule.hpp | 768 ++++++++++++++++-------------- src/Chemistry/MasterFunctions.cpp | 128 +++-- src/Chemistry/WorkerFunctions.cpp | 81 +--- src/IO/HDF5Functions.hpp | 5 +- src/IO/StatsIO.cpp | 37 ++ src/IO/StatsIO.hpp | 9 + src/poet.cpp | 143 ++++-- src/poet.hpp.in | 2 +- test/CMakeLists.txt | 2 +- test/testStats.cpp | 119 +++++ 12 files changed, 789 insertions(+), 510 deletions(-) create mode 100644 src/IO/StatsIO.cpp create mode 100644 src/IO/StatsIO.hpp create mode 100644 test/testStats.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fa7f009a1..1550f0962 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,12 +28,12 @@ if (POET_PREPROCESS_BENCHS) endif() # as tug will also pull in doctest as a dependency -set(TUG_ENABLE_TESTING OFF CACHE BOOL "" FORCE) +set(TUG_ENABLE_TESTING ON CACHE BOOL "" FORCE) add_subdirectory(ext/tug EXCLUDE_FROM_ALL) add_subdirectory(ext/iphreeqc EXCLUDE_FROM_ALL) -option(POET_ENABLE_TESTING "Build test suite for POET" OFF) +option(POET_ENABLE_TESTING "Build test suite for POET" ON) if (POET_ENABLE_TESTING) add_subdirectory(test) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89790649b..a9849a768 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,7 @@ add_library(POETLib Init/DiffusionInit.cpp Init/ChemistryInit.cpp IO/checkpoint.cpp + IO/StatsIO.cpp DataStructures/Field.cpp Transport/DiffusionModule.cpp Chemistry/ChemistryModule.cpp diff --git a/src/Chemistry/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp index 22547a212..cc78ede87 100644 --- a/src/Chemistry/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -23,392 +23,432 @@ #include #include -namespace poet { -/** - * \brief Wrapper around PhreeqcRM to provide POET specific parallelization with - * easy access. - */ -class ChemistryModule { -public: +namespace poet +{ /** - * Creates a new instance of Chemistry module with given grid cell count, work - * package size and communicator. - * - * This constructor shall only be called by the master. To create workers, see - * ChemistryModule::createWorker . - * - * When the use of parallelization is intended, the nxyz value shall be set to - * 1 to save memory and only one node is needed for initialization. - * - * \param nxyz Count of grid cells to allocate and initialize for each - * process. For parellel use set to 1 at the master. - * \param wp_size Count of grid cells to fill each work package at maximum. - * \param communicator MPI communicator to distribute work in. + * \brief Wrapper around PhreeqcRM to provide POET specific parallelization with + * easy access. */ - ChemistryModule(uint32_t wp_size, - const InitialList::ChemistryInit chem_params, - MPI_Comm communicator); + class ChemistryModule + { + public: + /** + * Creates a new instance of Chemistry module with given grid cell count, work + * package size and communicator. + * + * This constructor shall only be called by the master. To create workers, see + * ChemistryModule::createWorker . + * + * When the use of parallelization is intended, the nxyz value shall be set to + * 1 to save memory and only one node is needed for initialization. + * + * \param nxyz Count of grid cells to allocate and initialize for each + * process. For parellel use set to 1 at the master. + * \param wp_size Count of grid cells to fill each work package at maximum. + * \param communicator MPI communicator to distribute work in. + */ + ChemistryModule(uint32_t wp_size, + const InitialList::ChemistryInit chem_params, + MPI_Comm communicator); - /** - * Deconstructor, which frees DHT data structure if used. - */ - ~ChemistryModule(); + /** + * Deconstructor, which frees DHT data structure if used. + */ + ~ChemistryModule(); - void masterSetField(Field field); - /** - * Run the chemical simulation with parameters set. - */ - void simulate(double dt); + void masterSetField(Field field); + /** + * Run the chemical simulation with parameters set. + */ + void simulate(double dt); - /** - * Returns all known species names, including not only aqueous species, but - * also equilibrium, exchange, surface and kinetic reactants. - */ - // auto GetPropNames() const { return this->prop_names; } + /** + * Returns all known species names, including not only aqueous species, but + * also equilibrium, exchange, surface and kinetic reactants. + */ + // auto GetPropNames() const { return this->prop_names; } - /** - * Return the accumulated runtime in seconds for chemical simulation. - */ - auto GetChemistryTime() const { return this->chem_t; } + /** + * Return the accumulated runtime in seconds for chemical simulation. + */ + auto GetChemistryTime() const { return this->chem_t; } - void setFilePadding(std::uint32_t maxiter) { - this->file_pad = - static_cast(std::ceil(std::log10(maxiter + 1))); - } + void setFilePadding(std::uint32_t maxiter) + { + this->file_pad = + static_cast(std::ceil(std::log10(maxiter + 1))); + } - struct SurrogateSetup { - std::vector prop_names; - std::array base_totals; - bool has_het_ids; + struct SurrogateSetup + { + std::vector prop_names; + std::array base_totals; + bool has_het_ids; - bool dht_enabled; - std::uint32_t dht_size_mb; - int dht_snaps; - std::string dht_out_dir; + bool dht_enabled; + std::uint32_t dht_size_mb; + int dht_snaps; + std::string dht_out_dir; - bool interp_enabled; - std::uint32_t interp_bucket_size; - std::uint32_t interp_size_mb; - std::uint32_t interp_min_entries; - bool ai_surrogate_enabled; - }; + bool interp_enabled; + std::uint32_t interp_bucket_size; + std::uint32_t interp_size_mb; + std::uint32_t interp_min_entries; + bool ai_surrogate_enabled; + }; - void masterEnableSurrogates(const SurrogateSetup &setup) { - // FIXME: This is a hack to get the prop_names and prop_count from the setup - this->prop_names = setup.prop_names; - this->prop_count = setup.prop_names.size(); + void masterEnableSurrogates(const SurrogateSetup &setup) + { + // FIXME: This is a hack to get the prop_names and prop_count from the setup + this->prop_names = setup.prop_names; + this->prop_count = setup.prop_names.size(); - this->dht_enabled = setup.dht_enabled; - this->interp_enabled = setup.interp_enabled; - this->ai_surrogate_enabled = setup.ai_surrogate_enabled; + this->dht_enabled = setup.dht_enabled; + this->interp_enabled = setup.interp_enabled; + this->ai_surrogate_enabled = setup.ai_surrogate_enabled; - this->base_totals = setup.base_totals; + this->base_totals = setup.base_totals; - if (this->dht_enabled || this->interp_enabled) { - this->initializeDHT(setup.dht_size_mb, this->params.dht_species, - setup.has_het_ids); + if (this->dht_enabled || this->interp_enabled) + { + this->initializeDHT(setup.dht_size_mb, this->params.dht_species, + setup.has_het_ids); - if (setup.dht_snaps != DHT_SNAPS_DISABLED) { - this->setDHTSnapshots(setup.dht_snaps, setup.dht_out_dir); + if (setup.dht_snaps != DHT_SNAPS_DISABLED) + { + this->setDHTSnapshots(setup.dht_snaps, setup.dht_out_dir); + } + } + + if (this->interp_enabled) + { + this->initializeInterp(setup.interp_bucket_size, setup.interp_size_mb, + setup.interp_min_entries, + this->params.interp_species); } } - if (this->interp_enabled) { - this->initializeInterp(setup.interp_bucket_size, setup.interp_size_mb, - setup.interp_min_entries, - this->params.interp_species); + /** + * Intended to alias input parameters for grid initialization with a single + * value per species. + */ + using SingleCMap = std::unordered_map; + + /** + * Intended to alias input parameters for grid initialization with mutlitple + * values per species. + */ + using VectorCMap = std::unordered_map>; + + /** + * Enumerating DHT file options + */ + enum + { + DHT_SNAPS_DISABLED = 0, //!< disabled file output + DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation + DHT_SNAPS_ITEREND //!< output snapshots after each iteration + }; + + /** + * **Only called by workers!** Start the worker listening loop. + */ + void WorkerLoop(); + + /** + * **Called by master** Advise the workers to break the loop. + */ + void MasterLoopBreak(); + + /** + * **Master only** Return count of grid cells. + */ + auto GetNCells() const { return this->n_cells; } + /** + * **Master only** Return work package size. + */ + auto GetWPSize() const { return this->wp_size; } + /** + * **Master only** Return the time in seconds the master spent waiting for any + * free worker. + */ + auto GetMasterIdleTime() const { return this->idle_t; } + /** + * **Master only** Return the time in seconds the master spent in sequential + * part of the simulation, including times for shuffling/unshuffling field + * etc. + */ + auto GetMasterSequentialTime() const { return this->seq_t; } + /** + * **Master only** Return the time in seconds the master spent in the + * send/receive loop. + */ + auto GetMasterLoopTime() const { return this->send_recv_t; } + + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to run Phreeqc simulation. + * + * \return Vector of all accumulated Phreeqc timings. + */ + std::vector GetWorkerPhreeqcTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to get values from the DHT. + * + * \return Vector of all accumulated DHT get times. + */ + std::vector GetWorkerDHTGetTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers to write values to the DHT. + * + * \return Vector of all accumulated DHT fill times. + */ + std::vector GetWorkerDHTFillTimings() const; + /** + * **Master only** Collect and return all accumulated timings recorded by + * workers waiting for work packages from the master. + * + * \return Vector of all accumulated waiting times. + */ + std::vector GetWorkerIdleTimings() const; + + /** + * **Master only** Collect and return DHT hits of all workers. + * + * \return Vector of all count of DHT hits. + */ + std::vector GetWorkerDHTHits() const; + + /** + * **Master only** Collect and return DHT evictions of all workers. + * + * \return Vector of all count of DHT evictions. + */ + std::vector GetWorkerDHTEvictions() const; + + /** + * **Master only** Returns the current state of the chemical field. + * + * \return Reference to the chemical field. + */ + Field &getField() { return this->chem_field; } + + /** + * **Master only** Enable/disable progress bar. + * + * \param enabled True if print progressbar, false if not. + */ + void setProgressBarPrintout(bool enabled) + { + this->print_progessbar = enabled; + }; + + /** + * **Master only** Set the ai surrogate validity vector from R + */ + void set_ai_surrogate_validity_vector(std::vector r_vector); + + std::vector GetWorkerInterpolationCalls() const; + + std::vector GetWorkerInterpolationWriteTimings() const; + std::vector GetWorkerInterpolationReadTimings() const; + std::vector GetWorkerInterpolationGatherTimings() const; + std::vector GetWorkerInterpolationFunctionCallTimings() const; + + std::vector GetWorkerPHTCacheHits() const; + + std::vector ai_surrogate_validity_vector; + + RuntimeParameters *runtime_params = nullptr; + uint32_t control_iteration_counter = 0; + + struct error_stats + { + std::vector mape; + std::vector rrsme; + uint32_t iteration; + + error_stats(size_t species_count, size_t iter) + : mape(species_count, 0.0), rrsme(species_count, 0.0), iteration(iter) {} + }; + + std::vector error_stats_history; + + static void computeStats(const std::vector &pqc_vector, + const std::vector &sur_vector, + uint32_t size_per_prop, uint32_t species_count, + error_stats &stats); + + protected: + void initializeDHT(uint32_t size_mb, + const NamedVector &key_species, + bool has_het_ids); + void setDHTSnapshots(int type, const std::string &out_dir); + void setDHTReadFile(const std::string &input_file); + + void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, + std::uint32_t min_entries, + const NamedVector &key_species); + + enum + { + CHEM_FIELD_INIT, + CHEM_DHT_ENABLE, + CHEM_DHT_SIGNIF_VEC, + CHEM_DHT_SNAPS, + CHEM_DHT_READ_FILE, + CHEM_IP_ENABLE, + CHEM_IP_MIN_ENTRIES, + CHEM_IP_SIGNIF_VEC, + CHEM_WORK_LOOP, + CHEM_PERF, + CHEM_BREAK_MAIN_LOOP, + CHEM_AI_BCAST_VALIDITY + }; + + enum + { + LOOP_WORK, + LOOP_END, + LOOP_CTRL + }; + + enum + { + WORKER_PHREEQC, + WORKER_DHT_GET, + WORKER_DHT_FILL, + WORKER_IDLE, + WORKER_IP_WRITE, + WORKER_IP_READ, + WORKER_IP_GATHER, + WORKER_IP_FC, + WORKER_DHT_HITS, + WORKER_DHT_EVICTIONS, + WORKER_PHT_CACHE_HITS, + WORKER_IP_CALLS + }; + + std::vector interp_calls; + std::vector dht_hits; + std::vector dht_evictions; + + struct worker_s + { + double phreeqc_t = 0.; + double dht_get = 0.; + double dht_fill = 0.; + double idle_t = 0.; + }; + + struct worker_info_s + { + char has_work = 0; + double *send_addr; + }; + + using worker_list_t = std::vector; + using workpointer_t = std::vector::iterator; + + void MasterRunParallel(double dt); + void MasterRunSequential(); + + void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, + int &pkg_to_send, int &count_pkgs, int &free_workers, + double dt, uint32_t iteration, uint32_t control_iteration, + const std::vector &wp_sizes_vector); + void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, + int &free_workers); + + std::vector MasterGatherWorkerTimings(int type) const; + std::vector MasterGatherWorkerMetrics(int type) const; + + void WorkerProcessPkgs(struct worker_s &timings, uint32_t &iteration); + + void WorkerDoWork(MPI_Status &probe_status, int double_count, + struct worker_s &timings); + void WorkerPostIter(MPI_Status &prope_status, uint32_t iteration); + void WorkerPostSim(uint32_t iteration); + + void WorkerWriteDHTDump(uint32_t iteration); + void WorkerReadDHTDump(const std::string &dht_input_file); + + void WorkerPerfToMaster(int type, const struct worker_s &timings); + void WorkerMetricsToMaster(int type); + + void WorkerRunWorkPackage(WorkPackage &work_package, double dSimTime, + double dTimestep); + + std::vector CalculateWPSizesVector(uint32_t n_cells, + uint32_t wp_size) const; + std::vector shuffleField(const std::vector &in_field, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count); + void unshuffleField(const std::vector &in_buffer, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count, std::vector &out_field); + std::vector + parseDHTSpeciesVec(const NamedVector &key_species, + const std::vector &to_compare) const; + + void BCastStringVec(std::vector &io); + + int comm_size, comm_rank; + MPI_Comm group_comm; + + bool is_sequential; + bool is_master; + + uint32_t wp_size; + bool dht_enabled{false}; + int dht_snaps_type{DHT_SNAPS_DISABLED}; + std::string dht_file_out_dir; + + poet::DHT_Wrapper *dht = nullptr; + + bool interp_enabled{false}; + std::unique_ptr interp; + + bool ai_surrogate_enabled{false}; + + static constexpr uint32_t BUFFER_OFFSET = 6; + + inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const + { + MPI_Bcast(buf, count, datatype, 0, this->group_comm); } - } - /** - * Intended to alias input parameters for grid initialization with a single - * value per species. - */ - using SingleCMap = std::unordered_map; - - /** - * Intended to alias input parameters for grid initialization with mutlitple - * values per species. - */ - using VectorCMap = std::unordered_map>; - - /** - * Enumerating DHT file options - */ - enum { - DHT_SNAPS_DISABLED = 0, //!< disabled file output - DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation - DHT_SNAPS_ITEREND //!< output snapshots after each iteration - }; - - /** - * **Only called by workers!** Start the worker listening loop. - */ - void WorkerLoop(); - - /** - * **Called by master** Advise the workers to break the loop. - */ - void MasterLoopBreak(); - - /** - * **Master only** Return count of grid cells. - */ - auto GetNCells() const { return this->n_cells; } - /** - * **Master only** Return work package size. - */ - auto GetWPSize() const { return this->wp_size; } - /** - * **Master only** Return the time in seconds the master spent waiting for any - * free worker. - */ - auto GetMasterIdleTime() const { return this->idle_t; } - /** - * **Master only** Return the time in seconds the master spent in sequential - * part of the simulation, including times for shuffling/unshuffling field - * etc. - */ - auto GetMasterSequentialTime() const { return this->seq_t; } - /** - * **Master only** Return the time in seconds the master spent in the - * send/receive loop. - */ - auto GetMasterLoopTime() const { return this->send_recv_t; } - - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to run Phreeqc simulation. - * - * \return Vector of all accumulated Phreeqc timings. - */ - std::vector GetWorkerPhreeqcTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to get values from the DHT. - * - * \return Vector of all accumulated DHT get times. - */ - std::vector GetWorkerDHTGetTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers to write values to the DHT. - * - * \return Vector of all accumulated DHT fill times. - */ - std::vector GetWorkerDHTFillTimings() const; - /** - * **Master only** Collect and return all accumulated timings recorded by - * workers waiting for work packages from the master. - * - * \return Vector of all accumulated waiting times. - */ - std::vector GetWorkerIdleTimings() const; - - /** - * **Master only** Collect and return DHT hits of all workers. - * - * \return Vector of all count of DHT hits. - */ - std::vector GetWorkerDHTHits() const; - - /** - * **Master only** Collect and return DHT evictions of all workers. - * - * \return Vector of all count of DHT evictions. - */ - std::vector GetWorkerDHTEvictions() const; - - /** - * **Master only** Returns the current state of the chemical field. - * - * \return Reference to the chemical field. - */ - Field &getField() { return this->chem_field; } - - /** - * **Master only** Enable/disable progress bar. - * - * \param enabled True if print progressbar, false if not. - */ - void setProgressBarPrintout(bool enabled) { - this->print_progessbar = enabled; - }; - - /** - * **Master only** Set the ai surrogate validity vector from R - */ - void set_ai_surrogate_validity_vector(std::vector r_vector); - - std::vector GetWorkerInterpolationCalls() const; - - std::vector GetWorkerInterpolationWriteTimings() const; - std::vector GetWorkerInterpolationReadTimings() const; - std::vector GetWorkerInterpolationGatherTimings() const; - std::vector GetWorkerInterpolationFunctionCallTimings() const; - - std::vector GetWorkerPHTCacheHits() const; - - std::vector ai_surrogate_validity_vector; - - RuntimeParameters *runtime_params = nullptr; - -protected: - void initializeDHT(uint32_t size_mb, - const NamedVector &key_species, - bool has_het_ids); - void setDHTSnapshots(int type, const std::string &out_dir); - void setDHTReadFile(const std::string &input_file); - - void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, - std::uint32_t min_entries, - const NamedVector &key_species); - - enum { - CHEM_FIELD_INIT, - CHEM_DHT_ENABLE, - CHEM_DHT_SIGNIF_VEC, - CHEM_DHT_SNAPS, - CHEM_DHT_READ_FILE, - CHEM_IP_ENABLE, - CHEM_IP_MIN_ENTRIES, - CHEM_IP_SIGNIF_VEC, - CHEM_WORK_LOOP, - CHEM_PERF, - CHEM_BREAK_MAIN_LOOP, - CHEM_AI_BCAST_VALIDITY - }; - - enum { LOOP_WORK, LOOP_END, WITH_REL_ERROR }; - - enum { - WORKER_PHREEQC, - WORKER_DHT_GET, - WORKER_DHT_FILL, - WORKER_IDLE, - WORKER_IP_WRITE, - WORKER_IP_READ, - WORKER_IP_GATHER, - WORKER_IP_FC, - WORKER_DHT_HITS, - WORKER_DHT_EVICTIONS, - WORKER_PHT_CACHE_HITS, - WORKER_IP_CALLS - }; - - std::vector interp_calls; - std::vector dht_hits; - std::vector dht_evictions; - - struct worker_s { - double phreeqc_t = 0.; - double dht_get = 0.; - double dht_fill = 0.; + inline void PropagateFunctionType(int &type) const + { + ChemBCast(&type, 1, MPI_INT); + } + double simtime = 0.; double idle_t = 0.; + double seq_t = 0.; + double send_recv_t = 0.; + + std::array base_totals{0}; + + bool print_progessbar{false}; + + std::uint8_t file_pad{1}; + + double chem_t{0.}; + + uint32_t n_cells = 0; + uint32_t prop_count = 0; + std::vector prop_names; + + Field chem_field; + + const InitialList::ChemistryInit params; + + std::unique_ptr pqc_runner; + + std::vector sur_shuffled; }; - - struct worker_info_s { - char has_work = 0; - double *send_addr; - }; - - using worker_list_t = std::vector; - using workpointer_t = std::vector::iterator; - - void MasterRunParallel(double dt); - void MasterRunSequential(); - - void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer, - int &pkg_to_send, int &count_pkgs, int &free_workers, - double dt, uint32_t iteration, uint32_t control_iter, - const std::vector &wp_sizes_vector); - void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send, - int &free_workers); - - std::vector MasterGatherWorkerTimings(int type) const; - std::vector MasterGatherWorkerMetrics(int type) const; - - void WorkerProcessPkgs(struct worker_s &timings, uint32_t &iteration); - - void WorkerDoWork(MPI_Status &probe_status, int double_count, - struct worker_s &timings); - void WorkerPostIter(MPI_Status &prope_status, uint32_t iteration); - void WorkerPostSim(uint32_t iteration); - - void WorkerWriteDHTDump(uint32_t iteration); - void WorkerReadDHTDump(const std::string &dht_input_file); - - void WorkerPerfToMaster(int type, const struct worker_s &timings); - void WorkerMetricsToMaster(int type); - - void WorkerRunWorkPackage(WorkPackage &work_package, double dSimTime, - double dTimestep); - - std::vector CalculateWPSizesVector(uint32_t n_cells, - uint32_t wp_size) const; - - std::vector shuffleField(const std::vector &in_field, - uint32_t size_per_prop, uint32_t prop_count, - uint32_t wp_count); - void unshuffleField(const std::vector &in_buffer, - uint32_t size_per_prop, uint32_t prop_count, - uint32_t wp_count, std::vector &out_field); - std::vector - parseDHTSpeciesVec(const NamedVector &key_species, - const std::vector &to_compare) const; - - void BCastStringVec(std::vector &io); - - int comm_size, comm_rank; - MPI_Comm group_comm; - - bool is_sequential; - bool is_master; - - uint32_t wp_size; - bool dht_enabled{false}; - int dht_snaps_type{DHT_SNAPS_DISABLED}; - std::string dht_file_out_dir; - - poet::DHT_Wrapper *dht = nullptr; - - bool interp_enabled{false}; - std::unique_ptr interp; - - bool ai_surrogate_enabled{false}; - - static constexpr uint32_t BUFFER_OFFSET = 6; - - inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const { - MPI_Bcast(buf, count, datatype, 0, this->group_comm); - } - - inline void PropagateFunctionType(int &type) const { - ChemBCast(&type, 1, MPI_INT); - } - double simtime = 0.; - double idle_t = 0.; - double seq_t = 0.; - double send_recv_t = 0.; - - std::array base_totals{0}; - - bool print_progessbar{false}; - - std::uint8_t file_pad{1}; - - double chem_t{0.}; - - uint32_t n_cells = 0; - uint32_t prop_count = 0; - std::vector prop_names; - - Field chem_field; - - const InitialList::ChemistryInit params; - - std::unique_ptr pqc_runner; -}; } // namespace poet #endif // CHEMISTRYMODULE_H_ diff --git a/src/Chemistry/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp index 63858c530..c1ce0b75e 100644 --- a/src/Chemistry/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -172,6 +173,61 @@ std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const return ret; } +void poet::ChemistryModule::computeStats(const std::vector &pqc_vector, + const std::vector &sur_vector, + uint32_t size_per_prop, uint32_t species_count, + error_stats &stats) +{ + for (uint32_t i = 0; i < species_count; i++) + { + double err_sum = 0.0; + double sqr_err_sum = 0.0; + int count = 0; + + for (uint32_t j = 0; j < size_per_prop; j++) + { + + double pqc_value = pqc_vector[i * size_per_prop + j]; + double sur_value = sur_vector[i * size_per_prop + j]; + +if (i == 0 && (j % 10000 == 0)) { + std::cout << "i=" << i << ", j=" << j + << ", idx=" << i * size_per_prop + j + << ", pqc=" << pqc_value + << ", sur=" << sur_value + << std::endl; +} + + if (pqc_value != 0) + { + double rel_err = (pqc_value - sur_value) / pqc_value; + err_sum += std::abs(rel_err); + sqr_err_sum += rel_err * rel_err; + count++; + } + if (pqc_value == 0 && sur_value != 0) + { + err_sum += 1.0; + sqr_err_sum += 1.0; + count++; + } + + if (pqc_value == 0 && sur_value == 0) + { + } + + // else: both cases are zero, skip (no error) + } + if (i == 0) + { + std::cout << "computeStats, i==0, err_sum: " << err_sum << std::endl; + std::cout << "computeStats, i==0, sqr_err_sum: " << sqr_err_sum << std::endl; + } + stats.mape[i] = (count > 0) ? (100.0 / count) * err_sum : 0.0; + stats.rrsme[i] = (count > 0) ? std::sqrt(sqr_err_sum / count) : 0.0; + } + +} inline std::vector shuffleVector(const std::vector &in_vector, uint32_t size_per_prop, @@ -355,30 +411,21 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, pkg_to_recv -= 1; free_workers++; } - if (probe_status.MPI_TAG == WITH_REL_ERROR) + if (probe_status.MPI_TAG == LOOP_CTRL) { MPI_Get_count(&probe_status, MPI_DOUBLE, &size); - std::cout << "[Master] Probed rel error from worker " << p - << ", size = " << size << std::endl; + // layout of buffer is [phreeqc][surrogate] + std::vector recv_buffer(size); - int half = size/2; - - std::vector rel_err_buffer(size); - std::vector rel_error(half); - MPI_Recv(rel_err_buffer.data(), size, MPI_DOUBLE, p, WITH_REL_ERROR, + MPI_Recv(recv_buffer.data(), size, MPI_DOUBLE, p, LOOP_CTRL, this->group_comm, MPI_STATUS_IGNORE); - + std::copy(recv_buffer.begin(), recv_buffer.begin() + (size / 2), + w_list[p - 1].send_addr); - std::copy(rel_err_buffer.begin(), rel_err_buffer.begin() + half, - w_list[p - 1].send_addr); - - std::copy(rel_err_buffer.begin() + half, rel_err_buffer.end(), rel_error.begin()); - - std::cout << "[Master] Received rel error buffer from worker " << p - << ", first value = " << (rel_err_buffer.empty() ? -1 : rel_err_buffer[0]) - << std::endl; + sur_shuffled.insert(sur_shuffled.end(), recv_buffer.begin() + (size / 2), + recv_buffer.begin() + size); w_list[p - 1].has_work = 0; pkg_to_recv -= 1; @@ -461,12 +508,18 @@ void poet::ChemistryModule::MasterRunParallel(double dt) static uint32_t iteration = 0; uint32_t control_iteration = static_cast(this->runtime_params->control_iteration_active ? 1 : 0); + if (control_iteration) + { + sur_shuffled.clear(); + sur_shuffled.reserve(this->n_cells * this->prop_count); + } /* start time measurement of sequential part */ seq_a = MPI_Wtime(); /* shuffle grid */ // grid.shuffleAndExport(mpi_buffer); + std::vector mpi_buffer = shuffleField(chem_field.AsVector(), this->n_cells, this->prop_count, wp_sizes_vector.size()); @@ -507,30 +560,6 @@ void poet::ChemistryModule::MasterRunParallel(double dt) // ... and try to receive them from workers who has finished their work MasterRecvPkgs(worker_list, pkg_to_recv, pkg_to_send > 0, free_workers); } - // to do: Statistik - - /* if control_iteration_active is true receive rel. error data and compare with epsilon */ - if (this->runtime_params->control_iteration_active) - { - - // do Statistik - /** - int rel_err_offset = size / 2; // or calculate as needed - - for (std::size_t ep_i = 0; ep_i < this->runtime_params->species_epsilon.size(); ep_i++) - { - if (rel_err_buffer[rel_err_offset + ep_i] > this->runtime_params->species_epsilon[ep_i]) - { - std::cout << "[Master] At least one relative error exceeded epsilon threshold!" - << std::endl; - std::cout << "value: " << rel_err_buffer[rel_err_offset + ep_i] << " epsilon: " - << this->runtime_params->species_epsilon[ep_i] << std::endl; - break; - } - } - */ - } - // Just to complete the progressbar std::cout << std::endl; @@ -551,6 +580,23 @@ void poet::ChemistryModule::MasterRunParallel(double dt) /* do master stuff */ + if (control_iteration) + { + control_iteration_counter++; + + std::vector sur_unshuffled{sur_shuffled}; + + unshuffleField(sur_shuffled, this->n_cells, this->prop_count, + wp_sizes_vector.size(), sur_unshuffled); + + error_stats stats(this->prop_count, control_iteration_counter * runtime_params->control_iteration); + + computeStats(out_vec, sur_unshuffled, this->n_cells, this->prop_count, stats); + error_stats_history.push_back(stats); + + // to do: control values to epsilon + } + /* start time measurement of master chemistry */ sim_e_chemistry = MPI_Wtime(); diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index a45a5bc23..1b35387c1 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -238,77 +238,48 @@ namespace poet phreeqc_time_end = MPI_Wtime(); if (control_iteration_active) - { - // increase size for relative error - std::size_t rel_error_size = s_curr_wp.size * this->prop_count; - - // extend mpi_buffer, for rel. error for every species - mpi_buffer.resize(count + rel_error_size); - std::size_t offset = count; - count += rel_error_size; - - // calc rel. error if phreeqc != surrogate + { + std::size_t sur_wp_offset = s_curr_wp.size * this->prop_count; + + mpi_buffer.resize(count + sur_wp_offset); + for (std::size_t wp_i = 0; wp_i < s_curr_wp_pqc.size; wp_i++) { - const auto &surrogate_result = s_curr_wp.output[wp_i]; - const auto &phreeqc_result = s_curr_wp_pqc.output[wp_i]; - - // std::cout << "surrogate_result.size() " << surrogate_result.size() << ", phreeqc_result " << phreeqc_result.size() << std::endl; - - // fill NaNs - if (surrogate_result.size() == 0) - { - for (std::size_t i = 0; i < this->prop_count; i++) - { - mpi_buffer[offset++] = std::numeric_limits::quiet_NaN(); - } - } - - // compute rel error - if (surrogate_result.size() == phreeqc_result.size()) - { - for (std::size_t i = 0; i < this->prop_count; i++) - { - double ref = phreeqc_result[i]; - double surrogate = surrogate_result[i]; - - if (std::abs(ref) > 1e-9) - { - mpi_buffer[offset++] = std::abs((surrogate - ref) / ref); - } - else - { - mpi_buffer[offset++] = 0.0; - } - } - } + std::copy(s_curr_wp_pqc.output[wp_i].begin(), s_curr_wp_pqc.output[wp_i].end(), + mpi_buffer.begin() + this->prop_count * wp_i); } + + // s_curr_wp only contains the interpolated data + // copy surrogate output after the the pqc output, mpi_buffer[pqc][interp] + + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) + { + std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), + mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i); + } + + count += sur_wp_offset; } - - poet::WorkPackage &s_curr_wp_copy = control_iteration_active ? s_curr_wp_pqc : s_curr_wp; - - for (std::size_t wp_i = 0; wp_i < s_curr_wp_copy.size; wp_i++) + else { - std::copy(s_curr_wp_copy.output[wp_i].begin(), s_curr_wp_copy.output[wp_i].end(), - mpi_buffer.begin() + this->prop_count * wp_i); + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) + { + std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), + mpi_buffer.begin() + this->prop_count * wp_i); + } } /* send results to master */ MPI_Request send_req; - int mpi_tag = control_iteration_active ? WITH_REL_ERROR : LOOP_WORK; + int mpi_tag = control_iteration_active ? LOOP_CTRL : LOOP_WORK; MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, mpi_tag, MPI_COMM_WORLD, &send_req); - if (control_iteration_active) - { - std::cout << "[Worker " << this->comm_rank << "] Sent results." << std::endl; - } - if (dht_enabled || interp_enabled) { /* write results to DHT */ dht_fill_start = MPI_Wtime(); - dht->fillDHT(s_curr_wp_copy); + dht->fillDHT(control_iteration_active ? s_curr_wp_pqc : s_curr_wp); dht_fill_end = MPI_Wtime(); if (interp_enabled) diff --git a/src/IO/HDF5Functions.hpp b/src/IO/HDF5Functions.hpp index e7954a2f7..87687f2b9 100644 --- a/src/IO/HDF5Functions.hpp +++ b/src/IO/HDF5Functions.hpp @@ -1,8 +1,9 @@ -#pragma once +#pragma once #include #include "Datatypes.hpp" int write_checkpoint(const std::string &file_path, struct Checkpoint_s &&checkpoint); -int read_checkpoint(const std::string &file_path, struct Checkpoint_s &checkpoint); \ No newline at end of file +int read_checkpoint(const std::string &file_path, struct Checkpoint_s &checkpoint); + diff --git a/src/IO/StatsIO.cpp b/src/IO/StatsIO.cpp new file mode 100644 index 000000000..4312a46dd --- /dev/null +++ b/src/IO/StatsIO.cpp @@ -0,0 +1,37 @@ +#include "IO/StatsIO.hpp" +#include +#include +#include + +namespace poet +{ + void writeStatsToCSV(const std::vector &all_stats, + const std::vector &species_names, + const std::string &filename) + { + std::ofstream out(filename); + if (!out.is_open()) + { + std::cerr << "Could not open " << filename << " !" << std::endl; + return; + } + + // header + out << "Iteration, Species, MAPE, RRSME \n"; + + for (size_t i = 0; i < all_stats.size(); ++i) + { + for (size_t j = 0; j < species_names.size(); ++j) + { + out << all_stats[i].iteration << ",\t" + << species_names[j] << ",\t" + << all_stats[i].mape[j] << ",\t" + << all_stats[i].rrsme[j] << "\n"; + } + out << std::endl; + } + + out.close(); + std::cout << "Stats written to " << filename << "\n"; + } +} // namespace poet \ No newline at end of file diff --git a/src/IO/StatsIO.hpp b/src/IO/StatsIO.hpp new file mode 100644 index 000000000..a865cc64a --- /dev/null +++ b/src/IO/StatsIO.hpp @@ -0,0 +1,9 @@ +#include +#include "Chemistry/ChemistryModule.hpp" + +namespace poet +{ + void writeStatsToCSV(const std::vector &all_stats, + const std::vector &species_names, + const std::string &filename); +} // namespace poet diff --git a/src/poet.cpp b/src/poet.cpp index 408bb56ff..2f39673be 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -28,6 +28,7 @@ #include "DataStructures/Field.hpp" #include "IO/Datatypes.hpp" #include "IO/HDF5Functions.hpp" +#include "IO/StatsIO.hpp" #include "Init/InitialList.hpp" #include "Transport/DiffusionModule.hpp" @@ -67,7 +68,8 @@ static poet::DEFunc ReadRObj_R; static poet::DEFunc SaveRObj_R; static poet::DEFunc source_R; -static void init_global_functions(RInside &R) { +static void init_global_functions(RInside &R) +{ R.parseEval(kin_r_library); master_init_R = DEFunc("master_init"); master_iteration_end_R = DEFunc("master_iteration_end"); @@ -90,9 +92,15 @@ static void init_global_functions(RInside &R) { // R.parseEval("mysetup$state_C <- TMP"); // } -enum ParseRet { PARSER_OK, PARSER_ERROR, PARSER_HELP }; +enum ParseRet +{ + PARSER_OK, + PARSER_ERROR, + PARSER_HELP +}; -int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { +int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) +{ CLI::App app{"POET - Potsdam rEactive Transport simulator"}; @@ -174,9 +182,12 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { "Output directory of the simulation") ->required(); - try { + try + { app.parse(argc, argv); - } catch (const CLI::ParseError &e) { + } + catch (const CLI::ParseError &e) + { app.exit(e); return -1; } @@ -188,14 +199,16 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { if (params.as_qs) params.out_ext = "qs"; - if (MY_RANK == 0) { + if (MY_RANK == 0) + { // MSG("Complete results storage is " + BOOL_PRINT(simparams.store_result)); MSG("Output format/extension is " + params.out_ext); MSG("Work Package Size: " + std::to_string(params.work_package_size)); MSG("DHT is " + BOOL_PRINT(params.use_dht)); MSG("AI Surrogate is " + BOOL_PRINT(params.use_ai_surrogate)); - if (params.use_dht) { + if (params.use_dht) + { // MSG("DHT strategy is " + std::to_string(simparams.dht_strategy)); // MDL: these should be outdated (?) // MSG("DHT key default digits (ignored if 'signif_vector' is " @@ -209,7 +222,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { // MSG("DHT load file is " + chem_params.dht_file); } - if (params.use_interp) { + if (params.use_interp) + { MSG("PHT interpolation enabled: " + BOOL_PRINT(params.use_interp)); MSG("PHT interp-size = " + std::to_string(params.interp_size)); MSG("PHT interp-min = " + std::to_string(params.interp_min_entries)); @@ -237,7 +251,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { // // log before rounding? // R["dht_log"] = simparams.dht_log; - try { + try + { Rcpp::List init_params_(ReadRObj_R(init_file)); params.init_params = init_params_; @@ -251,12 +266,13 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { params.timesteps = Rcpp::as>(global_rt_setup->operator[]("timesteps")); - params.control_iteration = - Rcpp::as(global_rt_setup->operator[]("control_iteration")); - params.species_epsilon = + params.control_iteration = + Rcpp::as(global_rt_setup->operator[]("control_iteration")); + params.species_epsilon = Rcpp::as>(global_rt_setup->operator[]("species_epsilon")); - - } catch (const std::exception &e) { + } + catch (const std::exception &e) + { ERRMSG("Error while parsing R scripts: " + std::string(e.what())); return ParseRet::PARSER_ERROR; } @@ -266,7 +282,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) { // HACK: this is a step back as the order and also the count of fields is // predefined, but it will change in the future -void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) { +void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) +{ R["TMP"] = Rcpp::wrap(trans.AsVector()); R["TMP_PROPS"] = Rcpp::wrap(trans.GetProps()); R.parseEval(std::string("state_T <- setNames(data.frame(matrix(TMP, nrow=" + @@ -285,13 +302,15 @@ void call_master_iter_end(RInside &R, const Field &trans, const Field &chem) { static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, DiffusionModule &diffusion, - ChemistryModule &chem) { + ChemistryModule &chem) +{ /* Iteration Count is dynamic, retrieving value from R (is only needed by * master for the following loop) */ uint32_t maxiter = params.timesteps.size(); - if (params.print_progress) { + if (params.print_progress) + { chem.setProgressBarPrintout(true); } R["TMP_PROPS"] = Rcpp::wrap(chem.getField().GetProps()); @@ -299,10 +318,11 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, /* SIMULATION LOOP */ double dSimTime{0}; - for (uint32_t iter = 1; iter < maxiter + 1; iter++) { + for (uint32_t iter = 1; iter < maxiter + 1; iter++) + { + + params.control_iteration_active = (iter % params.control_iteration == 0 && iter != 0); - params.control_iteration_active = (iter % params.control_iteration == 0); - double start_t = MPI_Wtime(); const double &dt = params.timesteps[iter - 1]; @@ -323,7 +343,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, chem.getField().update(diffusion.getField()); // MSG("Chemistry start"); - if (params.use_ai_surrogate) { + if (params.use_ai_surrogate) + { double ai_start_t = MPI_Wtime(); // Save current values from the tug field as predictor for the ai step R["TMP"] = Rcpp::wrap(chem.getField().AsVector()); @@ -374,7 +395,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, chem.simulate(dt); /* AI surrogate iterative training*/ - if (params.use_ai_surrogate) { + if (params.use_ai_surrogate) + { double ai_start_t = MPI_Wtime(); R["TMP"] = Rcpp::wrap(chem.getField().AsVector()); @@ -408,19 +430,32 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, // TODO: write checkpoint // checkpoint struct --> field and iteration - if (iter == 1) { - write_checkpoint("checkpoint1.hdf5", - {.field = chem.getField(), .iteration = iter}); - } else if (iter == 2) { + /*else if (iter == 2) { Checkpoint_s checkpoint_read{.field = chem.getField()}; read_checkpoint("checkpoint1.hdf5", checkpoint_read); iter = checkpoint_read.iteration; - } + }*/ diffusion.getField().update(chem.getField()); MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); + + /* + if (params.control_iteration_active) + { + std::string file_path = "checkpoint" + std::to_string(iter) + ".hdf5"; + write_checkpoint(file_path, + {.field = chem.getField(), .iteration = iter}); + } + + + if (iter % params.control_iteration == 0 && iter != 0) + { + write_checkpoint("checkpoint" + std::to_string(iter) + ".hdf5", + {.field = chem.getField(), .iteration = iter}); + } + */ // MSG(); } // END SIMULATION LOOP @@ -437,7 +472,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, Rcpp::List diffusion_profiling; diffusion_profiling["simtime"] = diffusion.getTransportTime(); - if (params.use_dht) { + if (params.use_dht) + { chem_profiling["dht_hits"] = Rcpp::wrap(chem.GetWorkerDHTHits()); chem_profiling["dht_evictions"] = Rcpp::wrap(chem.GetWorkerDHTEvictions()); chem_profiling["dht_get_time"] = Rcpp::wrap(chem.GetWorkerDHTGetTimings()); @@ -445,7 +481,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, Rcpp::wrap(chem.GetWorkerDHTFillTimings()); } - if (params.use_interp) { + if (params.use_interp) + { chem_profiling["interp_w"] = Rcpp::wrap(chem.GetWorkerInterpolationWriteTimings()); chem_profiling["interp_r"] = @@ -466,11 +503,14 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, RuntimeParameters ¶ms, chem.MasterLoopBreak(); + writeStatsToCSV(chem.error_stats_history, chem.getField().GetProps(), "stats_overview"); + return profiling; } std::vector getSpeciesNames(const Field &&field, int root, - MPI_Comm comm) { + MPI_Comm comm) +{ std::uint32_t n_elements; std::uint32_t n_string_size; @@ -480,11 +520,13 @@ std::vector getSpeciesNames(const Field &&field, int root, const bool is_master = root == rank; // first, the master sends all the species names iterative - if (is_master) { + if (is_master) + { n_elements = field.GetProps().size(); MPI_Bcast(&n_elements, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); - for (std::uint32_t i = 0; i < n_elements; i++) { + for (std::uint32_t i = 0; i < n_elements; i++) + { n_string_size = field.GetProps()[i].size(); MPI_Bcast(&n_string_size, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); MPI_Bcast(const_cast(field.GetProps()[i].c_str()), n_string_size, @@ -499,7 +541,8 @@ std::vector getSpeciesNames(const Field &&field, int root, std::vector species_names_out(n_elements); - for (std::uint32_t i = 0; i < n_elements; i++) { + for (std::uint32_t i = 0; i < n_elements; i++) + { MPI_Bcast(&n_string_size, 1, MPI_UINT32_T, root, MPI_COMM_WORLD); char recv_buf[n_string_size]; @@ -512,7 +555,8 @@ std::vector getSpeciesNames(const Field &&field, int root, return species_names_out; } -std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) { +std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) +{ std::array base_totals; int rank; @@ -520,7 +564,8 @@ std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) { const bool is_master = root == rank; - if (is_master) { + if (is_master) + { const auto h_col = field["H"]; const auto o_col = field["O"]; @@ -535,7 +580,8 @@ std::array getBaseTotals(Field &&field, int root, MPI_Comm comm) { return base_totals; } -bool getHasID(Field &&field, int root, MPI_Comm comm) { +bool getHasID(Field &&field, int root, MPI_Comm comm) +{ bool has_id; int rank; @@ -543,7 +589,8 @@ bool getHasID(Field &&field, int root, MPI_Comm comm) { const bool is_master = root == rank; - if (is_master) { + if (is_master) + { const auto ID_field = field["ID"]; std::set unique_IDs(ID_field.begin(), ID_field.end()); @@ -560,7 +607,8 @@ bool getHasID(Field &&field, int root, MPI_Comm comm) { return has_id; } -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) +{ int world_size; MPI_Init(&argc, &argv); @@ -571,7 +619,8 @@ int main(int argc, char *argv[]) { RInsidePOET &R = RInsidePOET::getInstance(); - if (MY_RANK == 0) { + if (MY_RANK == 0) + { MSG("Running POET version " + std::string(poet_version)); } @@ -579,7 +628,8 @@ int main(int argc, char *argv[]) { RuntimeParameters run_params; - if (parseInitValues(argc, argv, run_params) != 0) { + if (parseInitValues(argc, argv, run_params) != 0) + { MPI_Finalize(); return 0; } @@ -621,9 +671,12 @@ int main(int argc, char *argv[]) { chemistry.masterEnableSurrogates(surr_setup); - if (MY_RANK > 0) { + if (MY_RANK > 0) + { chemistry.WorkerLoop(); - } else { + } + else + { // R.parseEvalQ("mysetup <- setup"); // // if (MY_RANK == 0) { // get timestep vector from // // grid_init function ... // @@ -637,7 +690,8 @@ int main(int argc, char *argv[]) { R["out_ext"] = run_params.out_ext; R["out_dir"] = run_params.out_dir; - if (run_params.use_ai_surrogate) { + if (run_params.use_ai_surrogate) + { /* Incorporate ai surrogate from R */ R.parseEvalQ(ai_surrogate_r_library); /* Use dht species for model input and output */ @@ -686,7 +740,8 @@ int main(int argc, char *argv[]) { MPI_Finalize(); - if (MY_RANK == 0) { + if (MY_RANK == 0) + { MSG("done, bye!"); } diff --git a/src/poet.hpp.in b/src/poet.hpp.in index c08199b6d..e9fb2acac 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -53,7 +53,7 @@ struct RuntimeParameters { bool print_progress = false; bool control_iteration_active = false; - std::uint32_t control_iteration = 25; + std::uint32_t control_iteration = 1; static constexpr std::uint32_t WORK_PACKAGE_SIZE_DEFAULT = 32; std::uint32_t work_package_size = WORK_PACKAGE_SIZE_DEFAULT; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8bc494158..ddd8ed94a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,7 +12,7 @@ get_filename_component(TEST_RInsideSourceFile "RInsidePOET_funcs.R" REALPATH) configure_file(testDataStructures.hpp.in testDataStructures.hpp) target_include_directories(testPOET PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") -add_custom_target(check +add_custom_target(check_poet COMMAND $ DEPENDS testPOET ) diff --git a/test/testStats.cpp b/test/testStats.cpp new file mode 100644 index 000000000..355d89b0c --- /dev/null +++ b/test/testStats.cpp @@ -0,0 +1,119 @@ +#include +#include +#include +#include + +#include + +TEST_CASE("Stats calculation") +{ + std::vector real = + { + 2, 2, 2, 2, // species 1 + 2.0, 0.01, 0.7, 0.5, // species 2 + 0.0, 0.0, 0.0, 0.0, // species 3 + 0.0, 0.0, 0.0, 0.0, // species 4 + -2.5, -0.02, -0.7, -0.5, // species 5 + 7.7, 6.01, 4.7, 0.5 // species 6 + }; + + std::vector pred = + { + 2, 2, 2, 2, + 2.0, 0.02, 0.6, 0.5, + 0.1, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, + 2.5, 0.01, 0.6, 0.5, + 2.8, 0.02, 0.7, 0.5 + }; + + poet::ChemistryModule::error_stats stats(6, 5); + poet::ChemistryModule::computeStats(real, pred, /*size_per_prop*/ 4, /*species_count*/ 6, stats); + + SUBCASE("Non-zero values") + { + + // species 1 is ID, should stay 0 + CHECK_EQ(stats.mape[0], 0); + CHECK_EQ(stats.rrsme[0], 0); + + /* + mape species 2 + cell0: |(2.0 - 2.0)/2.0| = 0 + cell1: |(0.01 - 0.02)/0.01| = 1 + cell2: |(0.7 - 0.6)/0.7| = 0.142857143 + cell3: |(0.5 - 0.5)/0.5| = 0 + mape = 1.142857143/ 4 = 0.285714286 *100 + rrsme species 1 + squared err sum = 1.02040816 + rrsme = sqrt(1.02040816/4) = 0.50507627 + */ + + CHECK_EQ(stats.mape[1], doctest::Approx(28.5714286).epsilon(1e-6)); + CHECK_EQ(stats.rrsme[1], doctest::Approx(0.50507627).epsilon(1e-6)); + } + + SUBCASE("Zero-denominator case") + { + /* + mape species 3 + cell0: |(0.0 - 0.1)/0.0| + cell1: |(0.0 - 0.0)/0.0| + cell2: |(0.0 - 0.0)/0.0| + cell3: |(0.0 - 0.0)/0.0| + mape = 1 *100 + rrsme = 1 + */ + + CHECK_EQ(stats.mape[2], 100.0); + CHECK_EQ(stats.rrsme[2], 1.0); + } + + SUBCASE("True and predicted values are zero") + { + /* + mape species 4 + cell0: |(0.0 - 0.0)/0.0| + cell1: |(0.0 - 0.0)/0.0| + cell2: |(0.0 - 0.0)/0.0| + cell3: |(0.0 - 0.0)/0.0| + mape = 0.0 + rrsme = 0.0 + */ + + CHECK_EQ(stats.mape[3], 0.0); + CHECK_EQ(stats.rrsme[3], 0.0); + } + + SUBCASE("Negative values") + { + /* + mape species 5 + cell0: |(-2.5 - 2.5)/-2.5| = 2 + cell1: |(-0.02 - 0.01)/-0.02| = 1.5 + cell2: |(-0.7 - 0.6)/-0.7| = 1.85714286 + cell3: |(-0.5 - 0.5)/-0.5| = 2 + mape = (100.0 / 4) * 7.35714286 = 183.92857143 + rrsme = sqrt(13.6989796 / 4) = 1.85060663 + */ + + CHECK_EQ(stats.mape[4], doctest::Approx(183.92857143).epsilon(1e-6)); + CHECK_EQ(stats.rrsme[4], doctest::Approx(1.85060663).epsilon(1e-6)); + } + + SUBCASE("Large differences") + { + /* + mape species 6 + cell0: |(7.7 - 2.8)/7.7| = 0.63636364 + cell1: |(6.01 - 0.02)/6.01| = 0.99667221 + cell2: |(4.7 - 0.7)/4.7| = 0.85106383 + cell3: |(0.5 - 0.5)/0.5| = 0 + mape = (100.0 / 4) * 2.48409968 = 62.102492 + rrsme = sqrt(2,12262382 / 4) = 0.72846136 + */ + + CHECK_EQ(stats.mape[5], doctest::Approx(62.102492).epsilon(1e-6)); + CHECK_EQ(stats.rrsme[5], doctest::Approx(0.72846136).epsilon(1e-6)); + } +}