From 999ecac13965f3651c90015ed0a9a9daafb52dfc Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Tue, 25 Apr 2023 17:38:58 +0200 Subject: [PATCH 01/52] feat: introduce LookupKey and rounding schemes feat: implement data clustering using PHT feat: implement interpolation refactor: use named vector for DHT species definition and significant digits data: remove unusable input scripts data: move Phreeqc database to benchmark dir refactor: rename dolomite benchmark directory refactor: remove DHT prop type from input script --- CMake/POET_Scripts.cmake | 2 +- CMakeLists.txt | 1 - README.md | 2 +- R_lib/kin_r_library.R | 53 +-- app/poet.cpp | 90 ++-- bench/CMakeLists.txt | 2 +- bench/barite/CMakeLists.txt | 1 + bench/barite/barite.R | 16 +- bench/barite/barite_interp_eval.R | 151 +++++++ .../{dolo_diffu_inner => dolo}/CMakeLists.txt | 2 + bench/{dolo_diffu_inner => dolo}/Eval.R | 0 {data => bench/dolo}/dol.pqi | 0 .../dolo_diffu_inner.R | 33 +- .../dolo_diffu_inner_large.R | 22 +- .../{dolo_diffu_inner => dolo}/dolo_inner.pqi | 0 bench/dolo/dolo_interp_long.R | 171 ++++++++ {data => bench/dolo}/phreeqc_kin.dat | 6 +- data/CMakeLists.txt | 8 - data/SimDol1D_diffu.R | 180 -------- data/SimDol2D_diffu.R | 213 --------- data/SimDol2D_diffu.yaml | 13 - ext/phreeqcrm | 2 +- include/poet/ChemistryModule.hpp | 126 +++--- include/poet/DHT.h | 51 ++- include/poet/DHT_Types.hpp | 2 +- include/poet/DHT_Wrapper.hpp | 121 +++--- include/poet/DataStructures.hpp | 35 ++ include/poet/Interpolation.hpp | 266 ++++++++++++ include/poet/LookupKey.hpp | 50 +++ include/poet/RInsidePOET.hpp | 40 ++ include/poet/Rounding.hpp | 92 ++++ include/poet/SimParams.hpp | 72 ++-- src/CMakeLists.txt | 9 +- src/ChemistryModule/ChemistryModule.cpp | 406 +++++++++++++----- src/ChemistryModule/DHT_Wrapper.cpp | 226 ---------- src/ChemistryModule/MasterFunctions.cpp | 153 ++++++- .../SurrogateModels/CMakeLists.txt | 21 + .../{ => SurrogateModels}/DHT.c | 233 ++++++++-- .../SurrogateModels/DHT_Wrapper.cpp | 300 +++++++++++++ .../{ => SurrogateModels}/HashFunctions.cpp | 2 +- .../SurrogateModels/InterpolationModule.cpp | 197 +++++++++ .../SurrogateModels/ProximityHashTable.cpp | 275 ++++++++++++ src/ChemistryModule/WorkerFunctions.cpp | 293 ++++++++++--- src/SimParams.cpp | 115 ++--- test/testRounding.cpp | 79 ++++ util/data_evaluation/RFun_Eval.R | 95 +++- util/data_evaluation/interpret_keys.cpp | 13 + 47 files changed, 3003 insertions(+), 1237 deletions(-) create mode 100644 bench/barite/barite_interp_eval.R rename bench/{dolo_diffu_inner => dolo}/CMakeLists.txt (74%) rename bench/{dolo_diffu_inner => dolo}/Eval.R (100%) rename {data => bench/dolo}/dol.pqi (100%) rename bench/{dolo_diffu_inner => dolo}/dolo_diffu_inner.R (84%) rename bench/{dolo_diffu_inner => dolo}/dolo_diffu_inner_large.R (88%) rename bench/{dolo_diffu_inner => dolo}/dolo_inner.pqi (100%) create mode 100644 bench/dolo/dolo_interp_long.R rename {data => bench/dolo}/phreeqc_kin.dat (99%) delete mode 100644 data/CMakeLists.txt delete mode 100644 data/SimDol1D_diffu.R delete mode 100644 data/SimDol2D_diffu.R delete mode 100644 data/SimDol2D_diffu.yaml create mode 100644 include/poet/DataStructures.hpp create mode 100644 include/poet/Interpolation.hpp create mode 100644 include/poet/LookupKey.hpp create mode 100644 include/poet/Rounding.hpp delete mode 100644 src/ChemistryModule/DHT_Wrapper.cpp create mode 100644 src/ChemistryModule/SurrogateModels/CMakeLists.txt rename src/ChemistryModule/{ => SurrogateModels}/DHT.c (68%) create mode 100644 src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp rename src/ChemistryModule/{ => SurrogateModels}/HashFunctions.cpp (97%) create mode 100644 src/ChemistryModule/SurrogateModels/InterpolationModule.cpp create mode 100644 src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp create mode 100644 test/testRounding.cpp diff --git a/CMake/POET_Scripts.cmake b/CMake/POET_Scripts.cmake index 69760cc44..89fc1e84f 100644 --- a/CMake/POET_Scripts.cmake +++ b/CMake/POET_Scripts.cmake @@ -9,7 +9,7 @@ macro(get_POET_version) OUTPUT_VARIABLE POET_GIT_BRANCH OUTPUT_STRIP_TRAILING_WHITESPACE) execute_process( - COMMAND ${GIT_EXECUTABLE} describe --always + COMMAND ${GIT_EXECUTABLE} describe --tags --always WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_VARIABLE POET_GIT_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb87f3a7a..9fd34afeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,6 @@ find_package(RRuntime REQUIRED) add_subdirectory(src) add_subdirectory(R_lib) -add_subdirectory(data) add_subdirectory(app) add_subdirectory(bench) diff --git a/README.md b/README.md index 4e29aea28..7ef495139 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # POET diff --git a/R_lib/kin_r_library.R b/R_lib/kin_r_library.R index e350ac35e..c759bf9e1 100644 --- a/R_lib/kin_r_library.R +++ b/R_lib/kin_r_library.R @@ -64,28 +64,33 @@ master_init <- function(setup) { ## calculated time step if store_result is TRUE and increments the ## iteration counter master_iteration_end <- function(setup) { - iter <- setup$iter - ## MDL Write on disk state_T and state_C after every iteration - ## comprised in setup$out_save - if (setup$store_result) { - if (iter %in% setup$out_save) { - nameout <- paste0(fileout, "/iter_", sprintf("%03d", iter), ".rds") - info <- list( - tr_req_dt = as.integer(setup$req_dt) - # tr_allow_dt = setup$allowed_dt, - # tr_inniter = as.integer(setup$inniter) - ) - saveRDS(list( - T = setup$state_T, C = setup$state_C, - simtime = as.integer(setup$simtime), - tr_info = info - ), file = nameout) - msgm("results stored in <", nameout, ">") + iter <- setup$iter + ## max digits for iterations + dgts <- as.integer(ceiling(log10(setup$iterations + 1))) + ## string format to use in sprintf + fmt <- paste0("%0", dgts, "d") + + ## Write on disk state_T and state_C after every iteration + ## comprised in setup$out_save + if (setup$store_result) { + if (iter %in% setup$out_save) { + nameout <- paste0(fileout, "/iter_", sprintf(fmt=fmt, iter), ".rds") + info <- list( + tr_req_dt = as.integer(setup$req_dt) + ## tr_allow_dt = setup$allowed_dt, + ## tr_inniter = as.integer(setup$inniter) + ) + saveRDS(list( + T = setup$state_T, C = setup$state_C, + simtime = as.integer(setup$simtime), + tr_info = info + ), file = nameout) + msgm("results stored in <", nameout, ">") + } } - } - msgm("done iteration", iter, "/", setup$maxiter) - setup$iter <- setup$iter + 1 - return(setup) + msgm("done iteration", iter, "/", setup$maxiter) + setup$iter <- setup$iter + 1 + return(setup) } ## function for the workers to compute chemistry through PHREEQC @@ -264,9 +269,9 @@ StoreSetup <- function(setup) { if (dht_enabled) { to_store$DHT <- list( enabled = dht_enabled, - log = dht_log, - signif = dht_final_signif, - proptype = dht_final_proptype + log = dht_log + #signif = dht_final_signif, + #proptype = dht_final_proptype ) } else { to_store$DHT <- FALSE diff --git a/app/poet.cpp b/app/poet.cpp index bc47c2c2f..69d1b470f 100644 --- a/app/poet.cpp +++ b/app/poet.cpp @@ -70,7 +70,7 @@ void writeFieldsToR(RInside &R, const Field &trans, const Field &chem) { void set_chem_parameters(poet::ChemistryModule &chem, uint32_t wp_size, const std::string &database_path) { chem.SetErrorHandlerMode(1); - chem.SetComponentH2O(true); + chem.SetComponentH2O(false); chem.SetRebalanceFraction(0.5); chem.SetRebalanceByCell(true); chem.UseSolutionDensityVolume(false); @@ -112,7 +112,6 @@ void set_chem_parameters(poet::ChemistryModule &chem, uint32_t wp_size, } inline double RunMasterLoop(SimParams ¶ms, RInside &R, - ChemistryParams &chem_params, const GridParams &g_params, uint32_t nxyz_master) { DiffusionParams d_params{R}; @@ -123,10 +122,11 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, double sim_time = .0; - ChemistryModule chem(nxyz_master, params.getNumParams().wp_size, - MPI_COMM_WORLD); - set_chem_parameters(chem, nxyz_master, chem_params.database_path); - chem.RunInitFile(chem_params.input_script); + ChemistryModule chem(nxyz_master, params.getNumParams().wp_size, maxiter, + params.getChemParams(), MPI_COMM_WORLD); + + set_chem_parameters(chem, nxyz_master, params.getChemParams().database_path); + chem.RunInitFile(params.getChemParams().input_script); poet::ChemistryModule::SingleCMap init_df = DFToHashMap(d_params.initial_t); chem.initializeField(diffusion.getField()); @@ -135,27 +135,11 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, chem.setProgressBarPrintout(true); } - if (params.getNumParams().dht_enabled) { - chem.SetDHTEnabled(true, params.getNumParams().dht_size_per_process, - chem_params.dht_species); - if (!chem_params.dht_signif.empty()) { - chem.SetDHTSignifVector(chem_params.dht_signif); - } - if (!params.getDHTPropTypeVector().empty()) { - chem.SetDHTPropTypeVector(params.getDHTPropTypeVector()); - } - if (!params.getDHTFile().empty()) { - chem.ReadDHTFile(params.getDHTFile()); - } - if (params.getNumParams().dht_snaps > 0) { - chem.SetDHTSnaps(params.getNumParams().dht_snaps, params.getOutDir()); - } - } - /* SIMULATION LOOP */ - double dStartTime = MPI_Wtime(); + double dSimTime{0}; for (uint32_t iter = 1; iter < maxiter + 1; iter++) { + double start_t = MPI_Wtime(); uint32_t tick = 0; // cout << "CPP: Evaluating next time step" << endl; // R.parseEvalQ("mysetup <- master_iteration_setup(mysetup)"); @@ -203,7 +187,8 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, << endl; // MPI_Barrier(MPI_COMM_WORLD); - + double end_t = MPI_Wtime(); + dSimTime += end_t - start_t; } // END SIMULATION LOOP R.parseEvalQ("profiling <- list()"); @@ -232,23 +217,36 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, // R["phreeqc_count"] = phreeqc_counts; // R.parseEvalQ("profiling$phreeqc_count <- phreeqc_count"); - if (params.getNumParams().dht_enabled) { + if (params.getChemParams().use_dht) { R["dht_hits"] = Rcpp::wrap(chem.GetWorkerDHTHits()); R.parseEvalQ("profiling$dht_hits <- dht_hits"); - R["dht_miss"] = Rcpp::wrap(chem.GetWorkerDHTMiss()); - R.parseEvalQ("profiling$dht_miss <- dht_miss"); R["dht_evictions"] = Rcpp::wrap(chem.GetWorkerDHTEvictions()); R.parseEvalQ("profiling$dht_evictions <- dht_evictions"); R["dht_get_time"] = Rcpp::wrap(chem.GetWorkerDHTGetTimings()); R.parseEvalQ("profiling$dht_get_time <- dht_get_time"); R["dht_fill_time"] = Rcpp::wrap(chem.GetWorkerDHTFillTimings()); R.parseEvalQ("profiling$dht_fill_time <- dht_fill_time"); + if (params.getChemParams().use_interp) { + R["interp_w"] = Rcpp::wrap(chem.GetWorkerInterpolationWriteTimings()); + R.parseEvalQ("profiling$interp_write <- interp_w"); + R["interp_r"] = Rcpp::wrap(chem.GetWorkerInterpolationReadTimings()); + R.parseEvalQ("profiling$interp_read <- interp_r"); + R["interp_g"] = Rcpp::wrap(chem.GetWorkerInterpolationGatherTimings()); + R.parseEvalQ("profiling$interp_gather <- interp_g"); + R["interp_fc"] = + Rcpp::wrap(chem.GetWorkerInterpolationFunctionCallTimings()); + R.parseEvalQ("profiling$interp_function_calls <- interp_fc"); + R["interp_calls"] = Rcpp::wrap(chem.GetWorkerInterpolationCalls()); + R.parseEvalQ("profiling$interp_calls <- interp_calls"); + R["interp_cached"] = Rcpp::wrap(chem.GetWorkerPHTCacheHits()); + R.parseEvalQ("profiling$interp_cached <- interp_cached"); + } } chem.MasterLoopBreak(); diffusion.end(); - return MPI_Wtime() - dStartTime; + return dSimTime; } int main(int argc, char *argv[]) { @@ -268,19 +266,23 @@ int main(int argc, char *argv[]) { if (world_rank > 0) { { - uint32_t c_size; - MPI_Bcast(&c_size, 1, MPI_UINT32_T, 0, MPI_COMM_WORLD); - char *buffer = new char[c_size + 1]; - MPI_Bcast(buffer, c_size, MPI_CHAR, 0, MPI_COMM_WORLD); - buffer[c_size] = '\0'; + SimParams params(world_rank, world_size); + int pret = params.parseFromCmdl(argv, RInsidePOET::getInstance()); + + if (pret == poet::PARSER_ERROR) { + MPI_Finalize(); + return EXIT_FAILURE; + } else if (pret == poet::PARSER_HELP) { + MPI_Finalize(); + return EXIT_SUCCESS; + } // ChemistryModule worker(nxyz, nxyz, MPI_COMM_WORLD); - ChemistryModule worker = - poet::ChemistryModule::createWorker(MPI_COMM_WORLD); - set_chem_parameters(worker, worker.GetWPSize(), std::string(buffer)); - - delete[] buffer; + ChemistryModule worker = poet::ChemistryModule::createWorker( + MPI_COMM_WORLD, params.getChemParams()); + set_chem_parameters(worker, worker.GetWPSize(), + params.getChemParams().database_path); worker.WorkerLoop(); } @@ -334,17 +336,9 @@ int main(int argc, char *argv[]) { // MPI_Barrier(MPI_COMM_WORLD); - poet::ChemistryParams chem_params(R); - - /* THIS IS EXECUTED BY THE MASTER */ - std::string db_path = chem_params.database_path; - uint32_t c_size = db_path.size(); - MPI_Bcast(&c_size, 1, MPI_UINT32_T, 0, MPI_COMM_WORLD); - - MPI_Bcast(db_path.data(), c_size, MPI_CHAR, 0, MPI_COMM_WORLD); uint32_t nxyz_master = (world_size == 1 ? g_params.total_n : 1); - dSimTime = RunMasterLoop(params, R, chem_params, g_params, nxyz_master); + dSimTime = RunMasterLoop(params, R, g_params, nxyz_master); cout << "CPP: finished simulation loop" << endl; diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index a891897c0..6b5e9b9f3 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -1,3 +1,3 @@ -add_subdirectory(dolo_diffu_inner) +add_subdirectory(dolo) add_subdirectory(surfex) add_subdirectory(barite) diff --git a/bench/barite/CMakeLists.txt b/bench/barite/CMakeLists.txt index 29982a1e0..6c8b63aee 100644 --- a/bench/barite/CMakeLists.txt +++ b/bench/barite/CMakeLists.txt @@ -1,5 +1,6 @@ install(FILES barite.R + barite_interp_eval.R barite.pqi db_barite.dat DESTINATION diff --git a/bench/barite/barite.R b/bench/barite/barite.R index dad3b79ea..d6941d3ed 100644 --- a/bench/barite/barite.R +++ b/bench/barite/barite.R @@ -115,16 +115,20 @@ diffusion <- list( ## # Needed when using DHT - -dht_species <- names(init_diffu) -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5) +dht_species <- c( + "H" = 10, + "O" = 10, + "Charge" = 3, + "Ba" = 5, + "Cl" = 5, + "S(6)" = 5, + "Sr" = 5 +) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species, - dht_signif = dht_signif + dht_species = dht_species ) ################################################################# diff --git a/bench/barite/barite_interp_eval.R b/bench/barite/barite_interp_eval.R new file mode 100644 index 000000000..fcbe4884b --- /dev/null +++ b/bench/barite/barite_interp_eval.R @@ -0,0 +1,151 @@ +## Time-stamp: "Last modified 2023-07-21 15:04:49 mluebke" + +database <- normalizePath("../share/poet/bench/barite/db_barite.dat") +input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") + +################################################################# +## Section 1 ## +## Grid initialization ## +################################################################# + +n <- 400 +m <- 200 + +types <- c("scratch", "phreeqc", "rds") + +init_cell <- list( + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.217E-09, + "Ba" = 1.E-10, + "Cl" = 2.E-10, + "S" = 6.205E-4, + "Sr" = 6.205E-4, + "Barite" = 0.001, + "Celestite" = 1 +) + +grid <- list( + n_cells = c(n, m), + s_cells = c(n / 10, m / 10), + type = types[1], + init_cell = as.data.frame(init_cell, check.names = FALSE), + props = names(init_cell), + database = database, + input_script = input_script +) + + +################################################################## +## Section 2 ## +## Diffusion parameters and boundary conditions ## +################################################################## + +## initial conditions + +init_diffu <- list( + # "H" = 110.0124, + "H" = 0.00000028904, + # "O" = 55.5087, + "O" = 0.000000165205, + # "Charge" = -1.217E-09, + "Charge" = -3.337E-08, + "Ba" = 1.E-10, + "Cl" = 1.E-10, + "S(6)" = 6.205E-4, + "Sr" = 6.205E-4 +) + +injection_diff <- list( + list( + # "H" = 111.0124, + "H" = 0.0000002890408, + # "O" = 55.50622, + "O" = 0.00002014464, + # "Charge" = -3.337E-08, + "Charge" = -3.337000004885E-08, + "Ba" = 0.1, + "Cl" = 0.2, + "S(6)" = 0, + "Sr" = 0 + ) +) + +## diffusion coefficients +alpha_diffu <- c( + "H" = 1E-06, + "O" = 1E-06, + "Charge" = 1E-06, + "Ba" = 1E-06, + "Cl" = 1E-06, + "S(6)" = 1E-06, + "Sr" = 1E-06 +) + +vecinj_inner <- list( + l1 = c(1, floor(n / 2), floor(m / 2)) + ## l2 = c(2,80,80), + ## l3 = c(2,60,80) +) + +boundary <- list( + # "N" = rep(1, n), + "N" = rep(0, n), + "E" = rep(0, n), + "S" = rep(0, n), + "W" = rep(0, n) +) + +diffu_list <- names(alpha_diffu) + +vecinj <- do.call(rbind.data.frame, injection_diff) +names(vecinj) <- names(init_diffu) + +diffusion <- list( + init = as.data.frame(init_diffu, check.names = FALSE), + vecinj = vecinj, + vecinj_inner = vecinj_inner, + vecinj_index = boundary, + alpha = alpha_diffu +) + +################################################################# +## Section 3 ## +## Chemistry module (Phreeqc) ## +################################################################# + +## # Needed when using DHT +dht_species <- c( + "H" = 10, + "O" = 10, + "Charge" = 3, + "Ba" = 5, + "Cl" = 5, + "S(6)" = 5, + "Sr" = 5 +) + +chemistry <- list( + database = database, + input_script = input_script, + dht_species = dht_species +) + +################################################################# +## Section 4 ## +## Putting all those things together ## +################################################################# + + +iterations <- 200 +dt <- 250 + +setup <- list( + grid = grid, + diffusion = diffusion, + chemistry = chemistry, + iterations = iterations, + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = seq(1, iterations) +) diff --git a/bench/dolo_diffu_inner/CMakeLists.txt b/bench/dolo/CMakeLists.txt similarity index 74% rename from bench/dolo_diffu_inner/CMakeLists.txt rename to bench/dolo/CMakeLists.txt index cde190af0..d2c0acd1e 100644 --- a/bench/dolo_diffu_inner/CMakeLists.txt +++ b/bench/dolo/CMakeLists.txt @@ -2,6 +2,8 @@ install(FILES dolo_diffu_inner.R dolo_diffu_inner_large.R dolo_inner.pqi + dolo_interp_long.R + phreeqc_kin.dat DESTINATION share/poet/bench/dolo ) diff --git a/bench/dolo_diffu_inner/Eval.R b/bench/dolo/Eval.R similarity index 100% rename from bench/dolo_diffu_inner/Eval.R rename to bench/dolo/Eval.R diff --git a/data/dol.pqi b/bench/dolo/dol.pqi similarity index 100% rename from data/dol.pqi rename to bench/dolo/dol.pqi diff --git a/bench/dolo_diffu_inner/dolo_diffu_inner.R b/bench/dolo/dolo_diffu_inner.R similarity index 84% rename from bench/dolo_diffu_inner/dolo_diffu_inner.R rename to bench/dolo/dolo_diffu_inner.R index f243889d4..f981ed5c4 100644 --- a/bench/dolo_diffu_inner/dolo_diffu_inner.R +++ b/bench/dolo/dolo_diffu_inner.R @@ -1,6 +1,6 @@ -## Time-stamp: "Last modified 2023-04-24 15:12:02 mluebke" +## Time-stamp: "Last modified 2023-07-28 16:57:40 mluebke" -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") ################################################################# @@ -44,8 +44,8 @@ grid <- list( ## initial conditions init_diffu <- list( - "H" = 0.000211313883539788, - "O" = 0.00398302904424952, + "H" = 110.683, + "O" = 55.3413, "Charge" = -5.0822e-19, "C(4)" = 1.2279E-4, "Ca" = 1.2279E-4, @@ -67,8 +67,8 @@ alpha_diffu <- c( ## list of boundary conditions/inner nodes vecinj_diffu <- list( list( - "H" = 0.0001540445, - "O" = 0.002148006, + "H" = 110.683, + "O" = 55.3413, "Charge" = 1.90431e-16, "C(4)" = 0, "Ca" = 0, @@ -76,8 +76,8 @@ vecinj_diffu <- list( "Mg" = 0.001 ), list( - "H" = 0.0001610193, - "O" = 0.002386934, + "H" = 110.683, + "O" = 55.3413, "Charge" = 1.90431e-16, "C(4)" = 0, "Ca" = 0.0, @@ -120,16 +120,21 @@ diffusion <- list( ## # Needed when using DHT -dht_species <- c("H", "O", "Charge", "C(4)", "Ca", "Cl", "Mg", "Calcite", - "Dolomite") -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5, 5, 5) +dht_species <- c("H" = 10, + "O" = 10, + "Charge" = 3, + "C(4)" = 5, + "Ca" = 5, + "Cl" = 5, + "Mg" = 5, + "Calcite" = 5, + "Dolomite" =5 + ) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species, - dht_signif = dht_signif + dht_species = dht_species ) ################################################################# diff --git a/bench/dolo_diffu_inner/dolo_diffu_inner_large.R b/bench/dolo/dolo_diffu_inner_large.R similarity index 88% rename from bench/dolo_diffu_inner/dolo_diffu_inner_large.R rename to bench/dolo/dolo_diffu_inner_large.R index 69b528ce3..5dfb95677 100644 --- a/bench/dolo_diffu_inner/dolo_diffu_inner_large.R +++ b/bench/dolo/dolo_diffu_inner_large.R @@ -1,6 +1,6 @@ -## Time-stamp: "Last modified 2023-04-24 15:43:26 mluebke" +## Time-stamp: "Last modified 2023-07-28 16:57:40 mluebke" -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") ################################################################# @@ -118,18 +118,22 @@ diffusion <- list( ## Chemistry module (Phreeqc) ## ################################################################# - ## # Needed when using DHT -dht_species <- c("H", "O", "Charge", "C(4)", "Ca", "Cl", "Mg", "Calcite", - "Dolomite") -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5, 5, 5) +dht_species <- c("H" = 10, + "O" = 10, + "Charge" = 3, + "C(4)" = 5, + "Ca" = 5, + "Cl" = 5, + "Mg" = 5, + "Calcite" = 5, + "Dolomite" =5 + ) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species, - dht_signif = dht_signif + dht_species = dht_species ) ################################################################# diff --git a/bench/dolo_diffu_inner/dolo_inner.pqi b/bench/dolo/dolo_inner.pqi similarity index 100% rename from bench/dolo_diffu_inner/dolo_inner.pqi rename to bench/dolo/dolo_inner.pqi diff --git a/bench/dolo/dolo_interp_long.R b/bench/dolo/dolo_interp_long.R new file mode 100644 index 000000000..b333891ea --- /dev/null +++ b/bench/dolo/dolo_interp_long.R @@ -0,0 +1,171 @@ +## Time-stamp: "Last modified 2023-08-01 18:34:47 mluebke" + +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") +input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") + +################################################################# +## Section 1 ## +## Grid initialization ## +################################################################# + +n <- 400 +m <- 200 + +types <- c("scratch", "phreeqc", "rds") + +init_cell <- list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = -5.0822e-19, + "C" = 1.2279E-4, + "Ca" = 1.2279E-4, + "Cl" = 0, + "Mg" = 0, + "O2g" = 0.499957, + "Calcite" = 2.07e-4, + "Dolomite" = 0 +) + +grid <- list( + n_cells = c(n, m), + s_cells = c(5, 2.5), + type = types[1], + init_cell = as.data.frame(init_cell, check.names = FALSE), + props = names(init_cell), + database = database, + input_script = input_script +) + + +################################################################## +## Section 2 ## +## Diffusion parameters and boundary conditions ## +################################################################## + +## initial conditions +init_diffu <- list( + "H" = 1.110124E+02, + "O" = 5.550833E+01, + "Charge" = -1.216307659761E-09, + "C(4)" = 1.230067028174E-04, + "Ca" = 1.230067028174E-04, + "Cl" = 0, + "Mg" = 0 +) + +## diffusion coefficients +alpha_diffu <- c( + "H" = 1E-6, + "O" = 1E-6, + "Charge" = 1E-6, + "C(4)" = 1E-6, + "Ca" = 1E-6, + "Cl" = 1E-6, + "Mg" = 1E-6 +) + +## list of boundary conditions/inner nodes +vecinj_diffu <- list( + list( + "H" = 1.110124E+02, + "O" = 5.550796E+01, + "Charge" = -3.230390327801E-08, + "C(4)" = 0, + "Ca" = 0, + "Cl" = 0.002, + "Mg" = 0.001 + ), + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0.0, + "Cl" = 0.004, + "Mg" = 0.002 + ), + init_diffu +) + +vecinj_inner <- list( + l1 = c(1, floor(n / 2), floor(m / 2)) + # l2 = c(2,1400,800), + # l3 = c(2,1600,800) +) + +boundary <- list( + # "N" = c(1, rep(0, n-1)), + "N" = rep(3, n), + "E" = rep(3, m), + "S" = rep(3, n), + "W" = rep(3, m) +) + +diffu_list <- names(alpha_diffu) + +vecinj <- do.call(rbind.data.frame, vecinj_diffu) +names(vecinj) <- names(init_diffu) + +diffusion <- list( + init = as.data.frame(init_diffu, check.names = FALSE), + vecinj = vecinj, + vecinj_inner = vecinj_inner, + vecinj_index = boundary, + alpha = alpha_diffu +) + +################################################################# +## Section 3 ## +## Chemistry module (Phreeqc) ## +################################################################# + + +## # optional when using DHT +dht_species <- c( + "H" = 3, + "O" = 3, + "Charge" = 3, + "C(4)" = 6, + "Ca" = 6, + "Cl" = 3, + "Mg" = 5, + "Calcite" = 4, + "Dolomite" = 4 +) + +## # Optional when using Interpolation (example with less key species and custom +## # significant digits) + +#pht_species <- c( +# "C(4)" = 3, +# "Ca" = 3, +# "Mg" = 2, +# "Calcite" = 2, +# "Dolomite" = 2 +#) + +chemistry <- list( + database = database, + input_script = input_script, + dht_species = dht_species +# pht_species = pht_species +) + +################################################################# +## Section 4 ## +## Putting all those things together ## +################################################################# + + +iterations <- 20000 +dt <- 200 + +setup <- list( + grid = grid, + diffusion = diffusion, + chemistry = chemistry, + iterations = iterations, + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = c(1, seq(50, iterations, by = 50)) +) diff --git a/data/phreeqc_kin.dat b/bench/dolo/phreeqc_kin.dat similarity index 99% rename from data/phreeqc_kin.dat rename to bench/dolo/phreeqc_kin.dat index 8c1b671aa..01d5bf516 100644 --- a/data/phreeqc_kin.dat +++ b/bench/dolo/phreeqc_kin.dat @@ -2,7 +2,7 @@ ### SURFACE and with the RATES for Calcite and Dolomite to use with ### RedModRphree -### Time-stamp: "Last modified 2018-05-06 14:36:23 delucia" +### Time-stamp: "Last modified 2023-05-23 10:35:56 mluebke" # PHREEQC.DAT for calculating pressure dependence of reactions, with # molal volumina of aqueous species and of minerals, and @@ -1276,9 +1276,9 @@ Calcite 110 logK25=-5.81 120 mech_c=(10^logK25)*(e^(-Ea/R*deltaT)) 130 rate=mech_a+mech_c - ## 140 IF SI("Calcite")<0 then moles=parm(1)*rate*(1-SR("Calcite")) # dissolution + 140 IF (SI("Calcite")<0 AND M>0) then moles=parm(1)*rate*(1-SR("Calcite")) # dissolution ## 145 IF SI("Calcite")>0 then moles=parm(1)*M*rate*(-1+SR("Calcite")) # precipitation - 150 moles=parm(1)*rate*(1-SR("Calcite")) # precipitation + ## 150 moles=parm(1)*rate*(1-SR("Calcite")) # precipitation 200 save moles*time -end diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt deleted file mode 100644 index 664e4ac45..000000000 --- a/data/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -install( -FILES - SimDol2D_diffu.R - SimDol1D_diffu.R - phreeqc_kin.dat - dol.pqi -DESTINATION - share/poet/examples) diff --git a/data/SimDol1D_diffu.R b/data/SimDol1D_diffu.R deleted file mode 100644 index 6e74e2ccf..000000000 --- a/data/SimDol1D_diffu.R +++ /dev/null @@ -1,180 +0,0 @@ -################################################################# -## Section 1 ## -## Grid initialization ## -################################################################# - -n <- 5 -m <- 5 - -types <- c("scratch", "phreeqc", "rds") - -# initsim <- c("SELECTED_OUTPUT", "-high_precision true", -# "-reset false", -# "USER_PUNCH", -# "-head C Ca Cl Mg pH pe O2g Calcite Dolomite", -# "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")", -# "SOLUTION 1", -# "units mol/kgw", -# "temp 25.0", "water 1", -# "pH 9.91 charge", -# "pe 4.0", -# "C 1.2279E-04", -# "Ca 1.2279E-04", -# "Cl 1E-12", -# "Mg 1E-12", -# "PURE 1", -# "O2g -0.6788 10.0", -# "Calcite 0.0 2.07E-3", -# "Dolomite 0.0 0.0", -# "END") - -# needed if init type is set to "scratch" -# prop <- c("C", "Ca", "Cl", "Mg", "pH", "pe", "O2g", "Calcite", "Dolomite") - -init_cell <- list( - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "pH" = 9.91, - "pe" = 4, - "O2g" = 10, - "Calcite" = 2.07e-4, - "Dolomite" = 0 -) - -grid <- list( - n_cells = n, - s_cells = n, - type = types[1], - init_cell = as.data.frame(init_cell), - props = names(init_cell), - init_script = NULL -) - - -################################################################## -## Section 2 ## -## Diffusion parameters and boundary conditions ## -################################################################## - -init_diffu <- c( - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "pe" = 4, - "pH" = 7 -) - -alpha_diffu <- c( - "C" = 1E-4, - "Ca" = 1E-4, - "Cl" = 1E-4, - "Mg" = 1E-4, - "pe" = 1E-4, - "pH" = 1E-4 -) - -vecinj_diffu <- list( - list( - "C" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001, - "pe" = 4, - "pH" = 9.91 - ) -) - -boundary <- list( - "E" = 0, - "W" = 1 -) - -diffu_list <- names(alpha_diffu) - -diffusion <- list( - init = init_diffu, - vecinj = do.call(rbind.data.frame, vecinj_diffu), - vecinj_index = boundary, - alpha = alpha_diffu -) - -################################################################## -## Section 3 ## -## Phreeqc simulation ## -################################################################## - -db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", - package = "RedModRphree" -), is.db = TRUE) - -phreeqc::phrLoadDatabaseString(db) - -# NOTE: This won't be needed in the future either. Could also be done in a. pqi -# file -base <- c( - "SOLUTION 1", - "units mol/kgw", - "temp 25.0", - "water 1", - "pH 9.91 charge", - "pe 4.0", - "C 1.2279E-04", - "Ca 1.2279E-04", - "Mg 0.001", - "Cl 0.002", - "PURE 1", - "O2g -0.1675 10", - "KINETICS 1", - "-steps 100", - "-step_divide 100", - "-bad_step_max 2000", - "Calcite", "-m 0.000207", - "-parms 0.0032", - "Dolomite", - "-m 0.0", - "-parms 0.00032", - "END" -) - -selout <- c( - "SELECTED_OUTPUT", "-high_precision true", "-reset false", - "-time", "-soln", "-temperature true", "-water true", - "-pH", "-pe", "-totals C Ca Cl Mg", - "-kinetic_reactants Calcite Dolomite", "-equilibrium O2g" -) - -# Needed when using DHT -signif_vector <- c(7, 7, 7, 7, 7, 7, 7, 5, 5) -prop_type <- c("act", "act", "act", "act", "logact", "logact", "ignore", "act", "act") -prop <- names(init_cell) - -iterations <- 500 - -setup <- list( - # bound = myboundmat, - base = base, - first = selout, - # initsim = initsim, - # Cf = 1, - grid = grid, - diffusion = diffusion, - prop = prop, - immobile = c(7, 8, 9), - kin = c(8, 9), - ann = list(O2g = -0.1675), - # phase = "FLUX1", - # density = "DEN1", - reduce = FALSE, - # snapshots = demodir, ## directory where we will read MUFITS SUM files - # gridfile = paste0(demodir, "/d2ascii.run.GRID.SUM") - # init = init, - # vecinj = vecinj, - # cinj = c(0,1), - # boundary = boundary, - # injections = FALSE, - iterations = iterations, - timesteps = rep(10, iterations) -) diff --git a/data/SimDol2D_diffu.R b/data/SimDol2D_diffu.R deleted file mode 100644 index df965bcc6..000000000 --- a/data/SimDol2D_diffu.R +++ /dev/null @@ -1,213 +0,0 @@ -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") -input_script <- normalizePath("../share/poet/examples/dol.pqi") - -################################################################# -## Section 1 ## -## Grid initialization ## -################################################################# - -n <- 100 -m <- 100 - -types <- c("scratch", "phreeqc", "rds") - -# initsim <- c("SELECTED_OUTPUT", "-high_precision true", -# "-reset false", -# "USER_PUNCH", -# "-head C Ca Cl Mg pH pe O2g Calcite Dolomite", -# "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")", -# "SOLUTION 1", -# "units mol/kgw", -# "temp 25.0", "water 1", -# "pH 9.91 charge", -# "pe 4.0", -# "C 1.2279E-04", -# "Ca 1.2279E-04", -# "Cl 1E-12", -# "Mg 1E-12", -# "PURE 1", -# "O2g -0.6788 10.0", -# "Calcite 0.0 2.07E-3", -# "Dolomite 0.0 0.0", -# "END") - -# needed if init type is set to "scratch" -# prop <- c("C", "Ca", "Cl", "Mg", "pH", "pe", "O2g", "Calcite", "Dolomite") - -init_cell <- list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = -5.0822e-19, - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "O2g" = 0.499957, - "Calcite" = 2.07e-4, - "Dolomite" = 0 -) - -grid <- list( - n_cells = c(n, m), - s_cells = c(n, m), - type = types[1], - init_cell = as.data.frame(init_cell), - props = names(init_cell), - database = database, - input_script = input_script -) - - -################################################################## -## Section 2 ## -## Diffusion parameters and boundary conditions ## -################################################################## - -init_diffu <- c( - "H" = 110.683, - "O" = 55.3413, - "Charge" = -5.0822e-19, - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0 -) - -alpha_diffu <- c( - "H" = 1E-4, - "O" = 1E-4, - "Charge" = 1E-4, - "C" = 1E-4, - "Ca" = 1E-4, - "Cl" = 1E-4, - "Mg" = 1E-4 -) - -vecinj_diffu <- list( - list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = 1.90431e-16, - "C" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001 - ) -) - -#inner_index <- c(5, 15, 25) -#inner_vecinj_index <- rep(1, 3) -# -#vecinj_inner <- cbind(inner_index, inner_vecinj_index) -vecinj_inner <- list( - l1 = c(1,2,2) -) - - -boundary <- list( - "N" = rep(0, n), - "E" = rep(0, n), - "S" = rep(0, n), - "W" = rep(0, n) -) - -diffu_list <- names(alpha_diffu) - -diffusion <- list( - init = init_diffu, - vecinj = do.call(rbind.data.frame, vecinj_diffu), - vecinj_inner = vecinj_inner, - vecinj_index = boundary, - alpha = alpha_diffu -) - -################################################################# -## Section 3 ## -## Chemitry module (Phreeqc) ## -################################################################# - -# db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", -# package = "RedModRphree" -# ), is.db = TRUE) -# -# phreeqc::phrLoadDatabaseString(db) - -# NOTE: This won't be needed in the future either. Could also be done in a. pqi -# file -base <- c( - "SOLUTION 1", - "units mol/kgw", - "temp 25.0", - "water 1", - "pH 9.91 charge", - "pe 4.0", - "C 1.2279E-04", - "Ca 1.2279E-04", - "Mg 0.001", - "Cl 0.002", - "PURE 1", - "O2g -0.1675 10", - "KINETICS 1", - "-steps 100", - "-step_divide 100", - "-bad_step_max 2000", - "Calcite", "-m 0.000207", - "-parms 0.0032", - "Dolomite", - "-m 0.0", - "-parms 0.00032", - "END" -) - -selout <- c( - "SELECTED_OUTPUT", "-high_precision true", "-reset false", - "-time", "-soln", "-temperature true", "-water true", - "-pH", "-pe", "-totals C Ca Cl Mg", - "-kinetic_reactants Calcite Dolomite", "-equilibrium O2g" -) - - -# Needed when using DHT -signif_vector <- c(10, 10, 10, 7, 7, 7, 7, 0, 5, 5) -prop_type <- c("", "", "", "act", "act", "act", "act", "ignore", "", "") -prop <- names(init_cell) - -chemistry <- list( - database = database, - input_script = input_script -) - -################################################################# -## Section 4 ## -## Putting all those things together ## -################################################################# - - -iterations <- 10 - -setup <- list( - # bound = myboundmat, - base = base, - first = selout, - # initsim = initsim, - # Cf = 1, - grid = grid, - diffusion = diffusion, - chemistry = chemistry, - prop = prop, - immobile = c(7, 8, 9), - kin = c(8, 9), - ann = list(O2g = -0.1675), - # phase = "FLUX1", - # density = "DEN1", - reduce = FALSE, - # snapshots = demodir, ## directory where we will read MUFITS SUM files - # gridfile = paste0(demodir, "/d2ascii.run.GRID.SUM") - # init = init, - # vecinj = vecinj, - # cinj = c(0,1), - # boundary = boundary, - # injections = FALSE, - iterations = iterations, - timesteps = rep(10, iterations) -) diff --git a/data/SimDol2D_diffu.yaml b/data/SimDol2D_diffu.yaml deleted file mode 100644 index 84c04dd29..000000000 --- a/data/SimDol2D_diffu.yaml +++ /dev/null @@ -1,13 +0,0 @@ -phreeqc: - RebalanceByCell: True - RebalanceFraction: 0.5 - SolutionDensityVolume: False - PartitionUZSolids: False - Units: - Solution: "mg/L" - PPassamblage: "mol/L" - Exchange: "mol/L" - Surface: "mol/L" - GasPhase: "mol/L" - SSassemblage: "mol/L" - Kinetics: "mol/L" diff --git a/ext/phreeqcrm b/ext/phreeqcrm index ba5dc40af..89f713b27 160000 --- a/ext/phreeqcrm +++ b/ext/phreeqcrm @@ -1 +1 @@ -Subproject commit ba5dc40af119da015d36d2554ecd558ef9da1eb6 +Subproject commit 89f713b273cd5340b2e8169523da04c2d7ad89c9 diff --git a/include/poet/ChemistryModule.hpp b/include/poet/ChemistryModule.hpp index 2db3cd17e..16dc1f5f9 100644 --- a/include/poet/ChemistryModule.hpp +++ b/include/poet/ChemistryModule.hpp @@ -1,20 +1,23 @@ -// Time-stamp: "Last modified 2023-07-21 12:35:23 mluebke" +// Time-stamp: "Last modified 2023-08-01 13:13:18 mluebke" #ifndef CHEMISTRYMODULE_H_ #define CHEMISTRYMODULE_H_ +#include "DHT_Wrapper.hpp" #include "Field.hpp" +#include "Interpolation.hpp" #include "IrmResult.h" #include "PhreeqcRM.h" -#include "poet/DHT_Wrapper.hpp" +#include "SimParams.hpp" +#include "DataStructures.hpp" #include #include #include #include +#include #include #include -#include #include namespace poet { @@ -39,7 +42,8 @@ public: * \param wp_size Count of grid cells to fill each work package at maximum. * \param communicator MPI communicator to distribute work in. */ - ChemistryModule(uint32_t nxyz, uint32_t wp_size, MPI_Comm communicator); + ChemistryModule(uint32_t nxyz, uint32_t wp_size, std::uint32_t maxiter, + const ChemistryParams &chem_param, MPI_Comm communicator); /** * Deconstructor, which frees DHT data structure if used. @@ -86,7 +90,8 @@ public: * * \returns A worker instance with fixed work package size. */ - static ChemistryModule createWorker(MPI_Comm communicator); + static ChemistryModule createWorker(MPI_Comm communicator, + const ChemistryParams &chem_param); /** * Default work package size. @@ -109,9 +114,9 @@ public: * Enumerating DHT file options */ enum { - DHT_FILES_DISABLED = 0, //!< disabled file output - DHT_FILES_SIMEND, //!< only output of snapshot after simulation - DHT_FILES_ITEREND //!< output snapshots after each iteration + DHT_SNAPS_DISABLED = 0, //!< disabled file output + DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation + DHT_SNAPS_ITEREND //!< output snapshots after each iteration }; /** @@ -135,47 +140,6 @@ public: */ void MasterLoopBreak(); - /** - * **Master only** Enables DHT usage for the workers. - * - * If called multiple times enabling the DHT, the current state of DHT will be - * overwritten with a new instance of the DHT. - * - * \param enable Enables or disables the usage of the DHT. - * \param size_mb Size in megabyte to allocate for the DHT if enabled. - * \param key_species Name of species to be used for key creation. - */ - void SetDHTEnabled(bool enable, uint32_t size_mb, - const std::vector &key_species); - /** - * **Master only** Set DHT snapshots to specific mode. - * - * \param type DHT snapshot mode. - * \param out_dir Path to output DHT snapshots. - */ - void SetDHTSnaps(int type, const std::string &out_dir); - /** - * **Master only** Set the vector with significant digits to round before - * inserting into DHT. - * - * \param signif_vec Vector defining significant digit for each species. Order - * is defined by prop_type vector (ChemistryModule::GetPropNames). - */ - void SetDHTSignifVector(std::vector &signif_vec); - /** - * **Master only** Set the DHT rounding type of each species. See - * DHT_PROP_TYPES enumemartion for explanation. - * - * \param proptype_vec Vector defining DHT prop type for each species. - */ - void SetDHTPropTypeVector(std::vector proptype_vec); - /** - * **Master only** Load the state of the DHT from given file. - * - * \param input_file File to load data from. - */ - void ReadDHTFile(const std::string &input_file); - /** * **Master only** Return count of grid cells. */ @@ -237,13 +201,6 @@ public: */ std::vector GetWorkerDHTHits() const; - /** - * **Master only** Collect and return DHT misses of all workers. - * - * \return Vector of all count of DHT misses. - */ - std::vector GetWorkerDHTMiss() const; - /** * **Master only** Collect and return DHT evictions of all workers. * @@ -263,9 +220,29 @@ public: * * \param enabled True if print progressbar, false if not. */ - void setProgressBarPrintout(bool enabled); + void setProgressBarPrintout(bool enabled) { + this->print_progessbar = enabled; + }; + + std::vector GetWorkerInterpolationCalls() const; + + std::vector GetWorkerInterpolationWriteTimings() const; + std::vector GetWorkerInterpolationReadTimings() const; + std::vector GetWorkerInterpolationGatherTimings() const; + std::vector GetWorkerInterpolationFunctionCallTimings() const; + + std::vector GetWorkerPHTCacheHits() const; protected: + void initializeDHT(uint32_t size_mb, + const NamedVector &key_species); + void setDHTSnapshots(int type, const std::string &out_dir); + void setDHTReadFile(const std::string &input_file); + + void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, + std::uint32_t min_entries, + const NamedVector &key_species); + enum { CHEM_INIT, CHEM_WP_SIZE, @@ -275,9 +252,11 @@ protected: CHEM_DHT_PROP_TYPE_VEC, CHEM_DHT_SNAPS, CHEM_DHT_READ_FILE, + CHEM_IP_ENABLE, + CHEM_IP_MIN_ENTRIES, + CHEM_IP_SIGNIF_VEC, CHEM_WORK_LOOP, CHEM_PERF, - CHEM_PROGRESSBAR, CHEM_BREAK_MAIN_LOOP }; @@ -288,11 +267,20 @@ protected: WORKER_DHT_GET, WORKER_DHT_FILL, WORKER_IDLE, + WORKER_IP_WRITE, + WORKER_IP_READ, + WORKER_IP_GATHER, + WORKER_IP_FC, WORKER_DHT_HITS, - WORKER_DHT_MISS, - WORKER_DHT_EVICTIONS + WORKER_DHT_EVICTIONS, + WORKER_PHT_CACHE_HITS, + WORKER_IP_CALLS }; + std::vector interp_calls; + std::vector dht_hits; + std::vector dht_evictions; + struct worker_s { double phreeqc_t = 0.; double dht_get = 0.; @@ -347,8 +335,11 @@ protected: void unshuffleField(const std::vector &in_buffer, uint32_t size_per_prop, uint32_t prop_count, uint32_t wp_count, std::vector &out_field); - std::vector - parseDHTSpeciesVec(const std::vector &species_vec) const; + std::vector + parseDHTSpeciesVec(const NamedVector &key_species, + const std::vector &to_compare) const; + + void BCastStringVec(std::vector &io); int comm_size, comm_rank; MPI_Comm group_comm; @@ -358,11 +349,14 @@ protected: uint32_t wp_size{CHEM_DEFAULT_WP_SIZE}; bool dht_enabled{false}; - int dht_snaps_type{DHT_FILES_DISABLED}; + int dht_snaps_type{DHT_SNAPS_DISABLED}; std::string dht_file_out_dir; poet::DHT_Wrapper *dht = nullptr; + bool interp_enabled{false}; + std::unique_ptr interp; + static constexpr uint32_t BUFFER_OFFSET = 5; inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const { @@ -381,7 +375,9 @@ protected: bool print_progessbar{false}; - double chem_t = 0.; + std::uint32_t file_pad; + + double chem_t{0.}; uint32_t n_cells = 0; uint32_t prop_count = 0; @@ -391,6 +387,8 @@ protected: static constexpr int MODULE_COUNT = 5; + const ChemistryParams ¶ms; + std::array speciesPerModule{}; }; } // namespace poet diff --git a/include/poet/DHT.h b/include/poet/DHT.h index e8f8399f8..ba0022bff 100644 --- a/include/poet/DHT.h +++ b/include/poet/DHT.h @@ -67,7 +67,7 @@ */ typedef struct { /** Count of writes to specific process this process did. */ - int* writes_local; + int *writes_local; /** Writes after last call of DHT_print_statistics. */ int old_writes; /** How many read misses occur? */ @@ -100,27 +100,36 @@ typedef struct { /** Size of the MPI communicator respectively all participating processes. */ int comm_size; /** Pointer to a hashfunction. */ - uint64_t (*hash_func)(int, const void*); + uint64_t (*hash_func)(int, const void *); /** Pre-allocated memory where a bucket can be received. */ - void* recv_entry; + void *recv_entry; /** Pre-allocated memory where a bucket to send can be stored. */ - void* send_entry; + void *send_entry; /** Allocated memory on which the MPI window was created. */ - void* mem_alloc; + void *mem_alloc; /** Count of read misses over all time. */ int read_misses; /** Count of evictions over all time. */ int evictions; /** Array of indeces where a bucket can be stored. */ - uint64_t* index; + uint64_t *index; /** Count of possible indeces. */ unsigned int index_count; + + int (*accumulate_callback)(int, void *, int, void *); #ifdef DHT_STATISTICS /** Detailed statistics of the usage of the DHT. */ - DHT_stats* stats; + DHT_stats *stats; #endif } DHT; +extern void DHT_set_accumulate_callback(DHT *table, + int (*callback_func)(int, void *, int, + void *)); + +extern int DHT_write_accumulate(DHT *table, const void *key, int send_size, + void *data, uint32_t *proc, uint32_t *index, int *callback_ret); + /** * @brief Create a DHT. * @@ -141,9 +150,9 @@ typedef struct { * @return DHT* The returned value is the \a DHT-object which serves as a handle * for all DHT operations. If an error occured NULL is returned. */ -extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process, +extern DHT *DHT_create(MPI_Comm comm, uint64_t size_per_process, unsigned int data_size, unsigned int key_size, - uint64_t (*hash_func)(int, const void*)); + uint64_t (*hash_func)(int, const void *)); /** * @brief Write data into DHT. @@ -161,10 +170,14 @@ extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process, * @param table Pointer to the \a DHT-object. * @param key Pointer to the key. * @param data Pointer to the data. + * @param proc If not NULL, returns the process number written to. + * @param index If not NULL, returns the index of the bucket where the data was + * written to. * @return int Returns either DHT_SUCCESS on success or correspondending error * value on eviction or error. */ -extern int DHT_write(DHT* table, void* key, void* data); +extern int DHT_write(DHT *table, void *key, void *data, uint32_t *proc, + uint32_t *index); /** * @brief Read data from DHT. @@ -187,8 +200,10 @@ extern int DHT_write(DHT* table, void* key, void* data); * @return int Returns either DHT_SUCCESS on success or correspondending error * value on read miss or error. */ -extern int DHT_read(DHT* table, void* key, void* destination); +extern int DHT_read(DHT *table, const void *key, void *destination); +extern int DHT_read_location(DHT *table, uint32_t proc, uint32_t index, + void *destination); /** * @brief Write current state of DHT to file. * @@ -203,7 +218,7 @@ extern int DHT_read(DHT* table, void* key, void* destination); * @return int Returns DHT_SUCCESS on succes, DHT_FILE_IO_ERROR if file can't be * opened/closed or DHT_WRITE_ERROR if file is not writable. */ -extern int DHT_to_file(DHT* table, const char* filename); +extern int DHT_to_file(DHT *table, const char *filename); /** * @brief Read state of DHT from file. @@ -223,7 +238,7 @@ extern int DHT_to_file(DHT* table, const char* filename); * file doesn't match expectation. This is possible if the data size or key size * is different. */ -extern int DHT_from_file(DHT* table, const char* filename); +extern int DHT_from_file(DHT *table, const char *filename); /** * @brief Free ressources of DHT. @@ -241,7 +256,7 @@ extern int DHT_from_file(DHT* table, const char* filename); * @return int Returns either DHT_SUCCESS on success or DHT_MPI_ERROR on * internal MPI error. */ -extern int DHT_free(DHT* table, int* eviction_counter, int* readerror_counter); +extern int DHT_free(DHT *table, int *eviction_counter, int *readerror_counter); /** * @brief Prints a table with statistics about current use of DHT. @@ -267,7 +282,7 @@ extern int DHT_free(DHT* table, int* eviction_counter, int* readerror_counter); * @return int Returns DHT_SUCCESS on success or DHT_MPI_ERROR on internal MPI * error. */ -extern int DHT_print_statistics(DHT* table); +extern int DHT_print_statistics(DHT *table); /** * @brief Determine destination rank and index. @@ -286,8 +301,8 @@ extern int DHT_print_statistics(DHT* table); * @param index_count Count of possible indeces. */ static void determine_dest(uint64_t hash, int comm_size, - unsigned int table_size, unsigned int* dest_rank, - uint64_t* index, unsigned int index_count); + unsigned int table_size, unsigned int *dest_rank, + uint64_t *index, unsigned int index_count); /** * @brief Set the occupied flag. @@ -296,7 +311,7 @@ static void determine_dest(uint64_t hash, int comm_size, * * @param flag_byte First byte of a bucket. */ -static void set_flag(char* flag_byte); +static void set_flag(char *flag_byte); /** * @brief Get the occupied flag. diff --git a/include/poet/DHT_Types.hpp b/include/poet/DHT_Types.hpp index be0ed295b..15eb9913e 100644 --- a/include/poet/DHT_Types.hpp +++ b/include/poet/DHT_Types.hpp @@ -2,7 +2,7 @@ #define DHT_TYPES_H_ namespace poet { -enum DHT_PROP_TYPES { DHT_TYPE_DEFAULT, DHT_TYPE_CHARGE, DHT_TYPE_IGNORE, DHT_TYPE_TOTAL }; +enum DHT_PROP_TYPES { DHT_TYPE_DEFAULT, DHT_TYPE_CHARGE, DHT_TYPE_TOTAL }; } #endif // DHT_TYPES_H_ diff --git a/include/poet/DHT_Wrapper.hpp b/include/poet/DHT_Wrapper.hpp index c1ebffcb1..f2ed226ab 100644 --- a/include/poet/DHT_Wrapper.hpp +++ b/include/poet/DHT_Wrapper.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-04-24 16:23:42 mluebke" +// Time-stamp: "Last modified 2023-08-01 13:48:34 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -23,10 +23,18 @@ #ifndef DHT_WRAPPER_H #define DHT_WRAPPER_H +#include "DataStructures.hpp" +#include "LookupKey.hpp" #include "poet/DHT_Types.hpp" +#include "poet/HashFunctions.hpp" +#include "poet/LookupKey.hpp" +#include "poet/Rounding.hpp" #include #include +#include #include +#include +#include #include extern "C" { @@ -37,35 +45,7 @@ extern "C" { namespace poet { -struct DHT_SCNotation { - std::int8_t exp : 8; - std::int64_t significant : 56; -}; - -union DHT_Keyelement { - double fp_elemet; - DHT_SCNotation sc_notation; -}; - -using DHT_ResultObject = struct DHTResobj { - uint32_t length; - std::vector> keys; - std::vector> results; - std::vector needPhreeqc; -}; - -/** - * @brief Return user-defined md5sum - * - * This function will calculate a hashsum with the help of md5sum. Therefore the - * md5sum for a given key is calculated and divided into two 64-bit parts. These - * will be XORed and returned as the hash. - * - * @param key_size Size of key in bytes - * @param key Pointer to key - * @return uint64_t Hashsum as an unsigned 64-bit integer - */ -static uint64_t get_md5(int key_size, void *key); +using DHT_Location = std::pair; /** * @brief C++-Wrapper around DHT implementation @@ -76,6 +56,20 @@ static uint64_t get_md5(int key_size, void *key); */ class DHT_Wrapper { public: + using DHT_ResultObject = struct DHTResobj { + uint32_t length; + std::vector keys; + std::vector> results; + std::vector old_values; + std::vector needPhreeqc; + std::vector locations; + }; + + static constexpr std::int32_t DHT_KEY_INPUT_CUSTOM = + std::numeric_limits::min(); + + static constexpr int DHT_KEY_SIGNIF_DEFAULT = 5; + /** * @brief Construct a new dht wrapper object * @@ -91,8 +85,9 @@ public: * for key creation. * @param data_count Count of data entries */ - DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size, - const std::vector &key_indices, + DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, + const NamedVector &key_species, + const std::vector &key_indices, uint32_t data_count); /** * @brief Destroy the dht wrapper object @@ -105,6 +100,9 @@ public: */ ~DHT_Wrapper(); + DHT_Wrapper &operator=(const DHT_Wrapper &) = delete; + DHT_Wrapper(const DHT_Wrapper &) = delete; + /** * @brief Check if values of workpackage are stored in DHT * @@ -123,7 +121,7 @@ public: */ auto checkDHT(int length, double dt, const std::vector &work_package, std::vector &curr_mapping) - -> const poet::DHT_ResultObject &; + -> const DHT_ResultObject &; /** * @brief Write simulated values into DHT @@ -183,13 +181,6 @@ public: */ auto getHits() { return this->dht_hits; }; - /** - * @brief Get the Misses object - * - * @return uint64_t Count of read misses - */ - auto getMisses() { return this->dht_miss; }; - /** * @brief Get the Evictions object * @@ -197,32 +188,56 @@ public: */ auto getEvictions() { return this->dht_evictions; }; - void SetSignifVector(std::vector signif_vec); - void SetPropTypeVector(std::vector prop_type_vec); - - void setBaseTotals(const std::array &bt) { - this->base_totals = bt; + void resetCounter() { + this->dht_hits = 0; + this->dht_evictions = 0; } + void SetSignifVector(std::vector signif_vec); + + auto getDataCount() { return this->data_count; } + auto getCommunicator() { return this->communicator; } + DHT *getDHT() { return this->dht_object; }; + + DHT_ResultObject &getDHTResults() { return this->dht_results; } + + const auto &getKeyElements() const { return this->input_key_elements; } + const auto &getKeySpecies() const { return this->key_species; } + + void setBaseTotals(double tot_h, double tot_o) { + this->base_totals = {tot_h, tot_o}; + } + + std::uint32_t getInputCount() const { + return this->input_key_elements.size(); + } + + std::uint32_t getOutputCount() const { return this->data_count; } + private: uint32_t key_count; uint32_t data_count; DHT *dht_object; + MPI_Comm communicator; - std::vector fuzzForDHT(int var_count, void *key, double dt); + LookupKey fuzzForDHT(int var_count, void *key, double dt); + + std::vector + outputToInputAndRates(const std::vector &old_results, + const std::vector &new_results); + + std::vector + inputAndRatesToOutput(const std::vector &dht_data); uint32_t dht_hits = 0; - uint32_t dht_miss = 0; uint32_t dht_evictions = 0; - std::vector dht_signif_vector; - std::vector dht_prop_type_vector; - std::vector input_key_elements; + NamedVector key_species; - static constexpr int DHT_KEY_SIGNIF_DEFAULT = 5; - static constexpr int DHT_KEY_SIGNIF_TOTALS = 10; - static constexpr int DHT_KEY_SIGNIF_CHARGE = 3; + std::vector dht_signif_vector; + std::vector dht_prop_type_vector; + std::vector input_key_elements; DHT_ResultObject dht_results; diff --git a/include/poet/DataStructures.hpp b/include/poet/DataStructures.hpp new file mode 100644 index 000000000..ec323d6fe --- /dev/null +++ b/include/poet/DataStructures.hpp @@ -0,0 +1,35 @@ +#ifndef DATASTRUCTURES_H_ +#define DATASTRUCTURES_H_ + +#include +#include +#include +#include + +namespace poet { + +template class NamedVector { +public: + void insert(std::pair to_insert) { + this->names.push_back(to_insert.first); + this->values.push_back(to_insert.second); + container_size++; + } + + bool empty() const { return this->container_size == 0; } + + std::size_t size() const { return this->container_size; } + + value_type &operator[](std::size_t i) { return values[i]; } + + const std::vector &getNames() const { return this->names; } + const std::vector &getValues() const { return this->values; } + +private: + std::size_t container_size{0}; + + std::vector names{}; + std::vector values{}; +}; +} // namespace poet +#endif // DATASTRUCTURES_H_ diff --git a/include/poet/Interpolation.hpp b/include/poet/Interpolation.hpp new file mode 100644 index 000000000..c80cdc97b --- /dev/null +++ b/include/poet/Interpolation.hpp @@ -0,0 +1,266 @@ +// Time-stamp: "Last modified 2023-08-01 18:10:48 mluebke" + +#ifndef INTERPOLATION_H_ +#define INTERPOLATION_H_ + +#include "DHT.h" +#include "DHT_Wrapper.hpp" +#include "DataStructures.hpp" +#include "LookupKey.hpp" +#include "poet/DHT_Wrapper.hpp" +#include "poet/Rounding.hpp" +#include +#include +#include +#include +#include +#include +#include +extern "C" { +#include "poet/DHT.h" +} + +#include "poet/LookupKey.hpp" +#include +#include +#include +#include + +namespace poet { + +class ProximityHashTable { +public: + using bucket_indicator = std::uint64_t; + + ProximityHashTable(uint32_t key_size, uint32_t data_size, + uint32_t entry_count, uint32_t size_per_process, + MPI_Comm communicator); + ~ProximityHashTable(); + + // delete assign and copy operator + ProximityHashTable &operator=(const ProximityHashTable *) = delete; + ProximityHashTable(const ProximityHashTable &) = delete; + + struct PHT_Result { + std::uint32_t size; + std::vector> in_values; + std::vector> out_values; + + std::uint32_t getSize() const { + std::uint32_t sum{0}; + for (const auto &results : out_values) { + sum += results.size() * sizeof(double); + } + return sum; + } + }; + + void setSourceDHT(DHT *src) { + this->source_dht = src; + this->dht_key_count = src->key_size / sizeof(Lookup_Keyelement); + this->dht_data_count = src->data_size / sizeof(double); + this->dht_buffer.resize(src->data_size + src->key_size); + } + + void writeLocationToPHT(LookupKey key, DHT_Location location); + + const PHT_Result &query(const LookupKey &key, + const std::vector &signif, + std::uint32_t min_entries_needed, + std::uint32_t input_count, + std::uint32_t output_count); + + std::uint64_t getLocations(const LookupKey &key); + + void getEntriesFromLocation(const PHT_Result &locations, + const std::vector &signif); + + void writeStats() { DHT_print_statistics(this->prox_ht); } + + DHT *getDHTObject() { return this->prox_ht; } + + auto getPHTWriteTime() const -> double { return this->pht_write_t; }; + auto getPHTReadTime() const -> double { return this->pht_read_t; }; + auto getDHTGatherTime() const -> double { return this->pht_gather_dht_t; }; + + auto getLocalCacheHits() const -> std::vector { + return this->all_cache_hits; + } + + void storeAndResetCounter() { + all_cache_hits.push_back(cache_hits); + cache_hits = 0; + } + +#if POET_PHT_ADD + void incrementReadCounter(const LookupKey &key); +#endif + +private: + enum { INTERP_CB_OK, INTERP_CB_FULL, INTERP_CB_ALREADY_IN }; + + static int PHT_callback_function(int in_data_size, void *in_data, + int out_data_size, void *out_data); + + static std::vector convertKeysFromDHT(Lookup_Keyelement *keys_in, + std::uint32_t key_size); + + static bool similarityCheck(const LookupKey &fine, const LookupKey &coarse, + const std::vector &signif); + + char *bucket_store; + + class Cache + : private std::unordered_map { + public: + void operator()(const LookupKey &key, const PHT_Result val); + + std::pair operator[](const LookupKey &key); + void flush() { this->clear(); } + + protected: + private: + static constexpr std::int64_t MAX_CACHE_SIZE = 100E6; + + std::int64_t free_mem{MAX_CACHE_SIZE}; + + std::list lru_queue; + using lru_iterator = typename std::list::iterator; + + std::unordered_map keyfinder; + }; + + Cache localCache; + DHT *prox_ht; + + std::uint32_t dht_evictions = 0; + + DHT *source_dht = nullptr; + + PHT_Result lookup_results; + std::vector dht_buffer; + + std::uint32_t dht_key_count; + std::uint32_t dht_data_count; + + MPI_Comm communicator; + + double pht_write_t = 0.; + double pht_read_t = 0.; + double pht_gather_dht_t = 0.; + + std::uint32_t cache_hits{0}; + std::vector all_cache_hits{}; +}; + +class InterpolationModule { +public: + using InterpFunction = std::vector (*)( + const std::vector &, const std::vector &, + const std::vector> &, + const std::vector> &); + + InterpolationModule(std::uint32_t entries_per_bucket, + std::uint64_t size_per_process, + std::uint32_t min_entries_needed, DHT_Wrapper &dht, + const NamedVector &interp_key_signifs, + const std::vector &dht_key_indices); + + enum result_status { RES_OK, INSUFFICIENT_DATA, NOT_NEEDED }; + + struct InterpolationResult { + std::vector> results; + std::vector status; + + void ResultsToWP(std::vector &currWP); + }; + + void setInterpolationFunction(InterpFunction func) { + this->f_interpolate = func; + } + + void setMinEntriesNeeded(std::uint32_t entries) { + this->min_entries_needed = entries; + } + + auto getMinEntriesNeeded() { return this->min_entries_needed; } + + void writePairs(const DHT_Wrapper::DHT_ResultObject &in); + + void tryInterpolation(DHT_Wrapper::DHT_ResultObject &dht_results, + std::vector &curr_mapping); + + void resultsToWP(std::vector &work_package) const; + + auto getPHTWriteTime() const { return pht->getPHTWriteTime(); }; + auto getPHTReadTime() const { return pht->getPHTReadTime(); }; + auto getDHTGatherTime() const { return pht->getDHTGatherTime(); }; + auto getInterpolationTime() const { return this->interp_t; }; + + auto getInterpolationCount() const -> std::uint32_t { + return this->interpolations; + } + + auto getPHTLocalCacheHits() const -> std::vector { + return this->pht->getLocalCacheHits(); + } + + void resetCounter() { + this->interpolations = 0; + this->pht->storeAndResetCounter(); + } + + void writePHTStats() { this->pht->writeStats(); } + void dumpPHTState(const std::string &filename) { + DHT_to_file(this->pht->getDHTObject(), filename.c_str()); + } + + static constexpr std::uint32_t COARSE_DIFF = 2; + static constexpr std::uint32_t COARSE_SIGNIF_DEFAULT = + DHT_Wrapper::DHT_KEY_SIGNIF_DEFAULT - COARSE_DIFF; + +private: + void initPHT(std::uint32_t key_count, std::uint32_t entries_per_bucket, + std::uint32_t size_per_process, MPI_Comm communicator); + + static std::vector dummy(const std::vector &, + const std::vector &, + const std::vector> &, + const std::vector> &) { + return {}; + } + + double interp_t = 0.; + + std::uint32_t interpolations{0}; + + InterpFunction f_interpolate = dummy; + + std::uint32_t min_entries_needed = 5; + + std::unique_ptr pht; + + DHT_Wrapper &dht_instance; + + NamedVector key_signifs; + std::vector key_indices; + + InterpolationResult interp_result; + PHT_Rounder rounder; + + LookupKey roundKey(const LookupKey &in_key) { + LookupKey out_key; + + for (std::uint32_t i = 0; i < key_indices.size(); i++) { + out_key.push_back(rounder.round(in_key[key_indices[i]], key_signifs[i])); + } + + // timestep + out_key.push_back(in_key.back()); + + return out_key; + } +}; +} // namespace poet + +#endif // INTERPOLATION_H_ diff --git a/include/poet/LookupKey.hpp b/include/poet/LookupKey.hpp new file mode 100644 index 000000000..c3366735f --- /dev/null +++ b/include/poet/LookupKey.hpp @@ -0,0 +1,50 @@ +// Time-stamp: "Last modified 2023-07-26 10:20:10 mluebke" + +#ifndef LOOKUPKEY_H_ +#define LOOKUPKEY_H_ + +#include "poet/HashFunctions.hpp" +#include +#include +#include + +namespace poet { + +struct Lookup_SC_notation { + std::int8_t exp : 8; + std::int64_t significant : 56; +}; + +union Lookup_Keyelement { + double fp_element; + Lookup_SC_notation sc_notation; + + bool operator==(const Lookup_Keyelement &other) const { + return std::memcmp(this, &other, sizeof(Lookup_Keyelement)) == 0 ? true + : false; + } +}; + +class LookupKey : public std::vector { +public: + explicit LookupKey(size_type count); + + using std::vector::vector; + + std::vector to_double() const; + static Lookup_SC_notation round_from_double(const double in, + std::uint32_t signif); + static double to_double(const Lookup_SC_notation in); +}; + +struct LookupKeyHasher { + std::uint64_t operator()(const LookupKey &k) const { + const uint32_t key_size = k.size() * sizeof(Lookup_Keyelement); + + return poet::Murmur2_64A(key_size, k.data()); + } +}; + +} // namespace poet + +#endif // LOOKUPKEY_H_ diff --git a/include/poet/RInsidePOET.hpp b/include/poet/RInsidePOET.hpp index a1d2b8306..fb2102685 100644 --- a/include/poet/RInsidePOET.hpp +++ b/include/poet/RInsidePOET.hpp @@ -1,7 +1,15 @@ #ifndef RPOET_H_ #define RPOET_H_ +#include "DataStructures.hpp" + #include +#include +#include +#include +#include +#include +#include class RInsidePOET : public RInside { public: @@ -14,6 +22,38 @@ public: RInsidePOET(RInsidePOET const &) = delete; void operator=(RInsidePOET const &) = delete; + inline bool checkIfExists(const std::string &R_name, + const std::string &where) { + return Rcpp::as( + this->parseEval("'" + R_name + "' %in% names(" + where + ")")); + } + + template inline T getSTL(const std::string &R_name) { + return Rcpp::as(RInside::operator[](R_name)); + } + + template + poet::NamedVector wrapNamedVector(const std::string &R_name) { + std::size_t name_size = this->parseEval("length(names(" + R_name + "))"); + if (name_size == 0) { + throw std::runtime_error("wrapNamedVector: " + R_name + + " is not a named vector!"); + } + + auto names = Rcpp::as>( + this->parseEval("names(" + R_name + ")")); + auto values = Rcpp::as(this->parseEval(R_name)); + + poet::NamedVector ret_map; + + for (std::size_t i = 0; i < names.size(); i++) { + ret_map.insert( + std::make_pair(names[i], static_cast(values[i]))); + } + + return ret_map; + } + private: RInsidePOET() : RInside(){}; }; diff --git a/include/poet/Rounding.hpp b/include/poet/Rounding.hpp new file mode 100644 index 000000000..3f659290e --- /dev/null +++ b/include/poet/Rounding.hpp @@ -0,0 +1,92 @@ +#ifndef ROUNDING_H_ +#define ROUNDING_H_ + +#include "LookupKey.hpp" +#include +#include + +namespace poet { + +constexpr std::int8_t AQUEOUS_EXP = -13; + +template +class IRounding { +public: + virtual Output round(const Input &, std::uint32_t signif) = 0; + virtual ConvertTo convert(const Output &) = 0; +}; + +class DHT_Rounder { +public: + Lookup_Keyelement round(const double &value, std::uint32_t signif, + bool is_ho) { + std::int8_t exp = + static_cast(std::floor(std::log10(std::fabs(value)))); + + if (!is_ho) { + if (exp < AQUEOUS_EXP) { + return {.sc_notation = {0, 0}}; + } + + std::int8_t diff = exp - signif + 1; + if (diff < AQUEOUS_EXP) { + signif -= AQUEOUS_EXP - diff; + } + } + + Lookup_Keyelement elem; + elem.sc_notation.exp = exp; + elem.sc_notation.significant = + static_cast(value * std::pow(10, signif - exp - 1)); + + return elem; + } + + double convert(const Lookup_Keyelement &key_elem) { + std::int32_t normalized_exp = static_cast( + -std::log10(std::fabs(key_elem.sc_notation.significant))); + + // add stored exponent to normalized exponent + normalized_exp += key_elem.sc_notation.exp; + + // return significant times 10 to the power of exponent + return key_elem.sc_notation.significant * std::pow(10., normalized_exp); + } +}; + +class PHT_Rounder : public IRounding { +public: + Lookup_Keyelement round(const Lookup_Keyelement &value, + std::uint32_t signif) { + Lookup_Keyelement new_val = value; + + std::uint32_t diff_signif = + static_cast( + std::ceil(std::log10(std::abs(value.sc_notation.significant)))) - + signif; + + new_val.sc_notation.significant = static_cast( + value.sc_notation.significant / std::pow(10., diff_signif)); + + if (new_val.sc_notation.significant == 0) { + new_val.sc_notation.exp = 0; + } + + return new_val; + } + + double convert(const Lookup_Keyelement &key_elem) { + std::int32_t normalized_exp = static_cast( + -std::log10(std::fabs(key_elem.sc_notation.significant))); + + // add stored exponent to normalized exponent + normalized_exp += key_elem.sc_notation.exp; + + // return significant times 10 to the power of exponent + return key_elem.sc_notation.significant * std::pow(10., normalized_exp); + } +}; + +} // namespace poet + +#endif // ROUNDING_H_ diff --git a/include/poet/SimParams.hpp b/include/poet/SimParams.hpp index 3e7483e31..8a28624df 100644 --- a/include/poet/SimParams.hpp +++ b/include/poet/SimParams.hpp @@ -24,17 +24,20 @@ #include #include #include +#include #include +#include "DataStructures.hpp" +#include "RInsidePOET.hpp" #include "argh.hpp" // Argument handler https://github.com/adishavit/argh #include #include // BSD-licenced /** Standard DHT Size. Defaults to 1 GB (1000 MB) */ -constexpr uint32_t DHT_SIZE_PER_PROCESS_MB = 1E3; +constexpr uint32_t DHT_SIZE_PER_PROCESS_MB = 1.5E3; /** Standard work package size */ -#define WORK_PACKAGE_SIZE_DEFAULT 5 +#define WORK_PACKAGE_SIZE_DEFAULT 32 namespace poet { @@ -70,6 +73,8 @@ typedef struct { /** indicating whether the progress bar during chemistry simulation should be * printed or not */ bool print_progressbar; + + bool interp_enabled; } t_simparams; using GridParams = struct s_GridParams { @@ -101,13 +106,24 @@ using DiffusionParams = struct s_DiffusionParams { s_DiffusionParams(RInside &R); }; -using ChemistryParams = struct s_ChemistryParams { +struct ChemistryParams { std::string database_path; std::string input_script; - std::vector dht_species; - std::vector dht_signif; - s_ChemistryParams(RInside &R); + bool use_dht; + std::uint64_t dht_size; + int dht_snaps; + std::string dht_file; + std::string dht_outdir; + NamedVector dht_signifs; + + bool use_interp; + std::uint64_t pht_size; + std::uint32_t pht_max_entries; + std::uint32_t interp_min_entries; + NamedVector pht_signifs; + + void initFromR(RInsidePOET &R); }; /** @@ -159,7 +175,7 @@ public: * @return int Returns with 0 if no error occured, otherwise value less than 0 * is returned. */ - int parseFromCmdl(char *argv[], RInside &R); + int parseFromCmdl(char *argv[], RInsidePOET &R); /** * @brief Init std::vector values @@ -193,25 +209,10 @@ public: */ auto getDHTSignifVector() const { return this->dht_signif_vector; }; - /** - * @brief Get the DHT_Prop_Type_Vector - * - * Returns a vector indicating of which type a variable of a grid cell is. - * - * @return std::vector Vector if strings defining a type of a - * variable - */ - auto getDHTPropTypeVector() const { return this->dht_prop_type_vector; }; + auto getPHTSignifVector() const { return this->pht_signif_vector; }; - /** - * @brief Return name of DHT snapshot. - * - * Name of the DHT file which is used to initialize the DHT with a previously - * written snapshot. - * - * @return std::string Absolute paht to the DHT snapshot - */ - auto getDHTFile() const { return this->dht_file; }; + auto getPHTBucketSize() const { return this->pht_bucket_size; }; + auto getPHTMinEntriesNeeded() const { return this->pht_min_entries_needed; }; /** * @brief Get the filesim name @@ -233,23 +234,30 @@ public: */ auto getOutDir() const { return this->out_dir; }; + const auto &getChemParams() const { return chem_params; } + private: std::list validateOptions(argh::parser cmdl); - const std::set flaglist{"ignore-result", "dht", "dht-nolog", "P", - "progress"}; - const std::set paramlist{"work-package-size", "dht-signif", - "dht-strategy", "dht-size", - "dht-snaps", "dht-file"}; + const std::set flaglist{ + "ignore-result", "dht", "dht-nolog", "P", "progress", "interp"}; + const std::set paramlist{ + "work-package-size", "dht-signif", "dht-strategy", + "dht-size", "dht-snaps", "dht-file", + "interp-size", "interp-min", "interp-bucket-entries"}; t_simparams simparams; std::vector dht_signif_vector; - std::vector dht_prop_type_vector; + std::vector pht_signif_vector; + + uint32_t pht_bucket_size; + uint32_t pht_min_entries_needed; - std::string dht_file; std::string filesim; std::string out_dir; + + ChemistryParams chem_params; }; } // namespace poet #endif // PARSER_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e273c28f8..74a61c8c7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,9 +2,6 @@ file(GLOB_RECURSE poet_lib_SRC CONFIGURE_DEPENDS "*.cpp" "*.c") -find_library(MATH_LIBRARY m) -find_library(CRYPTO_LIBRARY crypto) - add_library(poet_lib ${poet_lib_SRC}) target_include_directories(poet_lib PUBLIC ${PROJECT_SOURCE_DIR}/include) target_link_libraries(poet_lib PUBLIC @@ -20,3 +17,9 @@ option(POET_DHT_DEBUG "Build with DHT debug info" OFF) if(POET_DHT_DEBUG) target_compile_definitions(poet_lib PRIVATE DHT_STATISTICS) endif() + +option(POET_PHT_ADDITIONAL_INFO "Enables additional information in the PHT" OFF) + +if (POET_PHT_ADDITIONAL_INFO) + target_compile_definitions(poet_lib PRIVATE POET_PHT_ADD) +endif() diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp index a42d8151c..52a315734 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/ChemistryModule/ChemistryModule.cpp @@ -1,20 +1,167 @@ #include "poet/ChemistryModule.hpp" #include "PhreeqcRM.h" #include "poet/DHT_Wrapper.hpp" +#include "poet/Interpolation.hpp" #include #include +#include #include #include +#include #include #include #include #include #include +constexpr uint32_t MB_FACTOR = 1E6; + +std::vector +inverseDistanceWeighting(const std::vector &to_calc, + const std::vector &from, + const std::vector> &input, + const std::vector> &output) { + std::vector results = from; + + const std::uint32_t buffer_size = input.size() + 1; + double buffer[buffer_size]; + double from_rescaled; + + const std::uint32_t data_set_n = input.size(); + double rescaled[to_calc.size()][data_set_n + 1]; + double weights[data_set_n]; + + // rescaling over all key elements + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + const auto output_comp_i = to_calc[key_comp_i]; + + // rescale input between 0 and 1 + for (int point_i = 0; point_i < data_set_n; point_i++) { + rescaled[key_comp_i][point_i] = input[point_i][key_comp_i]; + } + + rescaled[key_comp_i][data_set_n] = from[output_comp_i]; + + const double min = *std::min_element(rescaled[key_comp_i], + rescaled[key_comp_i] + data_set_n + 1); + const double max = *std::max_element(rescaled[key_comp_i], + rescaled[key_comp_i] + data_set_n + 1); + + for (int point_i = 0; point_i < data_set_n; point_i++) { + rescaled[key_comp_i][point_i] = + ((max - min) != 0 + ? (rescaled[key_comp_i][point_i] - min) / (max - min) + : 0); + } + rescaled[key_comp_i][data_set_n] = + ((max - min) != 0 ? (from[output_comp_i] - min) / (max - min) : 0); + } + + // calculate distances for each data set + double inv_sum = 0; + for (int point_i = 0; point_i < data_set_n; point_i++) { + double distance = 0; + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + distance += std::pow( + rescaled[key_comp_i][point_i] - rescaled[key_comp_i][data_set_n], 2); + } + weights[point_i] = 1 / std::sqrt(distance); + assert(!std::isnan(weights[point_i])); + inv_sum += weights[point_i]; + } + + assert(!std::isnan(inv_sum)); + + // actual interpolation + // bool has_h = false; + // bool has_o = false; + + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + const auto output_comp_i = to_calc[key_comp_i]; + double key_delta = 0; + + // if (interp_i == 0) { + // has_h = true; + // } + + // if (interp_i == 1) { + // has_o = true; + // } + + for (int j = 0; j < data_set_n; j++) { + key_delta += weights[j] * input[j][key_comp_i]; + } + + key_delta /= inv_sum; + + results[output_comp_i] = from[output_comp_i] + key_delta; + } + + // if (!has_h) { + // double new_val = 0; + // for (int j = 0; j < data_set_n; j++) { + // new_val += weights[j] * output[j][0]; + // } + // results[0] = new_val / inv_sum; + // } + + // if (!has_h) { + // double new_val = 0; + // for (int j = 0; j < data_set_n; j++) { + // new_val += weights[j] * output[j][1]; + // } + // results[1] = new_val / inv_sum; + // } + + // for (std::uint32_t i = 0; i < to_calc.size(); i++) { + // const std::uint32_t interp_i = to_calc[i]; + + // // rescale input between 0 and 1 + // for (int j = 0; j < input.size(); j++) { + // buffer[j] = input[j].at(i); + // } + + // buffer[buffer_size - 1] = from[interp_i]; + + // const double min = *std::min_element(buffer, buffer + buffer_size); + // const double max = *std::max_element(buffer, buffer + buffer_size); + + // for (int j = 0; j < input.size(); j++) { + // buffer[j] = ((max - min) != 0 ? (buffer[j] - min) / (max - min) : 1); + // } + // from_rescaled = + // ((max - min) != 0 ? (from[interp_i] - min) / (max - min) : 0); + + // double inv_sum = 0; + + // // calculate distances for each point + // for (int i = 0; i < input.size(); i++) { + // const double distance = std::pow(buffer[i] - from_rescaled, 2); + + // buffer[i] = distance > 0 ? (1 / std::sqrt(distance)) : 0; + // inv_sum += buffer[i]; + // } + // // calculate new values + // double new_val = 0; + // for (int i = 0; i < output.size(); i++) { + // new_val += buffer[i] * output[i][interp_i]; + // } + // results[interp_i] = new_val / inv_sum; + // if (std::isnan(results[interp_i])) { + // std::cout << "nan with new_val = " << output[0][i] << std::endl; + // } + // } + + return results; +} + poet::ChemistryModule::ChemistryModule(uint32_t nxyz, uint32_t wp_size, + std::uint32_t maxiter, + const ChemistryParams &chem_param, MPI_Comm communicator) - : PhreeqcRM(nxyz, 1), group_comm(communicator), wp_size(wp_size) { + : PhreeqcRM(nxyz, 1), group_comm(communicator), wp_size(wp_size), + params(chem_param) { MPI_Comm_size(communicator, &this->comm_size); MPI_Comm_rank(communicator, &this->comm_rank); @@ -24,7 +171,10 @@ poet::ChemistryModule::ChemistryModule(uint32_t nxyz, uint32_t wp_size, if (!is_sequential && is_master) { MPI_Bcast(&wp_size, 1, MPI_UINT32_T, 0, this->group_comm); + MPI_Bcast(&maxiter, 1, MPI_UINT32_T, 0, this->group_comm); } + + this->file_pad = std::ceil(std::log10(maxiter + 1)); } poet::ChemistryModule::~ChemistryModule() { @@ -34,11 +184,15 @@ poet::ChemistryModule::~ChemistryModule() { } poet::ChemistryModule -poet::ChemistryModule::createWorker(MPI_Comm communicator) { +poet::ChemistryModule::createWorker(MPI_Comm communicator, + const ChemistryParams &chem_param) { uint32_t wp_size; MPI_Bcast(&wp_size, 1, MPI_UINT32_T, 0, communicator); - return ChemistryModule(wp_size, wp_size, communicator); + std::uint32_t maxiter; + MPI_Bcast(&maxiter, 1, MPI_UINT32_T, 0, communicator); + + return ChemistryModule(wp_size, wp_size, maxiter, chem_param, communicator); } void poet::ChemistryModule::RunInitFile(const std::string &input_script_path) { @@ -122,7 +276,9 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { } // now sort the new values - std::sort(new_solution_names.begin() + 4, new_solution_names.end()); + std::sort(new_solution_names.begin() + 3, new_solution_names.end()); + this->SetPOETSolutionNames(new_solution_names); + this->speciesPerModule[0] = new_solution_names.size(); // and append other processes than solutions std::vector new_prop_names = new_solution_names; @@ -133,8 +289,6 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { this->prop_names = std::move(new_prop_names); this->prop_count = prop_names.size(); - this->SetPOETSolutionNames(new_solution_names); - if (is_master) { this->n_cells = trans_field.GetRequestedVecSize(); chem_field = Field(n_cells); @@ -151,162 +305,186 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { std::vector> initial_values; - for (int i = 0; i < phreeqc_init.size(); i++) { + for (const auto &vec : trans_field.As2DVector()) { + initial_values.push_back(vec); + } + + this->base_totals = {initial_values.at(0).at(0), + initial_values.at(1).at(0)}; + ChemBCast(base_totals.data(), 2, MPI_DOUBLE); + + for (int i = speciesPerModule[0]; i < phreeqc_init.size(); i++) { std::vector init(n_cells, phreeqc_init[i]); initial_values.push_back(std::move(init)); } chem_field.InitFromVec(initial_values, prop_names); + } else { + ChemBCast(base_totals.data(), 2, MPI_DOUBLE); + } + + if (this->params.use_dht || this->params.use_interp) { + initializeDHT(this->params.dht_size, this->params.dht_signifs); + setDHTSnapshots(this->params.dht_snaps, this->params.dht_outdir); + setDHTReadFile(this->params.dht_file); + + this->dht_enabled = this->params.use_dht; + + if (this->params.use_interp) { + initializeInterp(this->params.pht_max_entries, this->params.pht_size, + this->params.interp_min_entries, + this->params.pht_signifs); + this->interp_enabled = this->params.use_interp; + } } } -void poet::ChemistryModule::SetDHTEnabled( - bool enable, uint32_t size_mb, - const std::vector &key_species) { - +void poet::ChemistryModule::initializeDHT( + uint32_t size_mb, const NamedVector &key_species) { constexpr uint32_t MB_FACTOR = 1E6; - std::vector key_inidices; + + this->dht_enabled = true; + + MPI_Comm dht_comm; if (this->is_master) { - int ftype = CHEM_DHT_ENABLE; - PropagateFunctionType(ftype); - ChemBCast(&enable, 1, MPI_CXX_BOOL); - ChemBCast(&size_mb, 1, MPI_UINT32_T); - - key_inidices = parseDHTSpeciesVec(key_species); - int vec_size = key_inidices.size(); - ChemBCast(&vec_size, 1, MPI_INT); - ChemBCast(key_inidices.data(), key_inidices.size(), MPI_UINT32_T); - } else { - int vec_size; - ChemBCast(&vec_size, 1, MPI_INT); - key_inidices.resize(vec_size); - ChemBCast(key_inidices.data(), vec_size, MPI_UINT32_T); + MPI_Comm_split(this->group_comm, MPI_UNDEFINED, this->comm_rank, &dht_comm); + return; } - this->dht_enabled = enable; - - if (enable) { - MPI_Comm dht_comm; - - if (this->is_master) { - MPI_Comm_split(this->group_comm, MPI_UNDEFINED, this->comm_rank, - &dht_comm); - return; - } + if (!this->is_master) { MPI_Comm_split(this->group_comm, 1, this->comm_rank, &dht_comm); + auto map_copy = key_species; + + if (key_species.empty()) { + const auto &all_species = this->prop_names; + for (std::size_t i = 0; i < all_species.size(); i++) { + map_copy.insert(std::make_pair(all_species[i], + DHT_Wrapper::DHT_KEY_SIGNIF_DEFAULT)); + } + } + + auto key_indices = parseDHTSpeciesVec(key_species, this->prop_names); + if (this->dht) { delete this->dht; } - const uint32_t dht_size = size_mb * MB_FACTOR; + const std::uint64_t dht_size = size_mb * MB_FACTOR; - this->dht = - new DHT_Wrapper(dht_comm, dht_size, key_inidices, this->prop_count); - // this->dht->setBaseTotals(this->base_totals); + this->dht = new DHT_Wrapper(dht_comm, dht_size, map_copy, key_indices, + this->prop_count); + this->dht->setBaseTotals(base_totals.at(0), base_totals.at(1)); } } -inline std::vector poet::ChemistryModule::parseDHTSpeciesVec( - const std::vector &species_vec) const { - std::vector species_indices; +inline std::vector poet::ChemistryModule::parseDHTSpeciesVec( + const NamedVector &key_species, + const std::vector &to_compare) const { + std::vector species_indices; + species_indices.reserve(key_species.size()); - if (species_vec.empty()) { - species_indices.resize(this->prop_count); - - int i = 0; - std::generate(species_indices.begin(), species_indices.end(), - [&] { return i++; }); - return species_indices; - } - - species_indices.reserve(species_vec.size()); - - for (const auto &name : species_vec) { - auto it = std::find(this->prop_names.begin(), this->prop_names.end(), name); - if (it == prop_names.end()) { - throw std::runtime_error( - "DHT species name was not found in prop name vector!"); + for (const auto &species : key_species.getNames()) { + auto it = std::find(to_compare.begin(), to_compare.end(), species); + if (it == to_compare.end()) { + species_indices.push_back(DHT_Wrapper::DHT_KEY_INPUT_CUSTOM); + continue; } - const std::uint32_t index = it - prop_names.begin(); + const std::uint32_t index = it - to_compare.begin(); species_indices.push_back(index); } return species_indices; } -void poet::ChemistryModule::SetDHTSnaps(int type, const std::string &out_dir) { +void poet::ChemistryModule::BCastStringVec(std::vector &io) { + if (this->is_master) { - int ftype = CHEM_DHT_SNAPS; - PropagateFunctionType(ftype); + int vec_size = io.size(); + ChemBCast(&vec_size, 1, MPI_INT); - int str_size = out_dir.size(); - - ChemBCast(&type, 1, MPI_INT); - ChemBCast(&str_size, 1, MPI_INT); - ChemBCast(const_cast(out_dir.data()), str_size, MPI_CHAR); - } - - this->dht_snaps_type = type; - this->dht_file_out_dir = out_dir; -} - -void poet::ChemistryModule::SetDHTSignifVector( - std::vector &signif_vec) { - if (this->is_master) { - int ftype = CHEM_DHT_SIGNIF_VEC; - PropagateFunctionType(ftype); - - int data_count = signif_vec.size(); - ChemBCast(&data_count, 1, MPI_INT); - ChemBCast(signif_vec.data(), signif_vec.size(), MPI_UINT32_T); - - return; - } - - int data_count; - ChemBCast(&data_count, 1, MPI_INT); - signif_vec.resize(data_count); - ChemBCast(signif_vec.data(), data_count, MPI_UINT32_T); - - this->dht->SetSignifVector(signif_vec); -} - -void poet::ChemistryModule::SetDHTPropTypeVector( - std::vector proptype_vec) { - if (this->is_master) { - if (proptype_vec.size() != this->prop_count) { - throw std::runtime_error("Prop type vector sizes mismatches prop count."); + for (const auto &value : io) { + int buf_size = value.size() + 1; + ChemBCast(&buf_size, 1, MPI_INT); + ChemBCast(const_cast(value.c_str()), buf_size, MPI_CHAR); } + } else { + int vec_size; + ChemBCast(&vec_size, 1, MPI_INT); - int ftype = CHEM_DHT_PROP_TYPE_VEC; - PropagateFunctionType(ftype); - ChemBCast(proptype_vec.data(), proptype_vec.size(), MPI_UINT32_T); + io.resize(vec_size); + for (int i = 0; i < vec_size; i++) { + int buf_size; + ChemBCast(&buf_size, 1, MPI_INT); + char buf[buf_size]; + ChemBCast(buf, buf_size, MPI_CHAR); + io[i] = std::string{buf}; + } + } +} + +void poet::ChemistryModule::setDHTSnapshots(int type, + const std::string &out_dir) { + if (this->is_master) { return; } - this->dht->SetPropTypeVector(proptype_vec); + this->dht_file_out_dir = out_dir; + this->dht_snaps_type = type; } -void poet::ChemistryModule::ReadDHTFile(const std::string &input_file) { +void poet::ChemistryModule::setDHTReadFile(const std::string &input_file) { if (this->is_master) { - int ftype = CHEM_DHT_READ_FILE; - PropagateFunctionType(ftype); - int str_size = input_file.size(); - - ChemBCast(&str_size, 1, MPI_INT); - ChemBCast(const_cast(input_file.data()), str_size, MPI_CHAR); + return; } - if (!this->dht_enabled) { - throw std::runtime_error("DHT file cannot be read. DHT is not enabled."); + if (!input_file.empty()) { + WorkerReadDHTDump(input_file); } +} + +void poet::ChemistryModule::initializeInterp( + std::uint32_t bucket_size, std::uint32_t size_mb, std::uint32_t min_entries, + const NamedVector &key_species) { if (!this->is_master) { - WorkerReadDHTDump(input_file); + + constexpr uint32_t MB_FACTOR = 1E6; + + assert(this->dht); + + this->interp_enabled = true; + + auto map_copy = key_species; + + if (key_species.empty()) { + map_copy = this->dht->getKeySpecies(); + for (std::size_t i = 0; i < map_copy.size(); i++) { + const std::uint32_t signif = + map_copy[i] - (map_copy[i] > InterpolationModule::COARSE_DIFF + ? InterpolationModule::COARSE_DIFF + : 0); + map_copy[i] = signif; + } + } + + auto key_indices = + parseDHTSpeciesVec(key_species, dht->getKeySpecies().getNames()); + + if (this->interp) { + this->interp.reset(); + } + + const uint64_t pht_size = size_mb * MB_FACTOR; + + interp = std::make_unique( + bucket_size, pht_size, min_entries, *(this->dht), map_copy, + key_indices); + + interp->setInterpolationFunction(inverseDistanceWeighting); } } diff --git a/src/ChemistryModule/DHT_Wrapper.cpp b/src/ChemistryModule/DHT_Wrapper.cpp deleted file mode 100644 index 129b45340..000000000 --- a/src/ChemistryModule/DHT_Wrapper.cpp +++ /dev/null @@ -1,226 +0,0 @@ -/* -** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of -** Potsdam) -** -** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam) -** -** POET is free software; you can redistribute it and/or modify it under the -** terms of the GNU General Public License as published by the Free Software -** Foundation; either version 2 of the License, or (at your option) any later -** version. -** -** POET is distributed in the hope that it will be useful, but WITHOUT ANY -** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -** A PARTICULAR PURPOSE. See the GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License along with -** this program; if not, write to the Free Software Foundation, Inc., 51 -** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -*/ - -#include "poet/DHT_Wrapper.hpp" -#include "poet/DHT_Types.hpp" -#include "poet/HashFunctions.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace poet; -using namespace std; - -inline DHT_SCNotation round_key_element(double value, std::uint32_t signif) { - std::int8_t exp = - static_cast(std::floor(std::log10(std::fabs(value)))); - std::int64_t significant = - static_cast(value * std::pow(10, signif - exp - 1)); - - DHT_SCNotation elem; - elem.exp = exp; - elem.significant = significant; - - return elem; -} - -DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size, - const std::vector &key_indices, - uint32_t data_count) - : key_count(key_indices.size()), data_count(data_count), - input_key_elements(key_indices) { - // initialize DHT object - uint32_t key_size = (key_count + 1) * sizeof(DHT_Keyelement); - uint32_t data_size = data_count * sizeof(double); - uint32_t buckets_per_process = dht_size / (1 + data_size + key_size); - dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, - &poet::Murmur2_64A); - - this->dht_signif_vector.resize(key_size, DHT_KEY_SIGNIF_DEFAULT); - this->dht_signif_vector[0] = DHT_KEY_SIGNIF_TOTALS; - this->dht_signif_vector[1] = DHT_KEY_SIGNIF_TOTALS; - this->dht_signif_vector[2] = DHT_KEY_SIGNIF_CHARGE; - - this->dht_prop_type_vector.resize(key_count, DHT_TYPE_DEFAULT); - this->dht_prop_type_vector[0] = DHT_TYPE_TOTAL; - this->dht_prop_type_vector[1] = DHT_TYPE_TOTAL; - this->dht_prop_type_vector[2] = DHT_TYPE_CHARGE; -} - -DHT_Wrapper::~DHT_Wrapper() { - // free DHT - DHT_free(dht_object, NULL, NULL); -} -auto DHT_Wrapper::checkDHT(int length, double dt, - const std::vector &work_package, - std::vector &curr_mapping) - -> const poet::DHT_ResultObject & { - - dht_results.length = length; - dht_results.keys.resize(length); - dht_results.results.resize(length); - dht_results.needPhreeqc.resize(length); - - std::vector new_mapping; - - // loop over every grid cell contained in work package - for (int i = 0; i < length; i++) { - // point to current grid cell - void *key = (void *)&(work_package[i * this->data_count]); - auto &data = dht_results.results[i]; - auto &key_vector = dht_results.keys[i]; - - data.resize(this->data_count); - key_vector = fuzzForDHT(this->key_count, key, dt); - - // overwrite input with data from DHT, IF value is found in DHT - int res = DHT_read(this->dht_object, key_vector.data(), data.data()); - - switch (res) { - case DHT_SUCCESS: - dht_results.needPhreeqc[i] = false; - this->dht_hits++; - break; - case DHT_READ_MISS: - dht_results.needPhreeqc[i] = true; - new_mapping.push_back(curr_mapping[i]); - this->dht_miss++; - break; - } - } - - curr_mapping = std::move(new_mapping); - - return dht_results; -} - -void DHT_Wrapper::fillDHT(int length, const std::vector &work_package) { - // loop over every grid cell contained in work package - for (int i = 0; i < length; i++) { - // If true grid cell was simulated, needs to be inserted into dht - if (dht_results.needPhreeqc[i]) { - const auto &key = dht_results.keys[i]; - void *data = (void *)&(work_package[i * this->data_count]); - // fuzz data (round, logarithm etc.) - - // insert simulated data with fuzzed key into DHT - int res = DHT_write(this->dht_object, (void *)key.data(), data); - - // if data was successfully written ... - if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) { - dht_evictions++; - } - } - } -} - -void DHT_Wrapper::resultsToWP(std::vector &work_package) { - for (int i = 0; i < dht_results.length; i++) { - if (!dht_results.needPhreeqc[i]) { - std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), - work_package.begin() + (data_count * i)); - } - } -} - -int DHT_Wrapper::tableToFile(const char *filename) { - int res = DHT_to_file(dht_object, filename); - return res; -} - -int DHT_Wrapper::fileToTable(const char *filename) { - int res = DHT_from_file(dht_object, filename); - if (res != DHT_SUCCESS) - return res; - -#ifdef DHT_STATISTICS - DHT_print_statistics(dht_object); -#endif - - return DHT_SUCCESS; -} - -void DHT_Wrapper::printStatistics() { - int res; - - res = DHT_print_statistics(dht_object); - - if (res != DHT_SUCCESS) { - // MPI ERROR ... WHAT TO DO NOW? - // RUNNING CIRCLES WHILE SCREAMING - } -} - -std::vector DHT_Wrapper::fuzzForDHT(int var_count, void *key, - double dt) { - constexpr double zero_val = 10E-14; - - std::vector vecFuzz(var_count + 1); - std::memset(&vecFuzz[0], 0, sizeof(DHT_Keyelement) * var_count); - - int totals_i = 0; - // introduce fuzzing to allow more hits in DHT - // loop over every variable of grid cell - for (std::uint32_t i = 0; i < input_key_elements.size(); i++) { - double curr_key = ((double *)key)[input_key_elements[i]]; - if (curr_key != 0) { - if (curr_key < zero_val && - this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) { - continue; - } - if (this->dht_prop_type_vector[i] == DHT_TYPE_IGNORE) { - continue; - } - if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { - curr_key -= base_totals[totals_i++]; - } - vecFuzz[i].sc_notation = round_key_element(curr_key, dht_signif_vector[i]); - } - } - // if timestep differs over iterations set current current time step at the - // end of fuzzing buffer - vecFuzz[var_count].fp_elemet = dt; - - return vecFuzz; -} - -void poet::DHT_Wrapper::SetSignifVector(std::vector signif_vec) { - if (signif_vec.size() != this->key_count) { - throw std::runtime_error( - "Significant vector size mismatches count of key elements."); - } - - this->dht_signif_vector = signif_vec; -} -void poet::DHT_Wrapper::SetPropTypeVector(std::vector prop_type_vec) { - if (prop_type_vec.size() != this->key_count) { - throw std::runtime_error( - "Prop type vector size mismatches count of key elements."); - } - - this->dht_prop_type_vector = prop_type_vec; -} diff --git a/src/ChemistryModule/MasterFunctions.cpp b/src/ChemistryModule/MasterFunctions.cpp index 316b64cab..b40341dbd 100644 --- a/src/ChemistryModule/MasterFunctions.cpp +++ b/src/ChemistryModule/MasterFunctions.cpp @@ -2,6 +2,7 @@ #include "poet/ChemistryModule.hpp" #include +#include #include #include #include @@ -62,19 +63,136 @@ std::vector poet::ChemistryModule::GetWorkerIdleTimings() const { std::vector poet::ChemistryModule::GetWorkerDHTHits() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_HITS); -} - -std::vector poet::ChemistryModule::GetWorkerDHTMiss() const { - int type = CHEM_PERF; + type = WORKER_DHT_HITS; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_MISS); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_DHT_HITS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_DHT_HITS, + this->group_comm, NULL); + + return ret; } std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_EVICTIONS); + type = WORKER_DHT_EVICTIONS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_DHT_EVICTIONS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_DHT_EVICTIONS, + this->group_comm, NULL); + + return ret; +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationWriteTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_WRITE); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationReadTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_READ); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationGatherTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_GATHER); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationFunctionCallTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_FC); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationCalls() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + type = WORKER_IP_CALLS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_IP_CALLS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_IP_CALLS, + this->group_comm, NULL); + + return ret; +} + +std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + type = WORKER_PHT_CACHE_HITS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, type, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, type, + this->group_comm, NULL); + + return ret; +} + +inline std::vector shuffleField(const std::vector &in_field, + uint32_t size_per_prop, + uint32_t prop_count, + uint32_t wp_count) { + std::vector out_buffer(in_field.size()); + uint32_t write_i = 0; + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < prop_count; k++) { + out_buffer[(write_i * prop_count) + k] = + in_field[(k * size_per_prop) + j]; + } + write_i++; + } + } + return out_buffer; +} + +inline void unshuffleField(const std::vector &in_buffer, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count, std::vector &out_field) { + uint32_t read_i = 0; + + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < prop_count; k++) { + out_field[(k * size_per_prop) + j] = + in_buffer[(read_i * prop_count) + k]; + } + read_i++; + } + } } inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { @@ -190,12 +308,15 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, } void poet::ChemistryModule::RunCells() { + double start_t{MPI_Wtime()}; if (this->is_sequential) { MasterRunSequential(); return; } MasterRunParallel(); + double end_t{MPI_Wtime()}; + this->chem_t += end_t - start_t; } void poet::ChemistryModule::MasterRunSequential() { @@ -211,7 +332,6 @@ void poet::ChemistryModule::MasterRunSequential() { void poet::ChemistryModule::MasterRunParallel() { /* declare most of the needed variables here */ - double chem_a, chem_b; double seq_a, seq_b, seq_c, seq_d; double worker_chemistry_a, worker_chemistry_b; double sim_e_chemistry, sim_f_chemistry; @@ -227,9 +347,6 @@ void poet::ChemistryModule::MasterRunParallel() { double dt = this->PhreeqcRM::GetTimeStep(); static uint32_t iteration = 0; - /* start time measurement of whole chemistry simulation */ - chem_a = MPI_Wtime(); - /* start time measurement of sequential part */ seq_a = MPI_Wtime(); @@ -308,8 +425,6 @@ void poet::ChemistryModule::MasterRunParallel() { for (int i = 1; i < this->comm_size; i++) { MPI_Send(NULL, 0, MPI_DOUBLE, i, LOOP_END, this->group_comm); } - chem_b = MPI_Wtime(); - this->chem_t += chem_b - chem_a; this->simtime += dt; iteration++; @@ -324,7 +439,8 @@ std::vector poet::ChemistryModule::CalculateWPSizesVector(uint32_t n_cells, uint32_t wp_size) const { bool mod_pkgs = (n_cells % wp_size) != 0; - uint32_t n_packages = (uint32_t)(n_cells / wp_size) + mod_pkgs; + uint32_t n_packages = + (uint32_t)(n_cells / wp_size) + static_cast(mod_pkgs); std::vector wp_sizes_vector(n_packages, 0); @@ -334,12 +450,3 @@ poet::ChemistryModule::CalculateWPSizesVector(uint32_t n_cells, return wp_sizes_vector; } - -void poet::ChemistryModule::setProgressBarPrintout(bool enabled) { - if (is_master) { - int type = CHEM_PROGRESSBAR; - ChemBCast(&type, 1, MPI_INT); - ChemBCast(&enabled, 1, MPI_CXX_BOOL); - } - this->print_progessbar = enabled; -} diff --git a/src/ChemistryModule/SurrogateModels/CMakeLists.txt b/src/ChemistryModule/SurrogateModels/CMakeLists.txt new file mode 100644 index 000000000..334ec98b5 --- /dev/null +++ b/src/ChemistryModule/SurrogateModels/CMakeLists.txt @@ -0,0 +1,21 @@ +file(GLOB surrogate_models_SRC + CONFIGURE_DEPENDS + "*.cpp" "*.c") + +find_library(MATH_LIBRARY m) + +add_library(surrogate_models ${surrogate_models_SRC}) +target_include_directories(surrogate_models PUBLIC ${PROJECT_SOURCE_DIR}/include) +target_link_libraries(surrogate_models PUBLIC + MPI::MPI_CXX ${MATH_LIBRARY}) +target_compile_definitions(surrogate_models PUBLIC OMPI_SKIP_MPICXX) + +option(POET_PHT_ADDITIONAL_INFO "Enables additional information in the PHT" OFF) + +if (POET_PHT_ADDITIONAL_INFO) + target_compile_definitions(surrogate_models PRIVATE POET_PHT_ADD) +endif() + +if(POET_DHT_DEBUG) + target_compile_definitions(surrogate_models PRIVATE DHT_STATISTICS) +endif() diff --git a/src/ChemistryModule/DHT.c b/src/ChemistryModule/SurrogateModels/DHT.c similarity index 68% rename from src/ChemistryModule/DHT.c rename to src/ChemistryModule/SurrogateModels/DHT.c index 485f50e18..5275cb02c 100644 --- a/src/ChemistryModule/DHT.c +++ b/src/ChemistryModule/SurrogateModels/DHT.c @@ -1,3 +1,4 @@ +/// Time-stamp: "Last modified 2023-06-28 15:58:19 mluebke" /* ** Copyright (C) 2017-2021 Max Luebke (University of Potsdam) ** @@ -15,10 +16,12 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include #include #include +#include #include #include #include @@ -52,7 +55,8 @@ static int read_flag(char flag_byte) { } DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, - unsigned int key_size, uint64_t (*hash_func)(int, const void *)) { + unsigned int key_size, + uint64_t (*hash_func)(int, const void *)) { DHT *object; MPI_Win window; void *mem_alloc; @@ -61,17 +65,20 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, // calculate how much bytes for the index are needed to address count of // buckets per process index_bytes = (int)ceil(log2(size)); - if (index_bytes % 8 != 0) index_bytes = index_bytes + (8 - (index_bytes % 8)); + if (index_bytes % 8 != 0) + index_bytes = index_bytes + (8 - (index_bytes % 8)); // allocate memory for dht-object object = (DHT *)malloc(sizeof(DHT)); - if (object == NULL) return NULL; + if (object == NULL) + return NULL; // every memory allocation has 1 additional byte for flags etc. if (MPI_Alloc_mem(size * (1 + data_size + key_size), MPI_INFO_NULL, &mem_alloc) != 0) return NULL; - if (MPI_Comm_size(comm, &comm_size) != 0) return NULL; + if (MPI_Comm_size(comm, &comm_size) != 0) + return NULL; // since MPI_Alloc_mem doesn't provide memory allocation with the memory set // to zero, we're doing this here @@ -104,7 +111,8 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, DHT_stats *stats; stats = (DHT_stats *)malloc(sizeof(DHT_stats)); - if (stats == NULL) return NULL; + if (stats == NULL) + return NULL; object->stats = stats; object->stats->writes_local = (int *)calloc(comm_size, sizeof(int)); @@ -118,7 +126,106 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, return object; } -int DHT_write(DHT *table, void *send_key, void *send_data) { +void DHT_set_accumulate_callback(DHT *table, + int (*callback_func)(int, void *, int, + void *)) { + table->accumulate_callback = callback_func; +} + +int DHT_write_accumulate(DHT *table, const void *send_key, int data_size, + void *send_data, uint32_t *proc, uint32_t *index, + int *callback_ret) { + unsigned int dest_rank, i; + int result = DHT_SUCCESS; + +#ifdef DHT_STATISTICS + table->stats->w_access++; +#endif + + // determine destination rank and index by hash of key + determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, + table->table_size, &dest_rank, table->index, + table->index_count); + + // concatenating key with data to write entry to DHT + set_flag((char *)table->send_entry); + memcpy((char *)table->send_entry + 1, (char *)send_key, table->key_size); + /* memcpy((char *)table->send_entry + table->key_size + 1, (char *)send_data, + */ + /* table->data_size); */ + + // locking window of target rank with exclusive lock + if (MPI_Win_lock(MPI_LOCK_EXCLUSIVE, dest_rank, 0, table->window) != 0) + return DHT_MPI_ERROR; + for (i = 0; i < table->index_count; i++) { + if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, + MPI_BYTE, dest_rank, table->index[i], + 1 + table->data_size + table->key_size, MPI_BYTE, + table->window) != 0) + return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + // increment eviction counter if receiving key doesn't match sending key + // entry has write flag and last index is reached. + if (read_flag(*(char *)table->recv_entry)) { + if (memcmp(send_key, (char *)table->recv_entry + 1, table->key_size) != + 0) { + if (i == (table->index_count) - 1) { + table->evictions += 1; +#ifdef DHT_STATISTICS + table->stats->evictions += 1; +#endif + result = DHT_WRITE_SUCCESS_WITH_EVICTION; + break; + } + } else + break; + } else { +#ifdef DHT_STATISTICS + table->stats->writes_local[dest_rank]++; +#endif + break; + } + } + + if (result == DHT_WRITE_SUCCESS_WITH_EVICTION) { + memset((char *)table->send_entry + 1 + table->key_size, '\0', + table->data_size); + } else { + memcpy((char *)table->send_entry + 1 + table->key_size, + (char *)table->recv_entry + 1 + table->key_size, table->data_size); + } + + *callback_ret = table->accumulate_callback( + data_size, (char *)send_data, table->data_size, + (char *)table->send_entry + 1 + table->key_size); + + // put data to DHT (with last selected index by value i) + if (*callback_ret == 0) { + if (MPI_Put(table->send_entry, 1 + table->data_size + table->key_size, + MPI_BYTE, dest_rank, table->index[i], + 1 + table->data_size + table->key_size, MPI_BYTE, + table->window) != 0) + return DHT_MPI_ERROR; + } + // unlock window of target rank + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + if (proc) { + *proc = dest_rank; + } + + if (index) { + *index = table->index[i]; + } + + return result; +} + +int DHT_write(DHT *table, void *send_key, void *send_data, uint32_t *proc, + uint32_t *index) { unsigned int dest_rank, i; int result = DHT_SUCCESS; @@ -146,7 +253,8 @@ int DHT_write(DHT *table, void *send_key, void *send_data) { 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR; - if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // increment eviction counter if receiving key doesn't match sending key // entry has write flag and last index is reached. @@ -178,12 +286,21 @@ int DHT_write(DHT *table, void *send_key, void *send_data) { table->window) != 0) return DHT_MPI_ERROR; // unlock window of target rank - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + if (proc) { + *proc = dest_rank; + } + + if (index) { + *index = table->index[i]; + } return result; } -int DHT_read(DHT *table, void *send_key, void *destination) { +int DHT_read(DHT *table, const void *send_key, void *destination) { unsigned int dest_rank, i; #ifdef DHT_STATISTICS @@ -205,7 +322,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR; - if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // increment read error counter if write flag isn't set ... if ((read_flag(*(char *)table->recv_entry)) == 0) { @@ -214,7 +332,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { table->stats->read_misses += 1; #endif // unlock window and return - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; return DHT_READ_MISS; } @@ -227,7 +346,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { table->stats->read_misses += 1; #endif // unlock window an return - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; return DHT_READ_MISS; } } else @@ -235,7 +355,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { } // unlock window of target rank - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // if matching key was found copy data into memory of passed pointer memcpy((char *)destination, (char *)table->recv_entry + table->key_size + 1, @@ -244,6 +365,34 @@ int DHT_read(DHT *table, void *send_key, void *destination) { return DHT_SUCCESS; } +int DHT_read_location(DHT *table, uint32_t proc, uint32_t index, + void *destination) { + const uint32_t bucket_size = table->data_size + table->key_size + 1; + +#ifdef DHT_STATISTICS + table->stats->r_access++; +#endif + + // locking window of target rank with shared lock + if (MPI_Win_lock(MPI_LOCK_SHARED, proc, 0, table->window) != 0) + return DHT_MPI_ERROR; + // receive data + if (MPI_Get(table->recv_entry, bucket_size, MPI_BYTE, proc, index, + bucket_size, MPI_BYTE, table->window) != 0) { + return DHT_MPI_ERROR; + } + + // unlock window of target rank + if (MPI_Win_unlock(proc, table->window) != 0) + return DHT_MPI_ERROR; + + // if matching key was found copy data into memory of passed pointer + memcpy((char *)destination, (char *)table->recv_entry + 1 + table->key_size, + table->data_size); + + return DHT_SUCCESS; +} + int DHT_to_file(DHT *table, const char *filename) { // open file MPI_File file; @@ -257,17 +406,15 @@ int DHT_to_file(DHT *table, const char *filename) { // write header (key_size and data_size) if (rank == 0) { - if (MPI_File_write(file, &table->key_size, 1, MPI_INT, MPI_STATUS_IGNORE) != - 0) + if (MPI_File_write_shared(file, &table->key_size, 1, MPI_INT, + MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR; - if (MPI_File_write(file, &table->data_size, 1, MPI_INT, - MPI_STATUS_IGNORE) != 0) + if (MPI_File_write_shared(file, &table->data_size, 1, MPI_INT, + MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR; } - // seek file pointer behind header for all processes - if (MPI_File_seek_shared(file, DHT_FILEHEADER_SIZE, MPI_SEEK_SET) != 0) - return DHT_FILE_IO_ERROR; + MPI_Barrier(table->communicator); char *ptr; int bucket_size = table->key_size + table->data_size + 1; @@ -283,8 +430,12 @@ int DHT_to_file(DHT *table, const char *filename) { return DHT_FILE_WRITE_ERROR; } } + + MPI_Barrier(table->communicator); + // close file - if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_close(&file) != 0) + return DHT_FILE_IO_ERROR; return DHT_SUCCESS; } @@ -303,7 +454,8 @@ int DHT_from_file(DHT *table, const char *filename) { return DHT_FILE_IO_ERROR; // get file size - if (MPI_File_get_size(file, &f_size) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_get_size(file, &f_size) != 0) + return DHT_FILE_IO_ERROR; MPI_Comm_rank(table->communicator, &rank); @@ -322,8 +474,10 @@ int DHT_from_file(DHT *table, const char *filename) { return DHT_FILE_READ_ERROR; // compare if written header data and key size matches current sizes - if (*(int *)buffer != table->key_size) return DHT_WRONG_FILE; - if (*(int *)(buffer + 4) != table->data_size) return DHT_WRONG_FILE; + if (*(int *)buffer != table->key_size) + return DHT_WRONG_FILE; + if (*(int *)(buffer + 4) != table->data_size) + return DHT_WRONG_FILE; // set offset for each process offset = bucket_size * table->comm_size; @@ -348,14 +502,16 @@ int DHT_from_file(DHT *table, const char *filename) { // extract key and data and write to DHT key = buffer; data = (buffer + table->key_size); - if (DHT_write(table, key, data) == DHT_MPI_ERROR) return DHT_MPI_ERROR; + if (DHT_write(table, key, data, NULL, NULL) == DHT_MPI_ERROR) + return DHT_MPI_ERROR; // increment current position cur_pos += offset; } free(buffer); - if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_close(&file) != 0) + return DHT_FILE_IO_ERROR; return DHT_SUCCESS; } @@ -377,8 +533,10 @@ int DHT_free(DHT *table, int *eviction_counter, int *readerror_counter) { return DHT_MPI_ERROR; *readerror_counter = buf; } - if (MPI_Win_free(&(table->window)) != 0) return DHT_MPI_ERROR; - if (MPI_Free_mem(table->mem_alloc) != 0) return DHT_MPI_ERROR; + if (MPI_Win_free(&(table->window)) != 0) + return DHT_MPI_ERROR; + if (MPI_Free_mem(table->mem_alloc) != 0) + return DHT_MPI_ERROR; free(table->recv_entry); free(table->send_entry); free(table->index); @@ -407,7 +565,8 @@ int DHT_print_statistics(DHT *table) { #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" // obtaining all values from all processes in the communicator - if (rank == 0) read_misses = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + read_misses = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->read_misses, 1, MPI_INT, read_misses, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -416,7 +575,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->read_misses = 0; - if (rank == 0) evictions = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + evictions = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->evictions, 1, MPI_INT, evictions, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -425,7 +585,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->evictions = 0; - if (rank == 0) w_access = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + w_access = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->w_access, 1, MPI_INT, w_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -434,7 +595,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->w_access = 0; - if (rank == 0) r_access = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + r_access = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->r_access, 1, MPI_INT, r_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -443,13 +605,14 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->r_access = 0; - if (rank == 0) written_buckets = (int *)calloc(table->comm_size, sizeof(int)); + if (rank == 0) + written_buckets = (int *)calloc(table->comm_size, sizeof(int)); if (MPI_Reduce(table->stats->writes_local, written_buckets, table->comm_size, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR; - if (rank == 0) { // only process with rank 0 will print out results as a - // table + if (rank == 0) { // only process with rank 0 will print out results as a + // table int sum_written_buckets = 0; for (int i = 0; i < table->comm_size; i++) { diff --git a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp new file mode 100644 index 000000000..86df22d49 --- /dev/null +++ b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp @@ -0,0 +1,300 @@ +// Time-stamp: "Last modified 2023-08-01 13:41:57 mluebke" + +/* +** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of +** Potsdam) +** +** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam) +** +** POET is free software; you can redistribute it and/or modify it under the +** terms of the GNU General Public License as published by the Free Software +** Foundation; either version 2 of the License, or (at your option) any later +** version. +** +** POET is distributed in the hope that it will be useful, but WITHOUT ANY +** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +** A PARTICULAR PURPOSE. See the GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along with +** this program; if not, write to the Free Software Foundation, Inc., 51 +** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + +#include "poet/DHT_Wrapper.hpp" +#include "poet/DHT_Types.hpp" +#include "poet/HashFunctions.hpp" +#include "poet/Interpolation.hpp" +#include "poet/LookupKey.hpp" +#include "poet/Rounding.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace poet { + +DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, + const NamedVector &key_species, + const std::vector &key_indices, + uint32_t data_count) + : key_count(key_indices.size()), data_count(data_count), + input_key_elements(key_indices), communicator(dht_comm), + key_species(key_species) { + // initialize DHT object + // key size = count of key elements + timestep + uint32_t key_size = (key_count + 1) * sizeof(Lookup_Keyelement); + uint32_t data_size = + (data_count + input_key_elements.size()) * sizeof(double); + uint32_t buckets_per_process = + static_cast(dht_size / (data_size + key_size)); + dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, + &poet::Murmur2_64A); + + dht_signif_vector = key_species.getValues(); + + // this->dht_signif_vector.resize(key_size, DHT_KEY_SIGNIF_DEFAULT); + + this->dht_prop_type_vector.resize(key_count, DHT_TYPE_DEFAULT); + + const auto key_names = key_species.getNames(); + + auto tot_h = std::find(key_names.begin(), key_names.end(), "H"); + if (tot_h != key_names.end()) { + this->dht_prop_type_vector[tot_h - key_names.begin()] = DHT_TYPE_TOTAL; + } + auto tot_o = std::find(key_names.begin(), key_names.end(), "O"); + if (tot_o != key_names.end()) { + this->dht_prop_type_vector[tot_o - key_names.begin()] = DHT_TYPE_TOTAL; + } + auto charge = std::find(key_names.begin(), key_names.end(), "Charge"); + if (charge != key_names.end()) { + this->dht_prop_type_vector[charge - key_names.begin()] = DHT_TYPE_CHARGE; + } +} + +DHT_Wrapper::~DHT_Wrapper() { + // free DHT + DHT_free(dht_object, NULL, NULL); +} +auto DHT_Wrapper::checkDHT(int length, double dt, + const std::vector &work_package, + std::vector &curr_mapping) + -> const DHT_ResultObject & { + + dht_results.length = length; + dht_results.keys.resize(length); + dht_results.results.resize(length); + dht_results.needPhreeqc.resize(length); + + std::vector bucket_writer(this->data_count + + input_key_elements.size()); + std::vector new_mapping; + + // loop over every grid cell contained in work package + for (int i = 0; i < length; i++) { + // point to current grid cell + void *key = (void *)&(work_package[i * this->data_count]); + auto &data = dht_results.results[i]; + auto &key_vector = dht_results.keys[i]; + + // data.resize(this->data_count); + key_vector = fuzzForDHT(this->key_count, key, dt); + + // overwrite input with data from DHT, IF value is found in DHT + int res = + DHT_read(this->dht_object, key_vector.data(), bucket_writer.data()); + + switch (res) { + case DHT_SUCCESS: + dht_results.results[i] = inputAndRatesToOutput(bucket_writer); + dht_results.needPhreeqc[i] = false; + this->dht_hits++; + break; + case DHT_READ_MISS: + dht_results.needPhreeqc[i] = true; + new_mapping.push_back(curr_mapping[i]); + dht_results.results[i] = std::vector{ + &work_package[i * this->data_count], + &work_package[i * this->data_count] + this->data_count}; + + // HACK: apply normalization to total H and O in results field of DHT + // dht_results.results[i][0] -= base_totals[0]; + // dht_results.results[i][1] -= base_totals[1]; + break; + } + } + + curr_mapping = std::move(new_mapping); + dht_results.old_values = work_package; + + return dht_results; +} + +void DHT_Wrapper::fillDHT(int length, const std::vector &work_package) { + + // loop over every grid cell contained in work package + dht_results.locations.resize(length); + for (int i = 0; i < length; i++) { + // If true grid cell was simulated, needs to be inserted into dht + if (dht_results.needPhreeqc[i]) { + + // check if calcite or dolomite is absent and present, resp.n and vice + // versa in input/output. If this is the case -> Do not write to DHT! + // HACK: hardcoded, should be fixed! + if ((dht_results.old_values[i * this->data_count + 7] == 0) != + (work_package[i * this->data_count + 7] == 0)) { + dht_results.needPhreeqc[i] = false; + continue; + } + + if ((dht_results.old_values[i * this->data_count + 9] == 0) != + (work_package[i * this->data_count + 9] == 0)) { + dht_results.needPhreeqc[i] = false; + continue; + } + + uint32_t proc, index; + const auto &key = dht_results.keys[i]; + const auto curr_old_data = std::vector( + dht_results.old_values.begin() + (i * this->data_count), + dht_results.old_values.begin() + ((i + 1) * this->data_count)); + const auto curr_new_data = std::vector( + work_package.begin() + (i * this->data_count), + work_package.begin() + ((i + 1) * this->data_count)); + const auto data = outputToInputAndRates(curr_old_data, curr_new_data); + // void *data = (void *)&(work_package[i * this->data_count]); + // fuzz data (round, logarithm etc.) + + // insert simulated data with fuzzed key into DHT + int res = DHT_write(this->dht_object, (void *)(key.data()), + const_cast(data.data()), &proc, &index); + + dht_results.locations[i] = {proc, index}; + + // if data was successfully written ... + if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) { + dht_evictions++; + } + } + } +} + +std::vector +DHT_Wrapper::outputToInputAndRates(const std::vector &old_results, + const std::vector &new_results) { + const int prefix_size = this->input_key_elements.size(); + + std::vector output(prefix_size + this->data_count); + std::copy(new_results.begin(), new_results.end(), + output.begin() + prefix_size); + + for (int i = 0; i < prefix_size; i++) { + const int data_elem_i = input_key_elements[i]; + output[i] = old_results[data_elem_i]; + output[prefix_size + data_elem_i] -= old_results[data_elem_i]; + } + + return output; +} + +std::vector +DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data) { + const int prefix_size = this->input_key_elements.size(); + + std::vector output{dht_data.begin() + prefix_size, dht_data.end()}; + + for (int i = 0; i < prefix_size; i++) { + const int data_elem_i = input_key_elements[i]; + output[data_elem_i] += dht_data[i]; + } + + return output; +} + +void DHT_Wrapper::resultsToWP(std::vector &work_package) { + for (int i = 0; i < dht_results.length; i++) { + if (!dht_results.needPhreeqc[i]) { + std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), + work_package.begin() + (data_count * i)); + } + } +} + +int DHT_Wrapper::tableToFile(const char *filename) { + int res = DHT_to_file(dht_object, filename); + return res; +} + +int DHT_Wrapper::fileToTable(const char *filename) { + int res = DHT_from_file(dht_object, filename); + if (res != DHT_SUCCESS) + return res; + +#ifdef DHT_STATISTICS + DHT_print_statistics(dht_object); +#endif + + return DHT_SUCCESS; +} + +void DHT_Wrapper::printStatistics() { + int res; + + res = DHT_print_statistics(dht_object); + + if (res != DHT_SUCCESS) { + // MPI ERROR ... WHAT TO DO NOW? + // RUNNING CIRCLES WHILE SCREAMING + } +} + +LookupKey DHT_Wrapper::fuzzForDHT(int var_count, void *key, double dt) { + const auto c_zero_val = std::pow(10, AQUEOUS_EXP); + + const Lookup_Keyelement dummy = {.0}; + LookupKey vecFuzz(var_count + 1, dummy); + DHT_Rounder rounder; + + int totals_i = 0; + // introduce fuzzing to allow more hits in DHT + // loop over every variable of grid cell + for (std::uint32_t i = 0; i < input_key_elements.size(); i++) { + if (input_key_elements[i] == DHT_KEY_INPUT_CUSTOM) { + continue; + } + double curr_key = ((double *)key)[input_key_elements[i]]; + if (curr_key != 0) { + if (curr_key < c_zero_val && + this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) { + continue; + } + if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { + curr_key -= base_totals[totals_i++]; + } + vecFuzz[i] = + rounder.round(curr_key, dht_signif_vector[i], + this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); + } + } + // add timestep to the end of the key as double value + vecFuzz[var_count].fp_element = dt; + + return vecFuzz; +} + +void poet::DHT_Wrapper::SetSignifVector(std::vector signif_vec) { + if (signif_vec.size() != this->key_count) { + throw std::runtime_error( + "Significant vector size mismatches count of key elements."); + } + + this->dht_signif_vector = signif_vec; +} +} // namespace poet diff --git a/src/ChemistryModule/HashFunctions.cpp b/src/ChemistryModule/SurrogateModels/HashFunctions.cpp similarity index 97% rename from src/ChemistryModule/HashFunctions.cpp rename to src/ChemistryModule/SurrogateModels/HashFunctions.cpp index b2739aea6..431cd286f 100644 --- a/src/ChemistryModule/HashFunctions.cpp +++ b/src/ChemistryModule/SurrogateModels/HashFunctions.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-04-24 16:56:18 mluebke" +// Time-stamp: "Last modified 2023-04-24 23:20:55 mluebke" /* **----------------------------------------------------------------------------- ** MurmurHash2 was written by Austin Appleby, and is placed in the public diff --git a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp new file mode 100644 index 000000000..0d139ac3e --- /dev/null +++ b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp @@ -0,0 +1,197 @@ +// Time-stamp: "Last modified 2023-08-01 17:54:53 mluebke" + +#include "poet/DHT_Wrapper.hpp" +#include "poet/HashFunctions.hpp" +#include "poet/Interpolation.hpp" +#include "poet/LookupKey.hpp" +#include "poet/Rounding.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +extern "C" { +#include "poet/DHT.h" +} + +namespace poet { + +InterpolationModule::InterpolationModule( + std::uint32_t entries_per_bucket, std::uint64_t size_per_process, + std::uint32_t min_entries_needed, DHT_Wrapper &dht, + const NamedVector &interp_key_signifs, + const std::vector &dht_key_indices) + : dht_instance(dht), key_signifs(interp_key_signifs), + key_indices(dht_key_indices), min_entries_needed(min_entries_needed) { + + initPHT(this->key_signifs.size(), entries_per_bucket, size_per_process, + dht.getCommunicator()); + + pht->setSourceDHT(dht.getDHT()); +} + +void InterpolationModule::initPHT(std::uint32_t key_count, + std::uint32_t entries_per_bucket, + std::uint32_t size_per_process, + MPI_Comm communicator) { + uint32_t key_size = key_count * sizeof(Lookup_Keyelement) + sizeof(double); + uint32_t data_size = sizeof(DHT_Location); + + pht = std::make_unique( + key_size, data_size, entries_per_bucket, size_per_process, communicator); +} + +void InterpolationModule::writePairs(const DHT_Wrapper::DHT_ResultObject &in) { + for (int i = 0; i < in.length; i++) { + if (in.needPhreeqc[i]) { + const auto coarse_key = roundKey(in.keys[i]); + pht->writeLocationToPHT(coarse_key, in.locations[i]); + } + } +} + +void InterpolationModule::tryInterpolation( + DHT_Wrapper::DHT_ResultObject &dht_results, + std::vector &curr_mapping) { + interp_result.status.resize(dht_results.length, NOT_NEEDED); + interp_result.results.resize(dht_results.length, {}); + + for (int i = 0; i < dht_results.length; i++) { + if (!dht_results.needPhreeqc[i]) { + interp_result.status[i] = NOT_NEEDED; + continue; + } + + auto pht_result = + pht->query(roundKey(dht_results.keys[i]), this->key_signifs.getValues(), + this->min_entries_needed, dht_instance.getInputCount(), + dht_instance.getOutputCount()); + + int pht_i = 0; + + while (pht_i < pht_result.size) { + if (pht_result.size < this->min_entries_needed) { + break; + } + + auto in_it = pht_result.in_values.begin() + pht_i; + auto out_it = pht_result.out_values.begin() + pht_i; + + bool same_sig_calcite = (pht_result.in_values[pht_i][7] == 0) == + (dht_results.results[i][7] == 0); + bool same_sig_dolomite = (pht_result.in_values[pht_i][8] == 0) == + (dht_results.results[i][9] == 0); + if (!same_sig_calcite || !same_sig_dolomite) { + pht_result.size -= 1; + pht_result.in_values.erase(in_it); + pht_result.out_values.erase(out_it); + continue; + } + + pht_i += 1; + } + + if (pht_result.size < this->min_entries_needed) { + interp_result.status[i] = INSUFFICIENT_DATA; + continue; + } + +#ifdef POET_PHT_ADD + this->pht->incrementReadCounter(roundKey(dht_results.keys[i])); +#endif + + double start_fc = MPI_Wtime(); + // mean water + // double mean_water = 0; + // for (int out_i = 0; out_i < pht_result.size; out_i++) { + // mean_water += pht_result.out_values[out_i][0]; + // } + // mean_water /= pht_result.size; + + auto calcMassBalance = [](const std::vector &input) { + double C, Ca, Mg; + C = input[3] + input[7] + 2 * input[9]; + Ca = input[4] + input[7] + input[9]; + Mg = input[6] + input[9]; + + return std::array{C, Ca, Mg}; + }; + + auto mass_in = calcMassBalance(dht_results.results[i]); + + const auto DHT_inputElements = dht_instance.getKeyElements(); + + // HACK: transform input elements to uint, as there shall no any user + // defined key species present yet + + // std::vector interp_inputElements; + // std::transform(DHT_inputElements.begin(), DHT_inputElements.end(), + // interp_inputElements.begin(), [](std::int32_t x) { + // if (x < 0) { + // x = 0; + // } + // return x; + // }); + + interp_result.results[i] = + f_interpolate(dht_instance.getKeyElements(), dht_results.results[i], + pht_result.in_values, pht_result.out_values); + + auto mass_out = calcMassBalance(interp_result.results[i]); + + double diff[3]; + + std::transform(mass_in.begin(), mass_in.end(), mass_out.begin(), diff, + std::minus<>()); + + bool exceeding = false; + for (const auto comp : diff) { + if (std::abs(comp) > 1e-10) { + exceeding = true; + break; + } + } + + if (exceeding) { + interp_result.status[i] = INSUFFICIENT_DATA; + continue; + } + + if (interp_result.results[i][7] < 0 || interp_result.results[i][9] < 0) { + interp_result.status[i] = INSUFFICIENT_DATA; + continue; + } + + // interp_result.results[i][0] = mean_water; + this->interp_t += MPI_Wtime() - start_fc; + + this->interpolations++; + + curr_mapping.erase(std::remove(curr_mapping.begin(), curr_mapping.end(), i), + curr_mapping.end()); + + dht_results.needPhreeqc[i] = false; + interp_result.status[i] = RES_OK; + } +} + +void InterpolationModule::resultsToWP(std::vector &work_package) const { + for (uint32_t i = 0; i < interp_result.status.size(); i++) { + if (interp_result.status[i] == RES_OK) { + const std::size_t length = + interp_result.results[i].end() - interp_result.results[i].begin(); + std::copy(interp_result.results[i].begin(), + interp_result.results[i].end(), + work_package.begin() + (length * i)); + } + } +} + +} // namespace poet diff --git a/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp b/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp new file mode 100644 index 000000000..e0f014755 --- /dev/null +++ b/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp @@ -0,0 +1,275 @@ +// Time-stamp: "Last modified 2023-08-01 17:11:42 mluebke" + +#include "poet/DHT_Wrapper.hpp" +#include "poet/HashFunctions.hpp" +#include "poet/Interpolation.hpp" +#include "poet/LookupKey.hpp" +#include "poet/Rounding.hpp" +#include +#include +#include +#include +#include +#include +#include +extern "C" { +#include "poet/DHT.h" +} + +namespace poet { + +ProximityHashTable::ProximityHashTable(uint32_t key_size, uint32_t data_size, + uint32_t entry_count, + uint32_t size_per_process, + MPI_Comm communicator_) + : communicator(communicator_) { + + data_size *= entry_count; + data_size += sizeof(bucket_indicator); + +#ifdef POET_PHT_ADD + data_size += sizeof(std::uint64_t); +#endif + + bucket_store = new char[data_size]; + + uint32_t buckets_per_process = + static_cast(size_per_process / (data_size + key_size)); + + this->prox_ht = DHT_create(communicator, buckets_per_process, data_size, + key_size, &poet::Murmur2_64A); + + DHT_set_accumulate_callback(this->prox_ht, PHT_callback_function); +} + +ProximityHashTable::~ProximityHashTable() { + delete[] bucket_store; + if (prox_ht) { + DHT_free(prox_ht, NULL, NULL); + } +} + +int ProximityHashTable::PHT_callback_function(int in_data_size, void *in_data, + int out_data_size, + void *out_data) { + const int max_elements_per_bucket = + static_cast((out_data_size - sizeof(bucket_indicator) +#ifdef POET_PHT_ADD + - sizeof(std::uint64_t) +#endif + ) / + in_data_size); + DHT_Location *input = reinterpret_cast(in_data); + + bucket_indicator *occupied_buckets = + reinterpret_cast(out_data); + DHT_Location *pairs = reinterpret_cast(occupied_buckets + 1); + + if (*occupied_buckets == max_elements_per_bucket) { + return INTERP_CB_FULL; + } + + for (bucket_indicator i = 0; i < *occupied_buckets; i++) { + if (pairs[i] == *input) { + return INTERP_CB_ALREADY_IN; + } + } + + pairs[(*occupied_buckets)++] = *input; + + return INTERP_CB_OK; +} + +void ProximityHashTable::writeLocationToPHT(LookupKey key, + DHT_Location location) { + + double start = MPI_Wtime(); + + // if (localCache[key].first) { + // return; + // } + + int ret_val; + + int status = DHT_write_accumulate(prox_ht, key.data(), sizeof(location), + &location, NULL, NULL, &ret_val); + + if (status == DHT_WRITE_SUCCESS_WITH_EVICTION) { + this->dht_evictions++; + } + + // if (ret_val == INTERP_CB_FULL) { + // localCache(key, {}); + // } + + this->pht_write_t += MPI_Wtime() - start; +} + +const ProximityHashTable::PHT_Result &ProximityHashTable::query( + const LookupKey &key, const std::vector &signif, + std::uint32_t min_entries_needed, std::uint32_t input_count, + std::uint32_t output_count) { + + double start_r = MPI_Wtime(); + const auto cache_ret = localCache[key]; + if (cache_ret.first) { + cache_hits++; + return (lookup_results = cache_ret.second); + } + + int res = DHT_read(prox_ht, key.data(), bucket_store); + this->pht_read_t += MPI_Wtime() - start_r; + + if (res != DHT_SUCCESS) { + this->lookup_results.size = 0; + return lookup_results; + } + + auto *bucket_element_count = + reinterpret_cast(bucket_store); + auto *bucket_elements = + reinterpret_cast(bucket_element_count + 1); + + if (*bucket_element_count < min_entries_needed) { + this->lookup_results.size = 0; + return lookup_results; + } + + lookup_results.size = *bucket_element_count; + auto locations = std::vector( + bucket_elements, bucket_elements + *(bucket_element_count)); + + lookup_results.in_values.clear(); + lookup_results.in_values.reserve(*bucket_element_count); + + lookup_results.out_values.clear(); + lookup_results.out_values.reserve(*bucket_element_count); + + for (const auto &loc : locations) { + double start_g = MPI_Wtime(); + DHT_read_location(source_dht, loc.first, loc.second, dht_buffer.data()); + this->pht_gather_dht_t += MPI_Wtime() - start_g; + + auto *buffer = reinterpret_cast(dht_buffer.data()); + + lookup_results.in_values.push_back( + std::vector(buffer, buffer + input_count)); + + buffer += input_count; + lookup_results.out_values.push_back( + std::vector(buffer, buffer + output_count)); + + // if (!similarityCheck(check_key, key, signif)) { + // // TODO: original stored location in PHT was overwritten in DHT. + // Need to + // // handle this! + // lookup_results.size--; + // if (lookup_results.size < min_entries_needed) { + // lookup_results.size = 0; + // break; + // } + // continue; + // } + + // auto input = convertKeysFromDHT(buffer_start, dht_key_count); + // // remove timestep from the key + // input.pop_back(); + // lookup_results.in_keys.push_back(input); + + // auto *data = reinterpret_cast(buffer + dht_key_count); + // lookup_results.out_values.push_back( + // std::vector(data, data + dht_data_count)); + } + + if (lookup_results.size != 0) { + localCache(key, lookup_results); + } + + return lookup_results; +} + +inline bool +ProximityHashTable::similarityCheck(const LookupKey &fine, + const LookupKey &coarse, + const std::vector &signif) { + + PHT_Rounder rounder; + + for (int i = 0; i < signif.size(); i++) { + if (!(rounder.round(fine[i], signif[i]) == coarse[i])) { + return false; + } + } + + return true; +} + +inline std::vector +ProximityHashTable::convertKeysFromDHT(Lookup_Keyelement *keys_in, + std::uint32_t key_size) { + std::vector output(key_size); + DHT_Rounder rounder; + for (int i = 0; i < key_size; i++) { + output[i] = rounder.convert(keys_in[i]); + } + + return output; +} + +void ProximityHashTable::Cache::operator()(const LookupKey &key, + const PHT_Result val) { + const auto elemIt = this->find(key); + + if (elemIt == this->end()) { + + if (this->free_mem < 0) { + const LookupKey &to_del = this->lru_queue.back(); + const auto elem_d = this->find(to_del); + this->free_mem += elem_d->second.getSize(); + this->erase(to_del); + this->keyfinder.erase(to_del); + this->lru_queue.pop_back(); + } + + this->insert({key, val}); + this->lru_queue.emplace_front(key); + this->keyfinder[key] = lru_queue.begin(); + this->free_mem -= val.getSize(); + return; + } + + elemIt->second = val; +} + +std::pair +ProximityHashTable::Cache::operator[](const LookupKey &key) { + const auto elemIt = this->find(key); + + if (elemIt == this->end()) { + return {false, {}}; + } + + this->lru_queue.splice(lru_queue.begin(), lru_queue, this->keyfinder[key]); + return {true, elemIt->second}; +} + +#ifdef POET_PHT_ADD +static int PHT_increment_counter(int in_data_size, void *in_data, + int out_data_size, void *out_data) { + char *start = reinterpret_cast(out_data); + std::uint64_t *counter = reinterpret_cast( + start + out_data_size - sizeof(std::uint64_t)); + *counter += 1; + + return 0; +} + +void ProximityHashTable::incrementReadCounter(const LookupKey &key) { + auto *old_func_ptr = this->prox_ht->accumulate_callback; + DHT_set_accumulate_callback(prox_ht, PHT_increment_counter); + int ret, dummy; + DHT_write_accumulate(prox_ht, key.data(), 0, NULL, NULL, NULL, &ret); + DHT_set_accumulate_callback(prox_ht, old_func_ptr); +} +#endif +} // namespace poet diff --git a/src/ChemistryModule/WorkerFunctions.cpp b/src/ChemistryModule/WorkerFunctions.cpp index 4f5a59fa0..a87b07a86 100644 --- a/src/ChemistryModule/WorkerFunctions.cpp +++ b/src/ChemistryModule/WorkerFunctions.cpp @@ -1,10 +1,13 @@ -// Time-stamp: "Last modified 2023-07-12 12:56:17 mluebke" +// Time-stamp: "Last modified 2023-08-01 17:22:20 mluebke" #include "IrmResult.h" #include "poet/ChemistryModule.hpp" +#include "poet/DHT_Wrapper.hpp" +#include "poet/Interpolation.hpp" #include -#include +#include +#include #include #include #include @@ -15,6 +18,147 @@ #include #include +namespace poet { + +//std::vector +//inverseDistanceWeighting(const std::vector &to_calc, +// const std::vector &from, +// const std::vector> &input, +// const std::vector> &output) { +// std::vector results = from; +// +// const std::uint32_t buffer_size = input.size() + 1; +// double buffer[buffer_size]; +// double from_rescaled; +// +// const std::uint32_t data_set_n = input.size(); +// double rescaled[to_calc.size()][data_set_n + 1]; +// double weights[data_set_n]; +// +// // rescaling over all key elements +// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { +// const auto output_comp_i = to_calc[key_comp_i]; +// +// // rescale input between 0 and 1 +// for (int point_i = 0; point_i < data_set_n; point_i++) { +// rescaled[key_comp_i][point_i] = input[point_i][key_comp_i]; +// } +// +// rescaled[key_comp_i][data_set_n] = from[output_comp_i]; +// +// const double min = *std::min_element(rescaled[key_comp_i], +// rescaled[key_comp_i] + data_set_n + 1); +// const double max = *std::max_element(rescaled[key_comp_i], +// rescaled[key_comp_i] + data_set_n + 1); +// +// for (int point_i = 0; point_i < data_set_n; point_i++) { +// rescaled[key_comp_i][point_i] = +// ((max - min) != 0 +// ? (rescaled[key_comp_i][point_i] - min) / (max - min) +// : 0); +// } +// rescaled[key_comp_i][data_set_n] = +// ((max - min) != 0 ? (from[output_comp_i] - min) / (max - min) : 0); +// } +// +// // calculate distances for each data set +// double inv_sum = 0; +// for (int point_i = 0; point_i < data_set_n; point_i++) { +// double distance = 0; +// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { +// distance += std::pow( +// rescaled[key_comp_i][point_i] - rescaled[key_comp_i][data_set_n], 2); +// } +// weights[point_i] = 1 / std::sqrt(distance); +// assert(!std::isnan(weights[point_i])); +// inv_sum += weights[point_i]; +// } +// +// assert(!std::isnan(inv_sum)); +// +// // actual interpolation +// // bool has_h = false; +// // bool has_o = false; +// +// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { +// const auto output_comp_i = to_calc[key_comp_i]; +// double key_delta = 0; +// +// // if (interp_i == 0) { +// // has_h = true; +// // } +// +// // if (interp_i == 1) { +// // has_o = true; +// // } +// +// for (int j = 0; j < data_set_n; j++) { +// key_delta += weights[j] * input[j][key_comp_i]; +// } +// +// key_delta /= inv_sum; +// +// results[output_comp_i] = from[output_comp_i] + key_delta; +// } +// +// // if (!has_h) { +// // double new_val = 0; +// // for (int j = 0; j < data_set_n; j++) { +// // new_val += weights[j] * output[j][0]; +// // } +// // results[0] = new_val / inv_sum; +// // } +// +// // if (!has_h) { +// // double new_val = 0; +// // for (int j = 0; j < data_set_n; j++) { +// // new_val += weights[j] * output[j][1]; +// // } +// // results[1] = new_val / inv_sum; +// // } +// +// // for (std::uint32_t i = 0; i < to_calc.size(); i++) { +// // const std::uint32_t interp_i = to_calc[i]; +// +// // // rescale input between 0 and 1 +// // for (int j = 0; j < input.size(); j++) { +// // buffer[j] = input[j].at(i); +// // } +// +// // buffer[buffer_size - 1] = from[interp_i]; +// +// // const double min = *std::min_element(buffer, buffer + buffer_size); +// // const double max = *std::max_element(buffer, buffer + buffer_size); +// +// // for (int j = 0; j < input.size(); j++) { +// // buffer[j] = ((max - min) != 0 ? (buffer[j] - min) / (max - min) : 1); +// // } +// // from_rescaled = +// // ((max - min) != 0 ? (from[interp_i] - min) / (max - min) : 0); +// +// // double inv_sum = 0; +// +// // // calculate distances for each point +// // for (int i = 0; i < input.size(); i++) { +// // const double distance = std::pow(buffer[i] - from_rescaled, 2); +// +// // buffer[i] = distance > 0 ? (1 / std::sqrt(distance)) : 0; +// // inv_sum += buffer[i]; +// // } +// // // calculate new values +// // double new_val = 0; +// // for (int i = 0; i < output.size(); i++) { +// // new_val += buffer[i] * output[i][interp_i]; +// // } +// // results[interp_i] = new_val / inv_sum; +// // if (std::isnan(results[interp_i])) { +// // std::cout << "nan with new_val = " << output[0][i] << std::endl; +// // } +// // } +// +// return results; +//} + inline std::string get_string(int root, MPI_Comm communicator) { int count; MPI_Bcast(&count, 1, MPI_INT, root, communicator); @@ -52,43 +196,6 @@ void poet::ChemistryModule::WorkerLoop() { initializeField(dummy); break; } - case CHEM_DHT_ENABLE: { - bool enable; - ChemBCast(&enable, 1, MPI_CXX_BOOL); - - uint32_t size_mb; - ChemBCast(&size_mb, 1, MPI_UINT32_T); - - std::vector name_dummy; - - SetDHTEnabled(enable, size_mb, name_dummy); - break; - } - case CHEM_DHT_SIGNIF_VEC: { - std::vector input_vec; - - SetDHTSignifVector(input_vec); - break; - } - case CHEM_DHT_PROP_TYPE_VEC: { - std::vector input_vec(this->prop_count); - ChemBCast(input_vec.data(), this->prop_count, MPI_UINT32_T); - - SetDHTPropTypeVector(input_vec); - break; - } - case CHEM_DHT_SNAPS: { - int type; - ChemBCast(&type, 1, MPI_INT); - - SetDHTSnaps(type, get_string(0, this->group_comm)); - - break; - } - case CHEM_DHT_READ_FILE: { - ReadDHTFile(get_string(0, this->group_comm)); - break; - } case CHEM_WORK_LOOP: { WorkerProcessPkgs(timings, iteration); break; @@ -103,12 +210,6 @@ void poet::ChemistryModule::WorkerLoop() { WorkerMetricsToMaster(type); break; } - case CHEM_PROGRESSBAR: { - bool enable; - ChemBCast(&enable, 1, MPI_CXX_BOOL); - setProgressBarPrintout(enable); - break; - } case CHEM_BREAK_MAIN_LOOP: { WorkerPostSim(iteration); loop = false; @@ -211,7 +312,9 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, dht->checkDHT(local_work_package_size, dt, vecCurrWP, vecMappingWP); dht_get_end = MPI_Wtime(); - // DHT_Results.ResultsToMapping(vecMappingWP); + if (interp_enabled) { + interp->tryInterpolation(dht->getDHTResults(), vecMappingWP); + } } phreeqc_time_start = MPI_Wtime(); @@ -225,6 +328,9 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, if (dht_enabled) { dht->resultsToWP(vecCurrWP); + if (interp_enabled) { + interp->resultsToWP(vecCurrWP); + } } /* send results to master */ @@ -238,6 +344,10 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, dht->fillDHT(local_work_package_size, vecCurrWP); dht_fill_end = MPI_Wtime(); + if (interp_enabled) { + interp->writePairs(dht->getDHTResults()); + } + timings.dht_get += dht_get_end - dht_get_start; timings.dht_fill += dht_fill_end - dht_fill_start; } @@ -252,23 +362,44 @@ void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm, MPI_STATUS_IGNORE); if (this->dht_enabled) { - this->dht->printStatistics(); + dht_hits.push_back(dht->getHits()); + dht_evictions.push_back(dht->getEvictions()); + dht->resetCounter(); - if (this->dht_snaps_type == DHT_FILES_ITEREND) { + if (this->interp_enabled) { + interp_calls.push_back(interp->getInterpolationCount()); + interp->resetCounter(); + interp->writePHTStats(); + } + + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { WorkerWriteDHTDump(iteration); + + if (this->interp_enabled) { + std::stringstream out; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } } } } void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { - if (this->dht_enabled && this->dht_snaps_type == DHT_FILES_SIMEND) { + if (this->dht_enabled && this->dht_snaps_type == DHT_SNAPS_SIMEND) { WorkerWriteDHTDump(iteration); + if (this->interp_enabled) { + std::stringstream out; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } } } void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) { std::stringstream out; - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') << std::setw(3) - << iteration << ".dht"; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".dht"; int res = dht->tableToFile(out.str().c_str()); if (res != DHT_SUCCESS && this->comm_rank == 2) std::cerr @@ -355,6 +486,26 @@ void poet::ChemistryModule::WorkerPerfToMaster(int type, this->group_comm); break; } + case WORKER_IP_WRITE: { + double val = interp->getPHTWriteTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_READ: { + double val = interp->getPHTReadTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_GATHER: { + double val = interp->getDHTGatherTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_FC: { + double val = interp->getInterpolationTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } default: { throw std::runtime_error("Unknown perf type in master's message."); } @@ -362,24 +513,46 @@ void poet::ChemistryModule::WorkerPerfToMaster(int type, } void poet::ChemistryModule::WorkerMetricsToMaster(int type) { - uint32_t value; + MPI_Comm worker_comm = dht->getCommunicator(); + int worker_rank; + MPI_Comm_rank(worker_comm, &worker_rank); + + MPI_Comm &group_comm = this->group_comm; + + auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm]( + std::vector &send_buffer, int tag) { + std::vector to_master(send_buffer.size()); + MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(), + MPI_UINT32_T, MPI_SUM, 0, worker_comm); + + if (worker_rank == 0) { + MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag, + group_comm); + } + }; + switch (type) { case WORKER_DHT_HITS: { - value = dht->getHits(); - break; - } - case WORKER_DHT_MISS: { - value = dht->getMisses(); + reduce_and_send(dht_hits, WORKER_DHT_HITS); break; } case WORKER_DHT_EVICTIONS: { - value = dht->getEvictions(); + reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS); break; } + case WORKER_IP_CALLS: { + reduce_and_send(interp_calls, WORKER_IP_CALLS); + return; + } + case WORKER_PHT_CACHE_HITS: { + std::vector input = this->interp->getPHTLocalCacheHits(); + reduce_and_send(input, WORKER_PHT_CACHE_HITS); + return; + } default: { throw std::runtime_error("Unknown perf type in master's message."); } } - MPI_Gather(&value, 1, MPI_UINT32_T, NULL, 1, MPI_UINT32_T, 0, - this->group_comm); } + +} // namespace poet diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 8d484a54b..80c0015b5 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -82,20 +82,20 @@ poet::DiffusionParams::s_DiffusionParams(RInside &R) { Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_index")); } -poet::ChemistryParams::s_ChemistryParams(RInside &R) { +void poet::ChemistryParams::initFromR(RInsidePOET &R) { this->database_path = Rcpp::as(R.parseEval("mysetup$chemistry$database")); this->input_script = Rcpp::as(R.parseEval("mysetup$chemistry$input_script")); - if (Rcpp::as( - R.parseEval("'dht_species' %in% names(mysetup$chemistry)"))) { - this->dht_species = Rcpp::as>( - R.parseEval("mysetup$chemistry$dht_species")); + + if (R.checkIfExists("dht_species", "mysetup$chemistry")) { + this->dht_signifs = + R.wrapNamedVector("mysetup$chemistry$dht_species"); } - if (Rcpp::as( - R.parseEval("'dht_signif' %in% names(mysetup$chemistry)"))) { - this->dht_signif = Rcpp::as>( - R.parseEval("mysetup$chemistry$dht_signif")); + + if (R.checkIfExists("pht_species", "mysetup$chemistry")) { + this->pht_signifs = + R.wrapNamedVector("mysetup$chemistry$pht_species"); } } @@ -104,7 +104,7 @@ SimParams::SimParams(int world_rank_, int world_size_) { this->simparams.world_size = world_size_; } -int SimParams::parseFromCmdl(char *argv[], RInside &R) { +int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { // initialize argh object argh::parser cmdl(argv); @@ -144,32 +144,26 @@ int SimParams::parseFromCmdl(char *argv[], RInside &R) { simparams.print_progressbar = cmdl[{"P", "progress"}]; /*Parse DHT arguments*/ - simparams.dht_enabled = cmdl["dht"]; + chem_params.use_dht = cmdl["dht"]; // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n'; - if (simparams.dht_enabled) { - cmdl("dht-strategy", 0) >> simparams.dht_strategy; - // cout << "CPP: DHT strategy is " << dht_strategy << endl; - - cmdl("dht-signif", 5) >> simparams.dht_significant_digits; - // cout << "CPP: DHT significant digits = " << dht_significant_digits << - // endl; - - simparams.dht_log = !(cmdl["dht-nolog"]); - // cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" - // : "OFF" ) << endl; - - cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> simparams.dht_size_per_process; + if (chem_params.use_dht) { + cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> chem_params.dht_size; // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << // endl; - cmdl("dht-snaps", 0) >> simparams.dht_snaps; + cmdl("dht-snaps", 0) >> chem_params.dht_snaps; - cmdl("dht-file") >> dht_file; + cmdl("dht-file") >> chem_params.dht_file; } /*Parse work package size*/ cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; + chem_params.use_interp = cmdl["interp"]; + cmdl("interp-size", 100) >> chem_params.pht_size; + cmdl("interp-min", 5) >> chem_params.interp_min_entries; + cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; + /*Parse output options*/ simparams.store_result = !cmdl["ignore-result"]; @@ -177,25 +171,27 @@ int SimParams::parseFromCmdl(char *argv[], RInside &R) { cout << "CPP: Complete results storage is " << (simparams.store_result ? "ON" : "OFF") << endl; cout << "CPP: Work Package Size: " << simparams.wp_size << endl; - cout << "CPP: DHT is " << (simparams.dht_enabled ? "ON" : "OFF") << '\n'; + cout << "CPP: DHT is " << (chem_params.use_dht ? "ON" : "OFF") << '\n'; - if (simparams.dht_enabled) { + if (chem_params.use_dht) { cout << "CPP: DHT strategy is " << simparams.dht_strategy << endl; cout << "CPP: DHT key default digits (ignored if 'signif_vector' is " "defined) = " << simparams.dht_significant_digits << endl; cout << "CPP: DHT logarithm before rounding: " << (simparams.dht_log ? "ON" : "OFF") << endl; - cout << "CPP: DHT size per process (Byte) = " - << simparams.dht_size_per_process << endl; - cout << "CPP: DHT save snapshots is " << simparams.dht_snaps << endl; - cout << "CPP: DHT load file is " << dht_file << endl; + cout << "CPP: DHT size per process (Byte) = " << chem_params.dht_size + << endl; + cout << "CPP: DHT save snapshots is " << chem_params.dht_snaps << endl; + cout << "CPP: DHT load file is " << chem_params.dht_file << endl; } } cmdl(1) >> filesim; cmdl(2) >> out_dir; + chem_params.dht_outdir = out_dir; + /* distribute information to R runtime */ // if local_rank == 0 then master else worker R["local_rank"] = simparams.world_rank; @@ -210,65 +206,20 @@ int SimParams::parseFromCmdl(char *argv[], RInside &R) { // work package size R["work_package_size"] = simparams.wp_size; // dht enabled? - R["dht_enabled"] = simparams.dht_enabled; + R["dht_enabled"] = chem_params.use_dht; // log before rounding? R["dht_log"] = simparams.dht_log; // eval the init string, ignoring any returns R.parseEvalQ("source(filesim)"); + R.parseEvalQ("mysetup <- setup"); + + this->chem_params.initFromR(R); return poet::PARSER_OK; } -void SimParams::initVectorParams(RInside &R) { - if (simparams.dht_enabled) { - /*Load significance vector from R setup file (or set default)*/ - bool signif_vector_exists = R.parseEval("exists('signif_vector')"); - if (signif_vector_exists) { - dht_signif_vector = as>(R["signif_vector"]); - } - - /*Load property type vector from R setup file (or set default)*/ - bool prop_type_vector_exists = R.parseEval("exists('prop_type')"); - if (prop_type_vector_exists) { - std::vector prop_type_R = - as>(R["prop_type"]); - this->dht_prop_type_vector.clear(); - this->dht_prop_type_vector.reserve(prop_type_R.size()); - - for (const auto &type : prop_type_R) { - if (type == "act") { - this->dht_prop_type_vector.push_back(DHT_TYPE_CHARGE); - continue; - } - - if (type == "ignore") { - this->dht_prop_type_vector.push_back(DHT_TYPE_IGNORE); - continue; - } - - this->dht_prop_type_vector.push_back(DHT_TYPE_DEFAULT); - } - } - - if (simparams.world_rank == 0) { - // MDL: new output on signif_vector and prop_type - if (signif_vector_exists) { - cout << "CPP: using problem-specific rounding digits: " << endl; - R.parseEval("print(data.frame(prop=prop, type=prop_type, " - "digits=signif_vector))"); - } else { - cout << "CPP: using DHT default rounding digits = " - << simparams.dht_significant_digits << endl; - } - - // MDL: pass to R the DHT stuff. These variables exist - // only if dht_enabled is true - R["dht_final_signif"] = dht_signif_vector; - R["dht_final_proptype"] = dht_prop_type_vector; - } - } -} +void SimParams::initVectorParams(RInside &R) {} std::list SimParams::validateOptions(argh::parser cmdl) { /* store all unknown parameters here */ diff --git a/test/testRounding.cpp b/test/testRounding.cpp new file mode 100644 index 000000000..06deef193 --- /dev/null +++ b/test/testRounding.cpp @@ -0,0 +1,79 @@ +#include + +#include "poet/Rounding.hpp" + +TEST_CASE("Rounding") { + constexpr int signif = 3; + + poet::DHT_Rounder rounder; + + SUBCASE("+exp - +sig") { + double input = 1.2345; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("+exp - -sig") { + double input = -1.2345; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - +sig") { + double input = 1.23456789E-5; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, -5); + } + + SUBCASE("-exp - -sig") { + double input = -1.23456789E-5; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, -5); + } + + SUBCASE("-exp - +sig (exceeding aqueous minimum)") { + double input = 1.23456789E-14; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 0); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - -sig (exceeding aqueous minimum)") { + double input = -1.23456789E-14; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 0); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - +sig (diff exceeds aqueous minimum)") { + double input = 1.23456789E-13; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 1); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - -sig (diff exceeds aqueous minimum)") { + double input = -1.23456789E-13; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -1); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - +sig (diff exceeds aqueous minimum - but H or O)") { + double input = 1.23456789E-13; + auto rounded = rounder.round(input, signif, true); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - -sig (diff exceeds aqueous minimum - but H or O)") { + double input = -1.23456789E-13; + auto rounded = rounder.round(input, signif, true); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, -13); + } +} diff --git a/util/data_evaluation/RFun_Eval.R b/util/data_evaluation/RFun_Eval.R index ed0068689..8243b8c05 100644 --- a/util/data_evaluation/RFun_Eval.R +++ b/util/data_evaluation/RFun_Eval.R @@ -1,10 +1,11 @@ ## Simple library of functions to assess and visualize the results of the coupled simulations -## Time-stamp: "Last modified 2023-04-24 16:09:55 mluebke" +## Time-stamp: "Last modified 2023-05-29 13:51:21 mluebke" require(RedModRphree) require(Rmufits) ## essentially for PlotCartCellData require(Rcpp) +require(stringr) curdir <- dirname(sys.frame(1)$ofile) ##path.expand(".") @@ -16,13 +17,18 @@ ConvertDHTKey <- function(value) { rcpp_key_convert(value) } +ConvertToUInt64 <- function(double_data) { + rcpp_uint64_convert(double_data) +} + ## function which reads all simulation results in a given directory ReadRTSims <- function(dir) { files_full <- list.files(dir, pattern="iter.*rds", full.names=TRUE) files_name <- list.files(dir, pattern="iter.*rds", full.names=FALSE) res <- lapply(files_full, readRDS) names(res) <- gsub(".rds","",files_name, fixed=TRUE) - return(res) + + return(res[str_sort(names(res), numeric = TRUE)]) } ## function which reads all successive DHT stored in a given directory @@ -68,6 +74,61 @@ ReadDHT <- function(file, new_scheme = TRUE) { return(res) } +## function which reads all successive DHT stored in a given directory +ReadAllPHT <- function(dir, with_info = FALSE) { + files_full <- list.files(dir, pattern="iter.*pht", full.names=TRUE) + files_name <- list.files(dir, pattern="iter.*pht", full.names=FALSE) + res <- lapply(files_full, ReadPHT, with_info = with_info) + names(res) <- gsub(".pht","",files_name, fixed=TRUE) + return(res) +} + +## function which reads one .dht file and gives a matrix +ReadPHT <- function(file, with_info = FALSE) { + conn <- file(file, "rb") ## open for reading in binary mode + if (!isSeekable(conn)) + stop("Connection not seekable") + + ## we first reposition ourselves to the end of the file... + tmp <- seek(conn, where=0, origin = "end") + ## ... and then back to the origin so to store the length in bytes + flen <- seek(conn, where=0, origin = "start") + + ## we read the first 2 integers (4 bytes each) containing dimensions in bytes + dims <- readBin(conn, what="integer", n=2) + + ## compute dimensions of the data + tots <- sum(dims) + ncol <- tots/8 + nrow <- (flen - 8)/tots ## 8 here is 2*sizeof("int") + buff <- readBin(conn, what="double", n=ncol*nrow) + ## close connection + close(conn) + res <- matrix(buff, nrow=nrow, ncol=ncol, byrow=TRUE) + + nkeys <- dims[1] / 8 + keys <- res[, 1:nkeys - 1] + + timesteps <- res[, nkeys] + conv <- apply(keys, 2, ConvertDHTKey) + #res[, 1:nkeys - 1] <- conv + + ndata <- dims[2] / 8 + fill_rate <- ConvertToUInt64(res[, nkeys + 1]) + + buff <- c(conv, timesteps, fill_rate) + + if (with_info) { + ndata <- dims[2]/8 + visit_count <- ConvertToUInt64(res[, nkeys + ndata]) + buff <- c(buff, visit_count) + } + + res <- matrix(buff, nrow = nrow, byrow = FALSE) + + return(res) +} + ## Scatter plots of each variable in the iteration PlotScatter <- function(sam1, sam2, which=NULL, labs=c("NO DHT", "DHT"), pch=".", cols=3, ...) { if ((!is.data.frame(sam1)) & ("T" %in% names(sam1))) @@ -228,3 +289,33 @@ Plot2DCellData <- function (data, grid, nx, ny, contour = TRUE, } invisible(pp) } + +PlotAsMP4 <- function(data, nx, ny, to_plot, out_dir, name, + contour = FALSE, scale = FALSE, framerate = 30) { + sort_data <- data[str_sort(names(data), numeric = TRUE)] + plot_data <- lapply(sort_data, function(x) x$C[[to_plot]]) + pad_size <- ceiling(log10(length(plot_data))) + + dir.create(out_dir, showWarnings = FALSE) + output_files <- paste0(out_dir, "/", name, "_%0", pad_size, "d.png") + output_mp4 <- paste0(out_dir, "/", name, ".mp4") + + png(output_files, + width = 297, height = 210, units = "mm", + res = 100 + ) + + for (i in 1:length(plot_data)) { + Rmufits::PlotCartCellData(plot_data[[i]], nx = nx, ny = ny, contour = contour, scale = scale) + } + dev.off() + + ffmpeg_command <- paste( + "ffmpeg -framerate", framerate, "-i", output_files, + "-c:v libx264 -crf 22", output_mp4 + ) + + unlink(output_mp4) + system(ffmpeg_command) + +} diff --git a/util/data_evaluation/interpret_keys.cpp b/util/data_evaluation/interpret_keys.cpp index 777f95d1d..26419076e 100644 --- a/util/data_evaluation/interpret_keys.cpp +++ b/util/data_evaluation/interpret_keys.cpp @@ -35,3 +35,16 @@ std::vector rcpp_key_convert(std::vector input) { return output; } + +// [[Rcpp::export]] +std::vector rcpp_uint64_convert(std::vector input) { + std::vector output; + output.reserve(input.size()); + + for (double &value : input) { + std::uint64_t *as_int = reinterpret_cast(&value); + output.push_back(*as_int); + } + + return output; +} From c5676b9a00cb9a0ca02d5ba32360dac23fd96b4b Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Tue, 1 Aug 2023 18:47:56 +0200 Subject: [PATCH 02/52] chore: update doctest to version 2.4.11 --- ext/doctest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/doctest b/ext/doctest index 8fdfd113d..ae7a13539 160000 --- a/ext/doctest +++ b/ext/doctest @@ -1 +1 @@ -Subproject commit 8fdfd113dcb4ad1a31705ff8ddb13a21a505bad8 +Subproject commit ae7a13539fb71f270b87eb2e874fbac80bc8dda2 From dd1fc5214eeda7fd80b0a1b164640f3048da24c6 Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Tue, 1 Aug 2023 23:19:16 +0200 Subject: [PATCH 03/52] fix: remove mass balance check --- .../SurrogateModels/InterpolationModule.cpp | 47 +------------------ 1 file changed, 1 insertion(+), 46 deletions(-) diff --git a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp index 0d139ac3e..e5c36fa0c 100644 --- a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp +++ b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-01 17:54:53 mluebke" +// Time-stamp: "Last modified 2023-08-01 23:18:45 mluebke" #include "poet/DHT_Wrapper.hpp" #include "poet/HashFunctions.hpp" @@ -115,55 +115,10 @@ void InterpolationModule::tryInterpolation( // } // mean_water /= pht_result.size; - auto calcMassBalance = [](const std::vector &input) { - double C, Ca, Mg; - C = input[3] + input[7] + 2 * input[9]; - Ca = input[4] + input[7] + input[9]; - Mg = input[6] + input[9]; - - return std::array{C, Ca, Mg}; - }; - - auto mass_in = calcMassBalance(dht_results.results[i]); - - const auto DHT_inputElements = dht_instance.getKeyElements(); - - // HACK: transform input elements to uint, as there shall no any user - // defined key species present yet - - // std::vector interp_inputElements; - // std::transform(DHT_inputElements.begin(), DHT_inputElements.end(), - // interp_inputElements.begin(), [](std::int32_t x) { - // if (x < 0) { - // x = 0; - // } - // return x; - // }); - interp_result.results[i] = f_interpolate(dht_instance.getKeyElements(), dht_results.results[i], pht_result.in_values, pht_result.out_values); - auto mass_out = calcMassBalance(interp_result.results[i]); - - double diff[3]; - - std::transform(mass_in.begin(), mass_in.end(), mass_out.begin(), diff, - std::minus<>()); - - bool exceeding = false; - for (const auto comp : diff) { - if (std::abs(comp) > 1e-10) { - exceeding = true; - break; - } - } - - if (exceeding) { - interp_result.status[i] = INSUFFICIENT_DATA; - continue; - } - if (interp_result.results[i][7] < 0 || interp_result.results[i][9] < 0) { interp_result.status[i] = INSUFFICIENT_DATA; continue; From 55f3be8c1839fbd39ced0b2389f795c0ff003cab Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Wed, 2 Aug 2023 13:56:16 +0200 Subject: [PATCH 04/52] perf: remove unused parameters from paramlist and parameter structure doc: update doc to latest changes data: update benchmarks to latest changes --- README.md | 71 +++++++++++++++++------------ bench/barite/barite.R | 8 +--- bench/dolo/dolo_diffu_inner.R | 8 +--- bench/dolo/dolo_diffu_inner_large.R | 8 +--- bench/dolo/dolo_interp_long.R | 8 +--- bench/surfex/ex.R | 8 +--- bench/surfex/surfex.R | 8 +--- docs/Input_Scripts.md | 21 +++------ include/poet/SimParams.hpp | 9 ++-- src/SimParams.cpp | 8 ---- 10 files changed, 65 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index 7ef495139..a273f7f10 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # POET @@ -69,6 +69,9 @@ following available options: DHT usage. Defaults to _OFF_. - **POET_ENABLE_TESTING**=_boolean_ - enables small set of unit tests (more to come). Defaults to _OFF_. +- **POET_PHT_ADDITIONAL_INFO**=_boolean_ - enabling the count of accesses to one + PHT bucket. Use with caution, as things will get slowed down significantly. + Defaults to _OFF_. ### Example: Build from scratch @@ -109,15 +112,24 @@ poet │ └── kin_r_library.R └── share └── poet - ├── bench - │ ├── dolo_diffu_inner_large.R - │ ├── dolo_diffu_inner.R - │ └── dolo_inner.pqi - └── examples - ├── dol.pqi - ├── phreeqc_kin.dat - ├── SimDol1D_diffu.R - └── SimDol2D_diffu.R + └── bench + ├── barite + │ ├── barite_interp_eval.R + │ ├── barite.pqi + │ ├── barite.R + │ └── db_barite.dat + ├── dolo + │ ├── dolo_diffu_inner_large.R + │ ├── dolo_diffu_inner.R + │ ├── dolo_inner.pqi + │ ├── dolo_interp_long.R + │ └── phreeqc_kin.dat + └── surfex + ├── ExBase.pqi + ├── ex.R + ├── SMILE_2021_11_01_TH.dat + ├── SurfExBase.pqi + └── surfex.R ``` The R libraries will be loaded at runtime and the paths are hardcoded @@ -125,6 +137,10 @@ absolute paths inside `poet.cpp`. So, if you consider to move `bin/poet` either change paths of the R source files and recompile POET or also move `R_lib/*` relative to the binary. +The benchmarks consist of input scripts, which are provided as .R files. +Additionally, Phreeqc scripts and their corresponding databases are required, +stored as .pqi and .dat files, respectively. + ## Running Run POET by `mpirun ./poet ` @@ -132,28 +148,25 @@ where: - **OPTIONS** - runtime parameters (explained below) - **SIMFILE** - simulation described as R script (e.g. - `/share/examples/SimDol2D_diffu.R`) + `/share/poet/bench/dolo/dolo_interp_long.R`) - **OUTPUT_DIRECTORY** - path, where all output of POET should be stored ### Runtime options The following parameters can be set: -| Option | Value | Description | -|--------------------------|--------------|--------------------------------------------------------------------------------------------------------------------------| -| **--work-package-size=** | _1..n_ | size of work packages (defaults to _5_) | -| **--ignore-result** | | disables store of simulation resuls | -| **--dht** | | enabling DHT usage (defaults to _OFF_) | -| **--dht-signif=** | _1..n_ | set rounding to number of significant digits (defaults to _5_) (it is recommended to use `signif_vec` in R input script) | -| **--dht-strategy=** | _0-1_ | change DHT strategy. **NOT IMPLEMENTED YET** (Defaults to _0_) | -| **--dht-size=** | _1-n_ | size of DHT per process involved in megabyte (defaults to _1000 MByte_) | -| **--dht-snaps=** | _0-2_ | disable or enable storage of DHT snapshots | -| **--dht-file=** | `` | initializes DHT with the given snapshot file | - -#### Additions to `dht-signif` - -Only used if no vector is given in setup file. For individual values -per column use R vector `signif_vector` in `SIMFILE`. +| Option | Value | Description | +|-----------------------------|--------------|--------------------------------------------------------------------------------------------------------------------------| +| **--work-package-size=** | _1..n_ | size of work packages (defaults to _5_) | +| **--ignore-result** | | disables store of simulation resuls | +| **--dht** | | enabling DHT usage (defaults to _OFF_) | +| **--dht-strategy=** | _0-1_ | change DHT strategy. **NOT IMPLEMENTED YET** (Defaults to _0_) | +| **--dht-size=** | _1-n_ | size of DHT per process involved in megabyte (defaults to _1000 MByte_) | +| **--dht-snaps=** | _0-2_ | disable or enable storage of DHT snapshots | +| **--dht-file=** | `` | initializes DHT with the given snapshot file | +| **--interp-size** | _1-n_ | size of PHT (interpolation) per process in megabyte | +| **--interp-bucket-entries** | _1-n_ | number of entries to store at maximum in one PHT bucket | +| **--interp-min** | _1-n_ | number of entries in PHT bucket needed to start interpolation | #### Additions to `dht-snaps` @@ -168,13 +181,13 @@ Following values can be set: ### Example: Running from scratch We will continue the above example and start a simulation with -`SimDol2D_diffu.R`. As transport a simple fixed-coefficient diffusion is used. +`dolo_diffu_inner.R`. As transport a simple fixed-coefficient diffusion is used. It's a 2D, 100x100 grid, simulating 10 time steps. To start the simulation with 4 processes `cd` into your previously installed POET-dir `/bin` and run: ```sh -mpirun -n 4 ./poet ../share/poet/examples/SimDol2D_diffu.R output +mpirun -n 4 ./poet ../share/poet/bench/dolo/dolo_diffu_inner.R/ output ``` After a finished simulation all data generated by POET will be found @@ -187,7 +200,7 @@ produced. This is done by appending the `--dht-snaps=` option. The resulting call would look like this: ```sh -mpirun -n 4 ./poet --dht --dht-snaps=2 ../share/poet/examples/SimDol2D_diffu.R output +mpirun -n 4 ./poet --dht --dht-snaps=2 ../share/poet/bench/dolo/dolo_diffu_inner.R/ output ``` ## About the usage of MPI_Wtime() diff --git a/bench/barite/barite.R b/bench/barite/barite.R index d6941d3ed..0543ff39a 100644 --- a/bench/barite/barite.R +++ b/bench/barite/barite.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-04-24 16:51:23 mluebke" +## Time-stamp: "Last modified 2023-08-02 13:59:22 mluebke" database <- normalizePath("../share/poet/bench/barite/db_barite.dat") input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") @@ -28,11 +28,7 @@ init_cell <- list( grid <- list( n_cells = c(n, m), s_cells = c(1, 1), - type = types[1], - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + type = types[1] ) diff --git a/bench/dolo/dolo_diffu_inner.R b/bench/dolo/dolo_diffu_inner.R index f981ed5c4..071fe36c1 100644 --- a/bench/dolo/dolo_diffu_inner.R +++ b/bench/dolo/dolo_diffu_inner.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-07-28 16:57:40 mluebke" +## Time-stamp: "Last modified 2023-08-02 13:59:02 mluebke" database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") @@ -29,11 +29,7 @@ init_cell <- list( grid <- list( n_cells = c(n, m), s_cells = c(1, 1), - type = types[1], - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + type = types[1] ) diff --git a/bench/dolo/dolo_diffu_inner_large.R b/bench/dolo/dolo_diffu_inner_large.R index 5dfb95677..4403ae46f 100644 --- a/bench/dolo/dolo_diffu_inner_large.R +++ b/bench/dolo/dolo_diffu_inner_large.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-07-28 16:57:40 mluebke" +## Time-stamp: "Last modified 2023-08-02 13:59:12 mluebke" database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") @@ -29,11 +29,7 @@ init_cell <- list( grid <- list( n_cells = c(n, m), s_cells = c(2, 1), - type = types[1], - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + type = types[1] ) diff --git a/bench/dolo/dolo_interp_long.R b/bench/dolo/dolo_interp_long.R index b333891ea..dedfe1c21 100644 --- a/bench/dolo/dolo_interp_long.R +++ b/bench/dolo/dolo_interp_long.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-08-01 18:34:47 mluebke" +## Time-stamp: "Last modified 2023-08-02 13:47:06 mluebke" database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") @@ -29,11 +29,7 @@ init_cell <- list( grid <- list( n_cells = c(n, m), s_cells = c(5, 2.5), - type = types[1], - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + type = types[1] ) diff --git a/bench/surfex/ex.R b/bench/surfex/ex.R index 299c5db0c..13feea1d8 100644 --- a/bench/surfex/ex.R +++ b/bench/surfex/ex.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-04-17 12:29:27 mluebke" +## Time-stamp: "Last modified 2023-08-02 13:59:35 mluebke" database <- normalizePath("./SMILE_2021_11_01_TH.dat") input_script <- normalizePath("./ExBase.pqi") @@ -40,11 +40,7 @@ init_cell <- list(H = 1.476571028625e-01, grid <- list( n_cells = c(n, m), s_cells = c(1, 1), - type = "scratch", - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + type = "scratch" ) diff --git a/bench/surfex/surfex.R b/bench/surfex/surfex.R index 37a5e0429..409b0f277 100644 --- a/bench/surfex/surfex.R +++ b/bench/surfex/surfex.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-04-17 15:48:21 mluebke" +## Time-stamp: "Last modified 2023-08-02 13:59:44 mluebke" database <- normalizePath("../share/poet/bench/surfex/SMILE_2021_11_01_TH.dat") input_script <- normalizePath("../share/poet/bench/surfex/SurfExBase.pqi") @@ -40,11 +40,7 @@ init_cell <- list(H = 1.476571028625e-01, grid <- list( n_cells = c(n, m), s_cells = c(1, 1), - type = "scratch", - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + type = "scratch" ) diff --git a/docs/Input_Scripts.md b/docs/Input_Scripts.md index 305ac554f..1c03de749 100644 --- a/docs/Input_Scripts.md +++ b/docs/Input_Scripts.md @@ -13,10 +13,6 @@ by POET. | `n_cells` | Numeric Vector | Number of cells in each direction | | `s_cells` | Numeric Vector | Spatial resolution of grid in each direction | | `type` | String | Type of initialization, can be set to *scratch*, *phreeqc* or *rds* | -| `init_cell` | Data Frame | Containing all exactly one value per species to initialize the field. | -| `props` | String Vector | Names of all species | -| `database` | String | Path to Phreeqc database | -| `input_script` | String | Path to Phreeqc initial script | ## Diffusion parameters @@ -70,10 +66,12 @@ vecinj_index <- list( ## Chemistry parameters -| name | type | description | -|----------------|--------|-----------------------------------| -| `database` | String | Path to the Phreeqc database | -| `input_script` | String | Path the the Phreeqc input script | +| name | type | description | +|----------------|--------------|----------------------------------------------------------------------------------| +| `database` | String | Path to the Phreeqc database | +| `input_script` | String | Path the the Phreeqc input script | +| `dht_species` | Named Vector | Indicates significant digits to use for each species for DHT rounding. | +| `pht_species` | Named Vector | Indicates significant digits to use for each species for Interpolation rounding. | ## Final setup @@ -86,10 +84,3 @@ vecinj_index <- list( | `timesteps` | Numeric Vector | $\Delta t$ to use for specific iteration | | `store_result` | Boolean | Indicates if results should be stored | | `out_save` | Numeric Vector | *optional:* At which iteration the states should be stored | - -### DHT setup - -| name | type | description | -|-----------------|----------------|---------------------------------------------------------------------------------| -| `signif_vector` | Numeric Vector | Indicates significant digits to use for DHT rounding. Order of `props` vector. | -| `prop_type` | String Vector | Set type of species for rounding, can be left blank or set to *act* or *ignore* | diff --git a/include/poet/SimParams.hpp b/include/poet/SimParams.hpp index 8a28624df..5651f98d6 100644 --- a/include/poet/SimParams.hpp +++ b/include/poet/SimParams.hpp @@ -240,11 +240,12 @@ private: std::list validateOptions(argh::parser cmdl); const std::set flaglist{ - "ignore-result", "dht", "dht-nolog", "P", "progress", "interp"}; + "ignore-result", "dht", "P", "progress", "interp"}; const std::set paramlist{ - "work-package-size", "dht-signif", "dht-strategy", - "dht-size", "dht-snaps", "dht-file", - "interp-size", "interp-min", "interp-bucket-entries"}; + "work-package-size", "dht-strategy", + "dht-size", "dht-snaps", + "dht-file", "interp-size", + "interp-min", "interp-bucket-entries"}; t_simparams simparams; diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 80c0015b5..95244b6fa 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -56,14 +56,6 @@ poet::GridParams::s_GridParams(RInside &R) { (dim == 1 ? this->n_cells[0] : this->n_cells[0] * this->n_cells[1]); this->type = Rcpp::as(R.parseEval("mysetup$grid$type")); - this->init_df = - Rcpp::as(R.parseEval("mysetup$grid$init_cell")); - this->props = - Rcpp::as>(R.parseEval("mysetup$grid$props")); - this->input_script = - Rcpp::as(R.parseEval("mysetup$grid$input_script")); - this->database_path = - Rcpp::as(R.parseEval("mysetup$grid$database")); } poet::DiffusionParams::s_DiffusionParams(RInside &R) { From 0f51d9dfefd96b3a8103c3ccb70b29eb1cc7c993 Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Thu, 3 Aug 2023 17:22:59 +0200 Subject: [PATCH 05/52] fix: apply weights to output values (rates) instead of input values --- src/ChemistryModule/ChemistryModule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp index 52a315734..a6e2bb7a4 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/ChemistryModule/ChemistryModule.cpp @@ -90,7 +90,7 @@ inverseDistanceWeighting(const std::vector &to_calc, // } for (int j = 0; j < data_set_n; j++) { - key_delta += weights[j] * input[j][key_comp_i]; + key_delta += weights[j] * output[j][key_comp_i]; } key_delta /= inv_sum; From b040f6d8c47d620d8e7894b59df5c5fac20c59d7 Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Thu, 3 Aug 2023 21:23:50 +0200 Subject: [PATCH 06/52] fix: use output index also for weighting --- src/ChemistryModule/ChemistryModule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp index a6e2bb7a4..3048ae348 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/ChemistryModule/ChemistryModule.cpp @@ -90,7 +90,7 @@ inverseDistanceWeighting(const std::vector &to_calc, // } for (int j = 0; j < data_set_n; j++) { - key_delta += weights[j] * output[j][key_comp_i]; + key_delta += weights[j] * output[j][output_comp_i]; } key_delta /= inv_sum; From a69ee81325bf6ad652557970adc36a67c15bfa9e Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Fri, 4 Aug 2023 13:40:20 +0200 Subject: [PATCH 07/52] fix: pass copy of map to DHT parsing --- src/ChemistryModule/ChemistryModule.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp index 3048ae348..acbd7af7d 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/ChemistryModule/ChemistryModule.cpp @@ -472,7 +472,7 @@ void poet::ChemistryModule::initializeInterp( } auto key_indices = - parseDHTSpeciesVec(key_species, dht->getKeySpecies().getNames()); + parseDHTSpeciesVec(map_copy, dht->getKeySpecies().getNames()); if (this->interp) { this->interp.reset(); From bb6ebf6ada6259f3dc410387f90b57ab3befbbfa Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Wed, 9 Aug 2023 09:50:09 +0200 Subject: [PATCH 08/52] MDL: add stdout message from initialisation in SimParams --- src/SimParams.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 95244b6fa..62a231a08 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -177,6 +177,13 @@ int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { cout << "CPP: DHT save snapshots is " << chem_params.dht_snaps << endl; cout << "CPP: DHT load file is " << chem_params.dht_file << endl; } + + if (chem_params.use_interp) { + cout << "CPP: PHT interpolation enabled: " << chem_params.use_interp << endl; + cout << "CPP: PHT interp-size = " << chem_params.pht_size << endl; + cout << "CPP: PHT interp-min = " << chem_params.interp_min_entries << endl; + cout << "CPP: PHT interp-bucket-entries = " << chem_params.pht_max_entries << endl; + } } cmdl(1) >> filesim; From 939052968bdb2057161ddbfaec8d9aaf7d49eb2b Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Wed, 9 Aug 2023 12:20:55 +0200 Subject: [PATCH 09/52] MDL: added include/poet/Macros.hpp --- include/poet/Macros.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/poet/Macros.hpp diff --git a/include/poet/Macros.hpp b/include/poet/Macros.hpp new file mode 100644 index 000000000..f632ea3ee --- /dev/null +++ b/include/poet/Macros.hpp @@ -0,0 +1,14 @@ +// Time-stamp: "Last modified 2023-08-09 12:10:57 delucia" +#ifndef MACROS_H +#define MACROS_H + +// Prepend "msg" with name of calling function +#define MSG(msg) std::cout << "CPP: " << __func__ << ": " << std::string(msg) << std::endl; + +#define MSG_NOENDL(msg) std::cout << "CPP: " << __func__ << ": " << std::string(msg); + +#define ERRMSG(msg) std::cerr << "CPP_ERROR: " << __func__ << ": " << std::string(msg) << std::endl; + +#define BOOL_PRINT(bool) std::string(bool ? "ON" : "OFF") + +#endif // MACROS_H From 5ce26df771b536020f9a7aaf1af07d86918587cd Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Wed, 9 Aug 2023 12:22:03 +0200 Subject: [PATCH 10/52] MDL: replaced all cout/cerr with macros MSG, ERRMSG... --- app/poet.cpp | 47 +++--- app/poet.h.in | 1 + include/poet/SimParams.hpp | 1 + src/DiffusionModule.cpp | 43 ++--- src/SimParams.cpp | 312 ++++++++++++++++++------------------- 5 files changed, 202 insertions(+), 202 deletions(-) diff --git a/app/poet.cpp b/app/poet.cpp index 69d1b470f..67cf3c6fd 100644 --- a/app/poet.cpp +++ b/app/poet.cpp @@ -18,14 +18,15 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/ChemistryModule.hpp" #include #include #include +#include #include #include #include #include +#include #include #include @@ -146,12 +147,13 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, double dt = Rcpp::as( R.parseEval("mysetup$timesteps[" + std::to_string(iter) + "]")); - cout << "CPP: Next time step is " << dt << "[s]" << endl; + + // cout << "CPP: Next time step is " << dt << "[s]" << endl; + MSG("Next time step is " + std::to_string(dt) + " [s]"); /* displaying iteration number, with C++ and R iterator */ - cout << "CPP: Going through iteration " << iter << endl; - cout << "CPP: R's $iter: " << ((uint32_t)(R.parseEval("mysetup$iter"))) - << ". Iteration" << endl; + MSG("Going through iteration " + std::to_string(iter)); + MSG("R's $iter: " + std::to_string((uint32_t)(R.parseEval("mysetup$iter"))) + ". Iteration"); /* run transport */ // TODO: transport to diffusion @@ -159,9 +161,7 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, chem.getField().UpdateFromField(diffusion.getField()); - cout << "CPP: Chemistry" << endl; - - chem.SetTimeStep(dt); + MSG("Chemistry step"); chem.SetTimeStep(dt); chem.RunCells(); @@ -181,10 +181,8 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, // store_result is TRUE) R.parseEvalQ("mysetup <- master_iteration_end(setup=mysetup)"); - cout << endl - << "CPP: End of *coupling* iteration " << iter << "/" << maxiter - << endl - << endl; + MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); + MSG(); // MPI_Barrier(MPI_COMM_WORLD); double end_t = MPI_Wtime(); @@ -261,7 +259,7 @@ int main(int argc, char *argv[]) { MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); if (world_rank == 0) { - cout << "Running POET in version " << poet_version << endl << endl; + MSG("Running POET version " + std::string(poet_version)); } if (world_rank > 0) { @@ -288,7 +286,9 @@ int main(int argc, char *argv[]) { } MPI_Barrier(MPI_COMM_WORLD); - cout << "CPP: finished, cleanup of process " << world_rank << endl; + + MSG("finished, cleanup of process " + std::to_string(world_rank)); + MPI_Finalize(); return EXIT_SUCCESS; @@ -312,7 +312,7 @@ int main(int argc, char *argv[]) { return EXIT_SUCCESS; } - cout << "CPP: R Init (RInside) on process " << world_rank << endl; + MSG("RInside initialized on process " + std::to_string(world_rank)); R.parseEvalQ("mysetup <- setup"); // if (world_rank == 0) { // get timestep vector from @@ -326,12 +326,12 @@ int main(int argc, char *argv[]) { // MDL: store all parameters if (world_rank == 0) { - cout << "CPP: Calling R Function to store calling parameters" << endl; - R.parseEvalQ("StoreSetup(setup=mysetup)"); + MSG("Calling R Function to store calling parameters"); + R.parseEvalQ("StoreSetup(setup=mysetup)"); } if (world_rank == 0) { - cout << "CPP: Init done on process with rank " << world_rank << endl; + MSG("Init done on process with rank " + std::to_string(world_rank)); } // MPI_Barrier(MPI_COMM_WORLD); @@ -340,9 +340,9 @@ int main(int argc, char *argv[]) { dSimTime = RunMasterLoop(params, R, g_params, nxyz_master); - cout << "CPP: finished simulation loop" << endl; + MSG("finished simulation loop"); - cout << "CPP: start timing profiling" << endl; + MSG("start timing profiling"); R["simtime"] = dSimTime; R.parseEvalQ("profiling$simtime <- simtime"); @@ -351,16 +351,15 @@ int main(int argc, char *argv[]) { r_vis_code = "saveRDS(profiling, file=paste0(fileout,'/timings.rds'));"; R.parseEval(r_vis_code); - cout << "CPP: Done! Results are stored as R objects into <" - << params.getOutDir() << "/timings.rds>" << endl; + MSG("Done! Results are stored as R objects into <" + params.getOutDir() + "/timings.rds>"); MPI_Barrier(MPI_COMM_WORLD); - cout << "CPP: finished, cleanup of process " << world_rank << endl; + MSG("finished, cleanup of process " + std::to_string(world_rank)); MPI_Finalize(); if (world_rank == 0) { - cout << "CPP: done, bye!" << endl; + MSG("done, bye!"); } exit(0); diff --git a/app/poet.h.in b/app/poet.h.in index d98fe41e9..96a25a2c1 100644 --- a/app/poet.h.in +++ b/app/poet.h.in @@ -7,4 +7,5 @@ const char *poet_version = "@POET_VERSION@"; const char *CHEMISTRY_MODULE_NAME = "state_C"; + #endif // POET_H diff --git a/include/poet/SimParams.hpp b/include/poet/SimParams.hpp index 5651f98d6..a73669865 100644 --- a/include/poet/SimParams.hpp +++ b/include/poet/SimParams.hpp @@ -27,6 +27,7 @@ #include #include +#include "Macros.hpp" #include "DataStructures.hpp" #include "RInsidePOET.hpp" #include "argh.hpp" // Argument handler https://github.com/adishavit/argh diff --git a/src/DiffusionModule.cpp b/src/DiffusionModule.cpp index 609d9cf91..576785461 100644 --- a/src/DiffusionModule.cpp +++ b/src/DiffusionModule.cpp @@ -18,9 +18,10 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/SimParams.hpp" -#include "tug/BoundaryCondition.hpp" -#include "tug/Diffusion.hpp" +#include +#include +#include +#include #include #include #include @@ -153,32 +154,32 @@ void DiffusionModule::simulate(double dt) { sim_b_transport = MPI_Wtime(); - std::cout << "DiffusionModule::simulate(): Starting diffusion ..." - << std::flush; - + MSG_NOENDL("DiffusionModule::simulate(): Starting diffusion ..."); + std::cout << std::flush; + std::vector> field_2d = t_field.As2DVector(); this->diff_input.setTimestep(dt); for (int i = 0; i < field_2d.size(); i++) { - std::vector in_alpha(this->n_cells_per_prop, this->alpha[i]); - this->diff_input.setBoundaryCondition(this->bc_vec[i]); - - if (this->dim == this->DIM_1D) { - tug::diffusion::BTCS_1D(this->diff_input, field_2d[i].data(), - in_alpha.data()); - } else { - tug::diffusion::ADI_2D(this->diff_input, field_2d[i].data(), - in_alpha.data()); - } + std::vector in_alpha(this->n_cells_per_prop, this->alpha[i]); + this->diff_input.setBoundaryCondition(this->bc_vec[i]); + + if (this->dim == this->DIM_1D) { + tug::diffusion::BTCS_1D(this->diff_input, field_2d[i].data(), + in_alpha.data()); + } else { + tug::diffusion::ADI_2D(this->diff_input, field_2d[i].data(), + in_alpha.data()); + } } - + t_field.SetFromVector(field_2d); - - std::cout << " done!\n"; - + + std::cout << " done!" << std::endl; + sim_a_transport = MPI_Wtime(); - + transport_t += sim_a_transport - sim_b_transport; } diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 62a231a08..8088609cf 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -18,10 +18,10 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/DHT_Types.hpp" #include #include #include +#include #include #include @@ -37,204 +37,202 @@ using namespace std; using namespace Rcpp; poet::GridParams::s_GridParams(RInside &R) { - auto tmp_n_cells = - Rcpp::as>(R.parseEval("mysetup$grid$n_cells")); - assert(tmp_n_cells.size() < 3); + auto tmp_n_cells = + Rcpp::as>(R.parseEval("mysetup$grid$n_cells")); + assert(tmp_n_cells.size() < 3); - this->dim = tmp_n_cells.size(); + this->dim = tmp_n_cells.size(); - std::copy(tmp_n_cells.begin(), tmp_n_cells.end(), this->n_cells.begin()); + std::copy(tmp_n_cells.begin(), tmp_n_cells.end(), this->n_cells.begin()); - auto tmp_s_cells = - Rcpp::as>(R.parseEval("mysetup$grid$s_cells")); + auto tmp_s_cells = + Rcpp::as>(R.parseEval("mysetup$grid$s_cells")); - assert(tmp_s_cells.size() == this->dim); + assert(tmp_s_cells.size() == this->dim); - std::copy(tmp_s_cells.begin(), tmp_s_cells.end(), this->s_cells.begin()); + std::copy(tmp_s_cells.begin(), tmp_s_cells.end(), this->s_cells.begin()); - this->total_n = - (dim == 1 ? this->n_cells[0] : this->n_cells[0] * this->n_cells[1]); + this->total_n = + (dim == 1 ? this->n_cells[0] : this->n_cells[0] * this->n_cells[1]); - this->type = Rcpp::as(R.parseEval("mysetup$grid$type")); + this->type = Rcpp::as(R.parseEval("mysetup$grid$type")); } poet::DiffusionParams::s_DiffusionParams(RInside &R) { - this->initial_t = - Rcpp::as(R.parseEval("mysetup$diffusion$init")); - this->alpha = - Rcpp::as(R.parseEval("mysetup$diffusion$alpha")); - if (Rcpp::as( - R.parseEval("'vecinj_inner' %in% names(mysetup$diffusion)"))) { - this->vecinj_inner = - Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_inner")); - } - this->vecinj = - Rcpp::as(R.parseEval("mysetup$diffusion$vecinj")); - this->vecinj_index = - Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_index")); + this->initial_t = + Rcpp::as(R.parseEval("mysetup$diffusion$init")); + this->alpha = + Rcpp::as(R.parseEval("mysetup$diffusion$alpha")); + if (Rcpp::as( + R.parseEval("'vecinj_inner' %in% names(mysetup$diffusion)"))) { + this->vecinj_inner = + Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_inner")); + } + this->vecinj = + Rcpp::as(R.parseEval("mysetup$diffusion$vecinj")); + this->vecinj_index = + Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_index")); } void poet::ChemistryParams::initFromR(RInsidePOET &R) { - this->database_path = - Rcpp::as(R.parseEval("mysetup$chemistry$database")); - this->input_script = - Rcpp::as(R.parseEval("mysetup$chemistry$input_script")); + this->database_path = + Rcpp::as(R.parseEval("mysetup$chemistry$database")); + this->input_script = + Rcpp::as(R.parseEval("mysetup$chemistry$input_script")); - if (R.checkIfExists("dht_species", "mysetup$chemistry")) { - this->dht_signifs = - R.wrapNamedVector("mysetup$chemistry$dht_species"); - } + if (R.checkIfExists("dht_species", "mysetup$chemistry")) { + this->dht_signifs = + R.wrapNamedVector("mysetup$chemistry$dht_species"); + } - if (R.checkIfExists("pht_species", "mysetup$chemistry")) { - this->pht_signifs = - R.wrapNamedVector("mysetup$chemistry$pht_species"); - } + if (R.checkIfExists("pht_species", "mysetup$chemistry")) { + this->pht_signifs = + R.wrapNamedVector("mysetup$chemistry$pht_species"); + } } SimParams::SimParams(int world_rank_, int world_size_) { - this->simparams.world_rank = world_rank_; - this->simparams.world_size = world_size_; + this->simparams.world_rank = world_rank_; + this->simparams.world_size = world_size_; } int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { - // initialize argh object - argh::parser cmdl(argv); + // initialize argh object + argh::parser cmdl(argv); - // if user asked for help - if (cmdl[{"help", "h"}]) { - if (simparams.world_rank == 0) { - cout << "Todo" << endl - << "See README.md for further information." << endl; + // if user asked for help + if (cmdl[{"help", "h"}]) { + if (simparams.world_rank == 0) { + MSG("Todo"); + MSG("See README.md for further information."); + } + return poet::PARSER_HELP; } - return poet::PARSER_HELP; - } - // if positional arguments are missing - else if (!cmdl(2)) { - if (simparams.world_rank == 0) { - cerr << "ERROR. Kin needs 2 positional arguments: " << endl - << "1) the R script defining your simulation and" << endl - << "2) the directory prefix where to save results and profiling" - << endl; + // if positional arguments are missing + else if (!cmdl(2)) { + if (simparams.world_rank == 0) { + ERRMSG("Kin needs 2 positional arguments: "); + ERRMSG("1) the R script defining your simulation and"); + ERRMSG("2) the directory prefix where to save results and profiling"); + } + return poet::PARSER_ERROR; } - return poet::PARSER_ERROR; - } - // collect all parameters which are not known, print them to stderr and return - // with PARSER_ERROR - std::list optionsError = validateOptions(cmdl); - if (!optionsError.empty()) { - if (simparams.world_rank == 0) { - cerr << "Unrecognized option(s):\n" << endl; - for (auto option : optionsError) { - cerr << option << endl; - } - cerr << "\nMake sure to use available options. Exiting!" << endl; + // collect all parameters which are not known, print them to stderr and return + // with PARSER_ERROR + std::list optionsError = validateOptions(cmdl); + if (!optionsError.empty()) { + if (simparams.world_rank == 0) { + ERRMSG("Unrecognized option(s):\n"); + for (auto option : optionsError) { + ERRMSG(std::string(option)); + } + ERRMSG("Make sure to use available options. Exiting!"); + } + return poet::PARSER_ERROR; } - return poet::PARSER_ERROR; - } + + simparams.print_progressbar = cmdl[{"P", "progress"}]; - simparams.print_progressbar = cmdl[{"P", "progress"}]; - - /*Parse DHT arguments*/ - chem_params.use_dht = cmdl["dht"]; - // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n'; - - if (chem_params.use_dht) { - cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> chem_params.dht_size; - // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << - // endl; - - cmdl("dht-snaps", 0) >> chem_params.dht_snaps; - - cmdl("dht-file") >> chem_params.dht_file; - } - /*Parse work package size*/ - cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; - - chem_params.use_interp = cmdl["interp"]; - cmdl("interp-size", 100) >> chem_params.pht_size; - cmdl("interp-min", 5) >> chem_params.interp_min_entries; - cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; - - /*Parse output options*/ - simparams.store_result = !cmdl["ignore-result"]; - - if (simparams.world_rank == 0) { - cout << "CPP: Complete results storage is " - << (simparams.store_result ? "ON" : "OFF") << endl; - cout << "CPP: Work Package Size: " << simparams.wp_size << endl; - cout << "CPP: DHT is " << (chem_params.use_dht ? "ON" : "OFF") << '\n'; + /*Parse DHT arguments*/ + chem_params.use_dht = cmdl["dht"]; + // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n'; if (chem_params.use_dht) { - cout << "CPP: DHT strategy is " << simparams.dht_strategy << endl; - cout << "CPP: DHT key default digits (ignored if 'signif_vector' is " - "defined) = " - << simparams.dht_significant_digits << endl; - cout << "CPP: DHT logarithm before rounding: " - << (simparams.dht_log ? "ON" : "OFF") << endl; - cout << "CPP: DHT size per process (Byte) = " << chem_params.dht_size - << endl; - cout << "CPP: DHT save snapshots is " << chem_params.dht_snaps << endl; - cout << "CPP: DHT load file is " << chem_params.dht_file << endl; + cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> chem_params.dht_size; + // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << + // endl; + + cmdl("dht-snaps", 0) >> chem_params.dht_snaps; + + cmdl("dht-file") >> chem_params.dht_file; + } + /*Parse work package size*/ + cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; + + chem_params.use_interp = cmdl["interp"]; + cmdl("interp-size", 100) >> chem_params.pht_size; + cmdl("interp-min", 5) >> chem_params.interp_min_entries; + cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; + + /*Parse output options*/ + simparams.store_result = !cmdl["ignore-result"]; + + if (simparams.world_rank == 0) { + MSG("Complete results storage is " + BOOL_PRINT(simparams.store_result)); + MSG("Work Package Size: " + std::to_string(simparams.wp_size)); + MSG("DHT is " + BOOL_PRINT(chem_params.use_dht)); + + if (chem_params.use_dht) { + MSG("DHT strategy is " + std::to_string(simparams.dht_strategy)); + // MDL: these should be outdated (?) + // MSG("DHT key default digits (ignored if 'signif_vector' is " + // "defined) = " + // << simparams.dht_significant_digits); + // MSG("DHT logarithm before rounding: " + // << (simparams.dht_log ? "ON" : "OFF")); + MSG("DHT size per process (Byte) = " + std::to_string(chem_params.dht_size)); + MSG("DHT save snapshots is " + BOOL_PRINT(chem_params.dht_snaps)); + MSG("DHT load file is " + chem_params.dht_file); + } + + if (chem_params.use_interp) { + MSG("PHT interpolation enabled: " + BOOL_PRINT(chem_params.use_interp)); + MSG("PHT interp-size = " + std::to_string(chem_params.pht_size)); + MSG("PHT interp-min = " + std::to_string(chem_params.interp_min_entries)); + MSG("PHT interp-bucket-entries = " + std::to_string(chem_params.pht_max_entries)); + } } - if (chem_params.use_interp) { - cout << "CPP: PHT interpolation enabled: " << chem_params.use_interp << endl; - cout << "CPP: PHT interp-size = " << chem_params.pht_size << endl; - cout << "CPP: PHT interp-min = " << chem_params.interp_min_entries << endl; - cout << "CPP: PHT interp-bucket-entries = " << chem_params.pht_max_entries << endl; - } - } + cmdl(1) >> filesim; + cmdl(2) >> out_dir; - cmdl(1) >> filesim; - cmdl(2) >> out_dir; + chem_params.dht_outdir = out_dir; - chem_params.dht_outdir = out_dir; + /* distribute information to R runtime */ + // if local_rank == 0 then master else worker + R["local_rank"] = simparams.world_rank; + // assign a char* (string) to 'filesim' + R["filesim"] = wrap(filesim); + // assign a char* (string) to 'fileout' + R["fileout"] = wrap(out_dir); + // pass the boolean "store_result" to the R process + R["store_result"] = simparams.store_result; + // worker count + R["n_procs"] = simparams.world_size - 1; + // work package size + R["work_package_size"] = simparams.wp_size; + // dht enabled? + R["dht_enabled"] = chem_params.use_dht; + // log before rounding? + R["dht_log"] = simparams.dht_log; - /* distribute information to R runtime */ - // if local_rank == 0 then master else worker - R["local_rank"] = simparams.world_rank; - // assign a char* (string) to 'filesim' - R["filesim"] = wrap(filesim); - // assign a char* (string) to 'fileout' - R["fileout"] = wrap(out_dir); - // pass the boolean "store_result" to the R process - R["store_result"] = simparams.store_result; - // worker count - R["n_procs"] = simparams.world_size - 1; - // work package size - R["work_package_size"] = simparams.wp_size; - // dht enabled? - R["dht_enabled"] = chem_params.use_dht; - // log before rounding? - R["dht_log"] = simparams.dht_log; + // eval the init string, ignoring any returns + R.parseEvalQ("source(filesim)"); + R.parseEvalQ("mysetup <- setup"); - // eval the init string, ignoring any returns - R.parseEvalQ("source(filesim)"); - R.parseEvalQ("mysetup <- setup"); + this->chem_params.initFromR(R); - this->chem_params.initFromR(R); - - return poet::PARSER_OK; + return poet::PARSER_OK; } void SimParams::initVectorParams(RInside &R) {} std::list SimParams::validateOptions(argh::parser cmdl) { - /* store all unknown parameters here */ - std::list retList; + /* store all unknown parameters here */ + std::list retList; - /* loop over all flags and compare to given flaglist*/ - for (auto &flag : cmdl.flags()) { - if (!(flaglist.find(flag) != flaglist.end())) - retList.push_back(flag); - } + /* loop over all flags and compare to given flaglist*/ + for (auto &flag : cmdl.flags()) { + if (!(flaglist.find(flag) != flaglist.end())) + retList.push_back(flag); + } - /* and loop also over params and compare to given paramlist */ - for (auto ¶m : cmdl.params()) { - if (!(paramlist.find(param.first) != paramlist.end())) - retList.push_back(param.first); - } + /* and loop also over params and compare to given paramlist */ + for (auto ¶m : cmdl.params()) { + if (!(paramlist.find(param.first) != paramlist.end())) + retList.push_back(param.first); + } - return retList; + return retList; } From e9969eeb66d5521a9e8f15080b88606b5d970596 Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Wed, 9 Aug 2023 14:16:09 +0200 Subject: [PATCH 11/52] fix: add includes to macros --- include/poet/Macros.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/poet/Macros.hpp b/include/poet/Macros.hpp index f632ea3ee..47beeeae3 100644 --- a/include/poet/Macros.hpp +++ b/include/poet/Macros.hpp @@ -1,7 +1,10 @@ -// Time-stamp: "Last modified 2023-08-09 12:10:57 delucia" +// Time-stamp: "Last modified 2023-08-09 14:16:04 mluebke" #ifndef MACROS_H #define MACROS_H +#include +#include + // Prepend "msg" with name of calling function #define MSG(msg) std::cout << "CPP: " << __func__ << ": " << std::string(msg) << std::endl; From ff8da121932d840c6e79ae1e7ffae6db6dbff9dc Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Thu, 10 Aug 2023 14:15:26 +0200 Subject: [PATCH 12/52] feat: enable interpolation without DHT lookup --- app/poet.cpp | 30 +- ext/phreeqcrm | 2 +- include/poet/ChemistryModule.hpp | 9 +- include/poet/DHT_Types.hpp | 8 - include/poet/DHT_Wrapper.hpp | 25 +- include/poet/DataStructures.hpp | 16 + include/poet/Interpolation.hpp | 14 +- include/poet/enums.hpp | 10 + src/ChemistryModule/ChemistryModule.cpp | 15 +- src/ChemistryModule/MasterFunctions.cpp | 23 +- src/ChemistryModule/SurrogateModels/DHT.c | 6 +- .../SurrogateModels/DHT_Wrapper.cpp | 90 ++--- .../SurrogateModels/InterpolationModule.cpp | 46 ++- .../SurrogateModels/ProximityHashTable.cpp | 28 +- src/ChemistryModule/WorkerFunctions.cpp | 240 +++--------- src/SimParams.cpp | 345 ++++++++++-------- 16 files changed, 393 insertions(+), 514 deletions(-) delete mode 100644 include/poet/DHT_Types.hpp create mode 100644 include/poet/enums.hpp diff --git a/app/poet.cpp b/app/poet.cpp index 67cf3c6fd..51dc51fd9 100644 --- a/app/poet.cpp +++ b/app/poet.cpp @@ -224,21 +224,21 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, R.parseEvalQ("profiling$dht_get_time <- dht_get_time"); R["dht_fill_time"] = Rcpp::wrap(chem.GetWorkerDHTFillTimings()); R.parseEvalQ("profiling$dht_fill_time <- dht_fill_time"); - if (params.getChemParams().use_interp) { - R["interp_w"] = Rcpp::wrap(chem.GetWorkerInterpolationWriteTimings()); - R.parseEvalQ("profiling$interp_write <- interp_w"); - R["interp_r"] = Rcpp::wrap(chem.GetWorkerInterpolationReadTimings()); - R.parseEvalQ("profiling$interp_read <- interp_r"); - R["interp_g"] = Rcpp::wrap(chem.GetWorkerInterpolationGatherTimings()); - R.parseEvalQ("profiling$interp_gather <- interp_g"); - R["interp_fc"] = - Rcpp::wrap(chem.GetWorkerInterpolationFunctionCallTimings()); - R.parseEvalQ("profiling$interp_function_calls <- interp_fc"); - R["interp_calls"] = Rcpp::wrap(chem.GetWorkerInterpolationCalls()); - R.parseEvalQ("profiling$interp_calls <- interp_calls"); - R["interp_cached"] = Rcpp::wrap(chem.GetWorkerPHTCacheHits()); - R.parseEvalQ("profiling$interp_cached <- interp_cached"); - } + } + if (params.getChemParams().use_interp) { + R["interp_w"] = Rcpp::wrap(chem.GetWorkerInterpolationWriteTimings()); + R.parseEvalQ("profiling$interp_write <- interp_w"); + R["interp_r"] = Rcpp::wrap(chem.GetWorkerInterpolationReadTimings()); + R.parseEvalQ("profiling$interp_read <- interp_r"); + R["interp_g"] = Rcpp::wrap(chem.GetWorkerInterpolationGatherTimings()); + R.parseEvalQ("profiling$interp_gather <- interp_g"); + R["interp_fc"] = + Rcpp::wrap(chem.GetWorkerInterpolationFunctionCallTimings()); + R.parseEvalQ("profiling$interp_function_calls <- interp_fc"); + R["interp_calls"] = Rcpp::wrap(chem.GetWorkerInterpolationCalls()); + R.parseEvalQ("profiling$interp_calls <- interp_calls"); + R["interp_cached"] = Rcpp::wrap(chem.GetWorkerPHTCacheHits()); + R.parseEvalQ("profiling$interp_cached <- interp_cached"); } chem.MasterLoopBreak(); diff --git a/ext/phreeqcrm b/ext/phreeqcrm index 89f713b27..6ed14c353 160000 --- a/ext/phreeqcrm +++ b/ext/phreeqcrm @@ -1 +1 @@ -Subproject commit 89f713b273cd5340b2e8169523da04c2d7ad89c9 +Subproject commit 6ed14c35322a245e3a9776ef262c0ac0eba3b301 diff --git a/include/poet/ChemistryModule.hpp b/include/poet/ChemistryModule.hpp index 16dc1f5f9..f5930184d 100644 --- a/include/poet/ChemistryModule.hpp +++ b/include/poet/ChemistryModule.hpp @@ -1,15 +1,15 @@ -// Time-stamp: "Last modified 2023-08-01 13:13:18 mluebke" +// Time-stamp: "Last modified 2023-08-08 13:15:49 mluebke" #ifndef CHEMISTRYMODULE_H_ #define CHEMISTRYMODULE_H_ #include "DHT_Wrapper.hpp" +#include "DataStructures.hpp" #include "Field.hpp" #include "Interpolation.hpp" #include "IrmResult.h" #include "PhreeqcRM.h" #include "SimParams.hpp" -#include "DataStructures.hpp" #include #include @@ -322,9 +322,8 @@ protected: void WorkerPerfToMaster(int type, const struct worker_s &timings); void WorkerMetricsToMaster(int type); - IRM_RESULT WorkerRunWorkPackage(std::vector &vecWP, - std::vector &vecMapping, - double dSimTime, double dTimestep); + IRM_RESULT WorkerRunWorkPackage(WorkPackage &work_package, double dSimTime, + double dTimestep); std::vector CalculateWPSizesVector(uint32_t n_cells, uint32_t wp_size) const; diff --git a/include/poet/DHT_Types.hpp b/include/poet/DHT_Types.hpp deleted file mode 100644 index 15eb9913e..000000000 --- a/include/poet/DHT_Types.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef DHT_TYPES_H_ -#define DHT_TYPES_H_ - -namespace poet { -enum DHT_PROP_TYPES { DHT_TYPE_DEFAULT, DHT_TYPE_CHARGE, DHT_TYPE_TOTAL }; -} - -#endif // DHT_TYPES_H_ diff --git a/include/poet/DHT_Wrapper.hpp b/include/poet/DHT_Wrapper.hpp index f2ed226ab..a066d3ccd 100644 --- a/include/poet/DHT_Wrapper.hpp +++ b/include/poet/DHT_Wrapper.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-01 13:48:34 mluebke" +// Time-stamp: "Last modified 2023-08-08 18:01:12 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -25,7 +25,7 @@ #include "DataStructures.hpp" #include "LookupKey.hpp" -#include "poet/DHT_Types.hpp" +#include "enums.hpp" #include "poet/HashFunctions.hpp" #include "poet/LookupKey.hpp" #include "poet/Rounding.hpp" @@ -57,12 +57,9 @@ using DHT_Location = std::pair; class DHT_Wrapper { public: using DHT_ResultObject = struct DHTResobj { - uint32_t length; std::vector keys; - std::vector> results; - std::vector old_values; - std::vector needPhreeqc; std::vector locations; + std::vector filledDHT; }; static constexpr std::int32_t DHT_KEY_INPUT_CUSTOM = @@ -119,9 +116,7 @@ public: * @param[in,out] work_package Pointer to current work package * @param dt Current timestep of simulation */ - auto checkDHT(int length, double dt, const std::vector &work_package, - std::vector &curr_mapping) - -> const DHT_ResultObject &; + auto checkDHT(WorkPackage &work_package) -> const DHT_ResultObject &; /** * @brief Write simulated values into DHT @@ -139,7 +134,7 @@ public: * outputs of the PHREEQC simulation * @param dt Current timestep of simulation */ - void fillDHT(int length, const std::vector &work_package); + void fillDHT(const WorkPackage &work_package); void resultsToWP(std::vector &work_package); @@ -214,6 +209,14 @@ public: std::uint32_t getOutputCount() const { return this->data_count; } + inline void prepareKeys(const std::vector> &input_values, + double dt) { + dht_results.keys.resize(input_values.size()); + for (std::size_t i = 0; i < input_values.size(); i++) { + dht_results.keys[i] = fuzzForDHT(input_values[i], dt); + } + } + private: uint32_t key_count; uint32_t data_count; @@ -221,7 +224,7 @@ private: DHT *dht_object; MPI_Comm communicator; - LookupKey fuzzForDHT(int var_count, void *key, double dt); + LookupKey fuzzForDHT(const std::vector &cell, double dt); std::vector outputToInputAndRates(const std::vector &old_results, diff --git a/include/poet/DataStructures.hpp b/include/poet/DataStructures.hpp index ec323d6fe..c2ab99d77 100644 --- a/include/poet/DataStructures.hpp +++ b/include/poet/DataStructures.hpp @@ -1,13 +1,29 @@ #ifndef DATASTRUCTURES_H_ #define DATASTRUCTURES_H_ +#include "enums.hpp" + #include +#include #include #include #include namespace poet { +struct WorkPackage { + std::size_t size; + std::vector> input; + std::vector> output; + std::vector mapping; + + WorkPackage(size_t _size) : size(_size) { + input.resize(size); + output.resize(size); + mapping.resize(size, CHEM_PQC); + } +}; + template class NamedVector { public: void insert(std::pair to_insert) { diff --git a/include/poet/Interpolation.hpp b/include/poet/Interpolation.hpp index c80cdc97b..5f85872ca 100644 --- a/include/poet/Interpolation.hpp +++ b/include/poet/Interpolation.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-01 18:10:48 mluebke" +// Time-stamp: "Last modified 2023-08-09 12:52:37 mluebke" #ifndef INTERPOLATION_H_ #define INTERPOLATION_H_ @@ -65,10 +65,9 @@ public: void writeLocationToPHT(LookupKey key, DHT_Location location); const PHT_Result &query(const LookupKey &key, - const std::vector &signif, - std::uint32_t min_entries_needed, - std::uint32_t input_count, - std::uint32_t output_count); + const std::uint32_t min_entries_needed, + const std::uint32_t input_count, + const std::uint32_t output_count); std::uint64_t getLocations(const LookupKey &key); @@ -185,10 +184,9 @@ public: auto getMinEntriesNeeded() { return this->min_entries_needed; } - void writePairs(const DHT_Wrapper::DHT_ResultObject &in); + void writePairs(); - void tryInterpolation(DHT_Wrapper::DHT_ResultObject &dht_results, - std::vector &curr_mapping); + void tryInterpolation(WorkPackage &work_package); void resultsToWP(std::vector &work_package) const; diff --git a/include/poet/enums.hpp b/include/poet/enums.hpp new file mode 100644 index 000000000..11c3c1281 --- /dev/null +++ b/include/poet/enums.hpp @@ -0,0 +1,10 @@ +#ifndef ENUMS_H_ +#define ENUMS_H_ + +namespace poet { +enum DHT_PROP_TYPES { DHT_TYPE_DEFAULT, DHT_TYPE_CHARGE, DHT_TYPE_TOTAL }; + +enum CHEMISTRY_OUT_SOURCE { CHEM_PQC, CHEM_DHT, CHEM_INTERP }; +} // namespace poet + +#endif // ENUMS_H_ diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp index acbd7af7d..6f194e8bc 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/ChemistryModule/ChemistryModule.cpp @@ -293,16 +293,23 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { this->n_cells = trans_field.GetRequestedVecSize(); chem_field = Field(n_cells); - std::vector phreeqc_init; - this->getDumpedField(phreeqc_init); + std::vector> phreeqc_dump(this->nxyz); + this->getDumpedField(phreeqc_dump); if (is_sequential) { - std::vector init_vec{phreeqc_init}; - this->unshuffleField(phreeqc_init, n_cells, prop_count, 1, init_vec); + std::vector init_vec; + for (std::size_t i = 0; i < n_cells; i++) { + init_vec.insert(init_vec.end(), phreeqc_dump[i].begin(), + phreeqc_dump[i].end()); + } + + const auto tmp_buffer{init_vec}; + this->unshuffleField(tmp_buffer, n_cells, prop_count, 1, init_vec); chem_field.InitFromVec(init_vec, prop_names); return; } + std::vector &phreeqc_init = phreeqc_dump[0]; std::vector> initial_values; for (const auto &vec : trans_field.As2DVector()) { diff --git a/src/ChemistryModule/MasterFunctions.cpp b/src/ChemistryModule/MasterFunctions.cpp index b40341dbd..77bfa6023 100644 --- a/src/ChemistryModule/MasterFunctions.cpp +++ b/src/ChemistryModule/MasterFunctions.cpp @@ -90,8 +90,8 @@ std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const { MPI_Get_count(&probe, MPI_UINT32_T, &count); std::vector ret(count); - MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_DHT_EVICTIONS, - this->group_comm, NULL); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, + WORKER_DHT_EVICTIONS, this->group_comm, NULL); return ret; } @@ -322,9 +322,24 @@ void poet::ChemistryModule::RunCells() { void poet::ChemistryModule::MasterRunSequential() { std::vector shuffled_field = shuffleField(chem_field.AsVector(), n_cells, prop_count, 1); - this->setDumpedField(shuffled_field); + + std::vector> input; + for (std::size_t i = 0; i < n_cells; i++) { + input.push_back( + std::vector(shuffled_field.begin() + (i * prop_count), + shuffled_field.begin() + ((i + 1) * prop_count))); + } + + this->setDumpedField(input); PhreeqcRM::RunCells(); - this->getDumpedField(shuffled_field); + this->getDumpedField(input); + + shuffled_field.clear(); + for (std::size_t i = 0; i < n_cells; i++) { + shuffled_field.insert(shuffled_field.end(), input[i].begin(), + input[i].end()); + } + std::vector out_vec{shuffled_field}; unshuffleField(shuffled_field, n_cells, prop_count, 1, out_vec); chem_field.SetFromVector(out_vec); diff --git a/src/ChemistryModule/SurrogateModels/DHT.c b/src/ChemistryModule/SurrogateModels/DHT.c index 5275cb02c..0a7261049 100644 --- a/src/ChemistryModule/SurrogateModels/DHT.c +++ b/src/ChemistryModule/SurrogateModels/DHT.c @@ -1,4 +1,4 @@ -/// Time-stamp: "Last modified 2023-06-28 15:58:19 mluebke" +/// Time-stamp: "Last modified 2023-08-10 11:50:46 mluebke" /* ** Copyright (C) 2017-2021 Max Luebke (University of Potsdam) ** @@ -35,8 +35,8 @@ static void determine_dest(uint64_t hash, int comm_size, /** how many bytes do we need for one index? */ int index_size = sizeof(double) - (index_count - 1); for (int i = 0; i < index_count; i++) { - tmp_index = 0; - memcpy(&tmp_index, (char *)&hash + i, index_size); + tmp_index = (hash >> (i * 8)) & ((1ULL << (index_size * 8)) - 1); + /* memcpy(&tmp_index, (char *)&hash + i, index_size); */ index[i] = (uint64_t)(tmp_index % table_size); } *dest_rank = (unsigned int)(hash % comm_size); diff --git a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp index 86df22d49..bc3e8cfd5 100644 --- a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp +++ b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-01 13:41:57 mluebke" +// Time-stamp: "Last modified 2023-08-09 14:05:01 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -21,11 +21,11 @@ */ #include "poet/DHT_Wrapper.hpp" -#include "poet/DHT_Types.hpp" #include "poet/HashFunctions.hpp" #include "poet/Interpolation.hpp" #include "poet/LookupKey.hpp" #include "poet/Rounding.hpp" +#include "poet/enums.hpp" #include #include @@ -83,97 +83,68 @@ DHT_Wrapper::~DHT_Wrapper() { // free DHT DHT_free(dht_object, NULL, NULL); } -auto DHT_Wrapper::checkDHT(int length, double dt, - const std::vector &work_package, - std::vector &curr_mapping) +auto DHT_Wrapper::checkDHT(WorkPackage &work_package) -> const DHT_ResultObject & { - dht_results.length = length; - dht_results.keys.resize(length); - dht_results.results.resize(length); - dht_results.needPhreeqc.resize(length); + const auto length = work_package.size; std::vector bucket_writer(this->data_count + input_key_elements.size()); - std::vector new_mapping; // loop over every grid cell contained in work package for (int i = 0; i < length; i++) { // point to current grid cell - void *key = (void *)&(work_package[i * this->data_count]); - auto &data = dht_results.results[i]; auto &key_vector = dht_results.keys[i]; - // data.resize(this->data_count); - key_vector = fuzzForDHT(this->key_count, key, dt); - // overwrite input with data from DHT, IF value is found in DHT int res = DHT_read(this->dht_object, key_vector.data(), bucket_writer.data()); switch (res) { case DHT_SUCCESS: - dht_results.results[i] = inputAndRatesToOutput(bucket_writer); - dht_results.needPhreeqc[i] = false; + work_package.output[i] = inputAndRatesToOutput(bucket_writer); + work_package.mapping[i] = CHEM_DHT; this->dht_hits++; break; case DHT_READ_MISS: - dht_results.needPhreeqc[i] = true; - new_mapping.push_back(curr_mapping[i]); - dht_results.results[i] = std::vector{ - &work_package[i * this->data_count], - &work_package[i * this->data_count] + this->data_count}; - - // HACK: apply normalization to total H and O in results field of DHT - // dht_results.results[i][0] -= base_totals[0]; - // dht_results.results[i][1] -= base_totals[1]; break; } } - curr_mapping = std::move(new_mapping); - dht_results.old_values = work_package; - return dht_results; } -void DHT_Wrapper::fillDHT(int length, const std::vector &work_package) { +void DHT_Wrapper::fillDHT(const WorkPackage &work_package) { + + const auto length = work_package.size; // loop over every grid cell contained in work package dht_results.locations.resize(length); + dht_results.filledDHT = std::vector(length, false); for (int i = 0; i < length; i++) { // If true grid cell was simulated, needs to be inserted into dht - if (dht_results.needPhreeqc[i]) { + if (work_package.mapping[i] == CHEM_PQC) { // check if calcite or dolomite is absent and present, resp.n and vice // versa in input/output. If this is the case -> Do not write to DHT! // HACK: hardcoded, should be fixed! - if ((dht_results.old_values[i * this->data_count + 7] == 0) != - (work_package[i * this->data_count + 7] == 0)) { - dht_results.needPhreeqc[i] = false; + if ((work_package.input[i][7] == 0) != (work_package.output[i][7] == 0)) { continue; } - if ((dht_results.old_values[i * this->data_count + 9] == 0) != - (work_package[i * this->data_count + 9] == 0)) { - dht_results.needPhreeqc[i] = false; + if ((work_package.input[i][9] == 0) != (work_package.output[i][9] == 0)) { continue; } uint32_t proc, index; - const auto &key = dht_results.keys[i]; - const auto curr_old_data = std::vector( - dht_results.old_values.begin() + (i * this->data_count), - dht_results.old_values.begin() + ((i + 1) * this->data_count)); - const auto curr_new_data = std::vector( - work_package.begin() + (i * this->data_count), - work_package.begin() + ((i + 1) * this->data_count)); - const auto data = outputToInputAndRates(curr_old_data, curr_new_data); + auto &key = dht_results.keys[i]; + const auto data = + outputToInputAndRates(work_package.input[i], work_package.output[i]); // void *data = (void *)&(work_package[i * this->data_count]); // fuzz data (round, logarithm etc.) // insert simulated data with fuzzed key into DHT - int res = DHT_write(this->dht_object, (void *)(key.data()), + int res = DHT_write(this->dht_object, key.data(), const_cast(data.data()), &proc, &index); dht_results.locations[i] = {proc, index}; @@ -182,6 +153,8 @@ void DHT_Wrapper::fillDHT(int length, const std::vector &work_package) { if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) { dht_evictions++; } + + dht_results.filledDHT[i] = true; } } } @@ -218,14 +191,14 @@ DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data) { return output; } -void DHT_Wrapper::resultsToWP(std::vector &work_package) { - for (int i = 0; i < dht_results.length; i++) { - if (!dht_results.needPhreeqc[i]) { - std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), - work_package.begin() + (data_count * i)); - } - } -} +// void DHT_Wrapper::resultsToWP(std::vector &work_package) { +// for (int i = 0; i < dht_results.length; i++) { +// if (!dht_results.needPhreeqc[i]) { +// std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), +// work_package.begin() + (data_count * i)); +// } +// } +// } int DHT_Wrapper::tableToFile(const char *filename) { int res = DHT_to_file(dht_object, filename); @@ -255,11 +228,10 @@ void DHT_Wrapper::printStatistics() { } } -LookupKey DHT_Wrapper::fuzzForDHT(int var_count, void *key, double dt) { +LookupKey DHT_Wrapper::fuzzForDHT(const std::vector &cell, double dt) { const auto c_zero_val = std::pow(10, AQUEOUS_EXP); - const Lookup_Keyelement dummy = {.0}; - LookupKey vecFuzz(var_count + 1, dummy); + LookupKey vecFuzz(this->key_count + 1, {.0}); DHT_Rounder rounder; int totals_i = 0; @@ -269,7 +241,7 @@ LookupKey DHT_Wrapper::fuzzForDHT(int var_count, void *key, double dt) { if (input_key_elements[i] == DHT_KEY_INPUT_CUSTOM) { continue; } - double curr_key = ((double *)key)[input_key_elements[i]]; + double curr_key = cell[input_key_elements[i]]; if (curr_key != 0) { if (curr_key < c_zero_val && this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) { @@ -284,7 +256,7 @@ LookupKey DHT_Wrapper::fuzzForDHT(int var_count, void *key, double dt) { } } // add timestep to the end of the key as double value - vecFuzz[var_count].fp_element = dt; + vecFuzz[this->key_count].fp_element = dt; return vecFuzz; } diff --git a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp index e5c36fa0c..0321e6d8a 100644 --- a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp +++ b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-01 23:18:45 mluebke" +// Time-stamp: "Last modified 2023-08-09 13:41:53 mluebke" #include "poet/DHT_Wrapper.hpp" #include "poet/HashFunctions.hpp" @@ -48,31 +48,32 @@ void InterpolationModule::initPHT(std::uint32_t key_count, key_size, data_size, entries_per_bucket, size_per_process, communicator); } -void InterpolationModule::writePairs(const DHT_Wrapper::DHT_ResultObject &in) { - for (int i = 0; i < in.length; i++) { - if (in.needPhreeqc[i]) { +void InterpolationModule::writePairs() { + const auto in = this->dht_instance.getDHTResults(); + for (int i = 0; i < in.filledDHT.size(); i++) { + if (in.filledDHT[i]) { const auto coarse_key = roundKey(in.keys[i]); pht->writeLocationToPHT(coarse_key, in.locations[i]); } } } -void InterpolationModule::tryInterpolation( - DHT_Wrapper::DHT_ResultObject &dht_results, - std::vector &curr_mapping) { - interp_result.status.resize(dht_results.length, NOT_NEEDED); - interp_result.results.resize(dht_results.length, {}); +void InterpolationModule::tryInterpolation(WorkPackage &work_package) { + interp_result.status.resize(work_package.size, NOT_NEEDED); - for (int i = 0; i < dht_results.length; i++) { - if (!dht_results.needPhreeqc[i]) { + const auto dht_results = this->dht_instance.getDHTResults(); + + for (int i = 0; i < work_package.size; i++) { + if (work_package.mapping[i] != CHEM_PQC) { interp_result.status[i] = NOT_NEEDED; continue; } + const auto rounded_key = roundKey(dht_results.keys[i]); + auto pht_result = - pht->query(roundKey(dht_results.keys[i]), this->key_signifs.getValues(), - this->min_entries_needed, dht_instance.getInputCount(), - dht_instance.getOutputCount()); + pht->query(rounded_key, this->min_entries_needed, + dht_instance.getInputCount(), dht_instance.getOutputCount()); int pht_i = 0; @@ -85,9 +86,9 @@ void InterpolationModule::tryInterpolation( auto out_it = pht_result.out_values.begin() + pht_i; bool same_sig_calcite = (pht_result.in_values[pht_i][7] == 0) == - (dht_results.results[i][7] == 0); + (work_package.input[i][7] == 0); bool same_sig_dolomite = (pht_result.in_values[pht_i][8] == 0) == - (dht_results.results[i][9] == 0); + (work_package.input[i][9] == 0); if (!same_sig_calcite || !same_sig_dolomite) { pht_result.size -= 1; pht_result.in_values.erase(in_it); @@ -104,7 +105,7 @@ void InterpolationModule::tryInterpolation( } #ifdef POET_PHT_ADD - this->pht->incrementReadCounter(roundKey(dht_results.keys[i])); + this->pht->incrementReadCounter(roundKey(roundedKey)); #endif double start_fc = MPI_Wtime(); @@ -115,11 +116,11 @@ void InterpolationModule::tryInterpolation( // } // mean_water /= pht_result.size; - interp_result.results[i] = - f_interpolate(dht_instance.getKeyElements(), dht_results.results[i], + work_package.output[i] = + f_interpolate(dht_instance.getKeyElements(), work_package.input[i], pht_result.in_values, pht_result.out_values); - if (interp_result.results[i][7] < 0 || interp_result.results[i][9] < 0) { + if (work_package.output[i][7] < 0 || work_package.output[i][9] < 0) { interp_result.status[i] = INSUFFICIENT_DATA; continue; } @@ -129,10 +130,7 @@ void InterpolationModule::tryInterpolation( this->interpolations++; - curr_mapping.erase(std::remove(curr_mapping.begin(), curr_mapping.end(), i), - curr_mapping.end()); - - dht_results.needPhreeqc[i] = false; + work_package.mapping[i] = CHEM_INTERP; interp_result.status[i] = RES_OK; } } diff --git a/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp b/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp index e0f014755..285ed21d3 100644 --- a/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp +++ b/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-01 17:11:42 mluebke" +// Time-stamp: "Last modified 2023-08-09 13:32:11 mluebke" #include "poet/DHT_Wrapper.hpp" #include "poet/HashFunctions.hpp" @@ -106,9 +106,8 @@ void ProximityHashTable::writeLocationToPHT(LookupKey key, } const ProximityHashTable::PHT_Result &ProximityHashTable::query( - const LookupKey &key, const std::vector &signif, - std::uint32_t min_entries_needed, std::uint32_t input_count, - std::uint32_t output_count) { + const LookupKey &key, const std::uint32_t min_entries_needed, + const std::uint32_t input_count, const std::uint32_t output_count) { double start_r = MPI_Wtime(); const auto cache_ret = localCache[key]; @@ -158,27 +157,6 @@ const ProximityHashTable::PHT_Result &ProximityHashTable::query( buffer += input_count; lookup_results.out_values.push_back( std::vector(buffer, buffer + output_count)); - - // if (!similarityCheck(check_key, key, signif)) { - // // TODO: original stored location in PHT was overwritten in DHT. - // Need to - // // handle this! - // lookup_results.size--; - // if (lookup_results.size < min_entries_needed) { - // lookup_results.size = 0; - // break; - // } - // continue; - // } - - // auto input = convertKeysFromDHT(buffer_start, dht_key_count); - // // remove timestep from the key - // input.pop_back(); - // lookup_results.in_keys.push_back(input); - - // auto *data = reinterpret_cast(buffer + dht_key_count); - // lookup_results.out_values.push_back( - // std::vector(data, data + dht_data_count)); } if (lookup_results.size != 0) { diff --git a/src/ChemistryModule/WorkerFunctions.cpp b/src/ChemistryModule/WorkerFunctions.cpp index a87b07a86..eb9df84a2 100644 --- a/src/ChemistryModule/WorkerFunctions.cpp +++ b/src/ChemistryModule/WorkerFunctions.cpp @@ -1,10 +1,10 @@ -// Time-stamp: "Last modified 2023-08-01 17:22:20 mluebke" +// Time-stamp: "Last modified 2023-08-10 12:14:24 mluebke" -#include "IrmResult.h" #include "poet/ChemistryModule.hpp" #include "poet/DHT_Wrapper.hpp" #include "poet/Interpolation.hpp" +#include #include #include #include @@ -19,146 +19,6 @@ #include namespace poet { - -//std::vector -//inverseDistanceWeighting(const std::vector &to_calc, -// const std::vector &from, -// const std::vector> &input, -// const std::vector> &output) { -// std::vector results = from; -// -// const std::uint32_t buffer_size = input.size() + 1; -// double buffer[buffer_size]; -// double from_rescaled; -// -// const std::uint32_t data_set_n = input.size(); -// double rescaled[to_calc.size()][data_set_n + 1]; -// double weights[data_set_n]; -// -// // rescaling over all key elements -// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { -// const auto output_comp_i = to_calc[key_comp_i]; -// -// // rescale input between 0 and 1 -// for (int point_i = 0; point_i < data_set_n; point_i++) { -// rescaled[key_comp_i][point_i] = input[point_i][key_comp_i]; -// } -// -// rescaled[key_comp_i][data_set_n] = from[output_comp_i]; -// -// const double min = *std::min_element(rescaled[key_comp_i], -// rescaled[key_comp_i] + data_set_n + 1); -// const double max = *std::max_element(rescaled[key_comp_i], -// rescaled[key_comp_i] + data_set_n + 1); -// -// for (int point_i = 0; point_i < data_set_n; point_i++) { -// rescaled[key_comp_i][point_i] = -// ((max - min) != 0 -// ? (rescaled[key_comp_i][point_i] - min) / (max - min) -// : 0); -// } -// rescaled[key_comp_i][data_set_n] = -// ((max - min) != 0 ? (from[output_comp_i] - min) / (max - min) : 0); -// } -// -// // calculate distances for each data set -// double inv_sum = 0; -// for (int point_i = 0; point_i < data_set_n; point_i++) { -// double distance = 0; -// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { -// distance += std::pow( -// rescaled[key_comp_i][point_i] - rescaled[key_comp_i][data_set_n], 2); -// } -// weights[point_i] = 1 / std::sqrt(distance); -// assert(!std::isnan(weights[point_i])); -// inv_sum += weights[point_i]; -// } -// -// assert(!std::isnan(inv_sum)); -// -// // actual interpolation -// // bool has_h = false; -// // bool has_o = false; -// -// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { -// const auto output_comp_i = to_calc[key_comp_i]; -// double key_delta = 0; -// -// // if (interp_i == 0) { -// // has_h = true; -// // } -// -// // if (interp_i == 1) { -// // has_o = true; -// // } -// -// for (int j = 0; j < data_set_n; j++) { -// key_delta += weights[j] * input[j][key_comp_i]; -// } -// -// key_delta /= inv_sum; -// -// results[output_comp_i] = from[output_comp_i] + key_delta; -// } -// -// // if (!has_h) { -// // double new_val = 0; -// // for (int j = 0; j < data_set_n; j++) { -// // new_val += weights[j] * output[j][0]; -// // } -// // results[0] = new_val / inv_sum; -// // } -// -// // if (!has_h) { -// // double new_val = 0; -// // for (int j = 0; j < data_set_n; j++) { -// // new_val += weights[j] * output[j][1]; -// // } -// // results[1] = new_val / inv_sum; -// // } -// -// // for (std::uint32_t i = 0; i < to_calc.size(); i++) { -// // const std::uint32_t interp_i = to_calc[i]; -// -// // // rescale input between 0 and 1 -// // for (int j = 0; j < input.size(); j++) { -// // buffer[j] = input[j].at(i); -// // } -// -// // buffer[buffer_size - 1] = from[interp_i]; -// -// // const double min = *std::min_element(buffer, buffer + buffer_size); -// // const double max = *std::max_element(buffer, buffer + buffer_size); -// -// // for (int j = 0; j < input.size(); j++) { -// // buffer[j] = ((max - min) != 0 ? (buffer[j] - min) / (max - min) : 1); -// // } -// // from_rescaled = -// // ((max - min) != 0 ? (from[interp_i] - min) / (max - min) : 0); -// -// // double inv_sum = 0; -// -// // // calculate distances for each point -// // for (int i = 0; i < input.size(); i++) { -// // const double distance = std::pow(buffer[i] - from_rescaled, 2); -// -// // buffer[i] = distance > 0 ? (1 / std::sqrt(distance)) : 0; -// // inv_sum += buffer[i]; -// // } -// // // calculate new values -// // double new_val = 0; -// // for (int i = 0; i < output.size(); i++) { -// // new_val += buffer[i] * output[i][interp_i]; -// // } -// // results[interp_i] = new_val / inv_sum; -// // if (std::isnan(results[interp_i])) { -// // std::cout << "nan with new_val = " << output[0][i] << std::endl; -// // } -// // } -// -// return results; -//} - inline std::string get_string(int root, MPI_Comm communicator) { int count; MPI_Bcast(&count, 1, MPI_INT, root, communicator); @@ -256,8 +116,6 @@ void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings, void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, int double_count, struct worker_s &timings) { - int local_work_package_size = 0; - static int counter = 1; double dht_get_start, dht_get_end; @@ -268,12 +126,11 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, double dt; double current_sim_time; - const uint32_t n_cells_times_props = this->prop_count * this->wp_size; - std::vector vecCurrWP(n_cells_times_props + BUFFER_OFFSET); int count = double_count; + std::vector mpi_buffer(count); /* receive */ - MPI_Recv(vecCurrWP.data(), count, MPI_DOUBLE, 0, LOOP_WORK, this->group_comm, + MPI_Recv(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, this->group_comm, MPI_STATUS_IGNORE); /* decrement count of work_package by BUFFER_OFFSET */ @@ -283,72 +140,70 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, * mpi_buffer */ // work_package_size - local_work_package_size = vecCurrWP[count]; + poet::WorkPackage s_curr_wp(mpi_buffer[count]); // current iteration of simulation - iteration = vecCurrWP[count + 1]; + iteration = mpi_buffer[count + 1]; // current timestep size - dt = vecCurrWP[count + 2]; + dt = mpi_buffer[count + 2]; // current simulation time ('age' of simulation) - current_sim_time = vecCurrWP[count + 3]; + current_sim_time = mpi_buffer[count + 3]; /* 4th double value is currently a placeholder */ // placeholder = mpi_buffer[count+4]; - vecCurrWP.resize(n_cells_times_props); - std::vector vecMappingWP(local_work_package_size); + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { + s_curr_wp.input[wp_i] = + std::vector(mpi_buffer.begin() + this->prop_count * wp_i, + mpi_buffer.begin() + this->prop_count * (wp_i + 1)); + } - { - std::uint32_t i = 0; - std::generate(vecMappingWP.begin(), vecMappingWP.end(), - [&] { return i++; }); + // std::cout << this->comm_rank << ":" << counter++ << std::endl; + if (dht_enabled || interp_enabled) { + dht->prepareKeys(s_curr_wp.input, dt); } if (dht_enabled) { /* check for values in DHT */ dht_get_start = MPI_Wtime(); - dht->checkDHT(local_work_package_size, dt, vecCurrWP, vecMappingWP); + dht->checkDHT(s_curr_wp); dht_get_end = MPI_Wtime(); + timings.dht_get += dht_get_end - dht_get_start; + } - if (interp_enabled) { - interp->tryInterpolation(dht->getDHTResults(), vecMappingWP); - } + if (interp_enabled) { + interp->tryInterpolation(s_curr_wp); } phreeqc_time_start = MPI_Wtime(); - if (WorkerRunWorkPackage(vecCurrWP, vecMappingWP, current_sim_time, dt) != - IRM_OK) { - throw std::runtime_error("Phreeqc threw an error!"); + if (WorkerRunWorkPackage(s_curr_wp, current_sim_time, dt) != IRM_OK) { + std::cerr << "Phreeqc error" << std::endl; }; phreeqc_time_end = MPI_Wtime(); - if (dht_enabled) { - dht->resultsToWP(vecCurrWP); - if (interp_enabled) { - interp->resultsToWP(vecCurrWP); - } + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { + std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), + mpi_buffer.begin() + this->prop_count * wp_i); } /* send results to master */ MPI_Request send_req; - MPI_Isend(vecCurrWP.data(), count, MPI_DOUBLE, 0, LOOP_WORK, MPI_COMM_WORLD, + MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, MPI_COMM_WORLD, &send_req); - if (dht_enabled) { + if (dht_enabled || interp_enabled) { /* write results to DHT */ dht_fill_start = MPI_Wtime(); - dht->fillDHT(local_work_package_size, vecCurrWP); + dht->fillDHT(s_curr_wp); dht_fill_end = MPI_Wtime(); if (interp_enabled) { - interp->writePairs(dht->getDHTResults()); + interp->writePairs(); } - - timings.dht_get += dht_get_end - dht_get_start; timings.dht_fill += dht_fill_end - dht_fill_start; } @@ -429,36 +284,39 @@ void poet::ChemistryModule::WorkerReadDHTDump( } IRM_RESULT -poet::ChemistryModule::WorkerRunWorkPackage( - std::vector &vecWP, std::vector &vecMapping, - double dSimTime, double dTimestep) { - if ((this->wp_size * this->prop_count) != vecWP.size()) { - return IRM_INVALIDARG; - } - +poet::ChemistryModule::WorkerRunWorkPackage(WorkPackage &work_package, + double dSimTime, double dTimestep) { // check if we actually need to start phreeqc - bool bRunPhreeqc = false; - for (const auto &aMappingNum : vecMapping) { - if (aMappingNum != -1) { - bRunPhreeqc = true; - break; + std::vector pqc_mapping; + + for (std::size_t i = 0; i < work_package.size; i++) { + if (work_package.mapping[i] == CHEM_PQC) { + pqc_mapping.push_back(i); } } - if (!bRunPhreeqc) { + if (pqc_mapping.empty()) { return IRM_OK; } IRM_RESULT result; - this->PhreeqcRM::setPOETMapping(vecMapping); - this->setDumpedField(vecWP); + this->PhreeqcRM::setPOETMapping(pqc_mapping); + this->setDumpedField(work_package.input); this->PhreeqcRM::SetTime(dSimTime); this->PhreeqcRM::SetTimeStep(dTimestep); result = this->PhreeqcRM::RunCells(); - this->getDumpedField(vecWP); + std::vector> output_tmp(work_package.size); + + this->getDumpedField(output_tmp); + + for (std::size_t i = 0; i < work_package.size; i++) { + if (work_package.mapping[i] == CHEM_PQC) { + work_package.output[i] = output_tmp[i]; + } + } return result; } diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 8088609cf..4348917f5 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -18,15 +18,17 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include "poet/enums.hpp" #include #include #include -#include #include #include #include - +#include +#include +#include #include #include #include @@ -37,202 +39,233 @@ using namespace std; using namespace Rcpp; poet::GridParams::s_GridParams(RInside &R) { - auto tmp_n_cells = - Rcpp::as>(R.parseEval("mysetup$grid$n_cells")); - assert(tmp_n_cells.size() < 3); + auto tmp_n_cells = + Rcpp::as>(R.parseEval("mysetup$grid$n_cells")); + assert(tmp_n_cells.size() < 3); - this->dim = tmp_n_cells.size(); + this->dim = tmp_n_cells.size(); - std::copy(tmp_n_cells.begin(), tmp_n_cells.end(), this->n_cells.begin()); + std::copy(tmp_n_cells.begin(), tmp_n_cells.end(), this->n_cells.begin()); - auto tmp_s_cells = - Rcpp::as>(R.parseEval("mysetup$grid$s_cells")); + auto tmp_s_cells = + Rcpp::as>(R.parseEval("mysetup$grid$s_cells")); - assert(tmp_s_cells.size() == this->dim); + assert(tmp_s_cells.size() == this->dim); - std::copy(tmp_s_cells.begin(), tmp_s_cells.end(), this->s_cells.begin()); + std::copy(tmp_s_cells.begin(), tmp_s_cells.end(), this->s_cells.begin()); - this->total_n = - (dim == 1 ? this->n_cells[0] : this->n_cells[0] * this->n_cells[1]); + this->total_n = + (dim == 1 ? this->n_cells[0] : this->n_cells[0] * this->n_cells[1]); - this->type = Rcpp::as(R.parseEval("mysetup$grid$type")); + this->type = Rcpp::as(R.parseEval("mysetup$grid$type")); } poet::DiffusionParams::s_DiffusionParams(RInside &R) { - this->initial_t = - Rcpp::as(R.parseEval("mysetup$diffusion$init")); - this->alpha = - Rcpp::as(R.parseEval("mysetup$diffusion$alpha")); - if (Rcpp::as( - R.parseEval("'vecinj_inner' %in% names(mysetup$diffusion)"))) { - this->vecinj_inner = - Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_inner")); - } - this->vecinj = - Rcpp::as(R.parseEval("mysetup$diffusion$vecinj")); - this->vecinj_index = - Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_index")); + this->initial_t = + Rcpp::as(R.parseEval("mysetup$diffusion$init")); + this->alpha = + Rcpp::as(R.parseEval("mysetup$diffusion$alpha")); + if (Rcpp::as( + R.parseEval("'vecinj_inner' %in% names(mysetup$diffusion)"))) { + this->vecinj_inner = + Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_inner")); + } + this->vecinj = + Rcpp::as(R.parseEval("mysetup$diffusion$vecinj")); + this->vecinj_index = + Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_index")); } void poet::ChemistryParams::initFromR(RInsidePOET &R) { - this->database_path = - Rcpp::as(R.parseEval("mysetup$chemistry$database")); - this->input_script = - Rcpp::as(R.parseEval("mysetup$chemistry$input_script")); + this->database_path = + Rcpp::as(R.parseEval("mysetup$chemistry$database")); + this->input_script = + Rcpp::as(R.parseEval("mysetup$chemistry$input_script")); - if (R.checkIfExists("dht_species", "mysetup$chemistry")) { - this->dht_signifs = - R.wrapNamedVector("mysetup$chemistry$dht_species"); - } + if (R.checkIfExists("dht_species", "mysetup$chemistry")) { + this->dht_signifs = + R.wrapNamedVector("mysetup$chemistry$dht_species"); + } - if (R.checkIfExists("pht_species", "mysetup$chemistry")) { - this->pht_signifs = - R.wrapNamedVector("mysetup$chemistry$pht_species"); - } + if (R.checkIfExists("pht_species", "mysetup$chemistry")) { + this->pht_signifs = + R.wrapNamedVector("mysetup$chemistry$pht_species"); + } } SimParams::SimParams(int world_rank_, int world_size_) { - this->simparams.world_rank = world_rank_; - this->simparams.world_size = world_size_; + this->simparams.world_rank = world_rank_; + this->simparams.world_size = world_size_; } int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { - // initialize argh object - argh::parser cmdl(argv); + // initialize argh object + argh::parser cmdl(argv); - // if user asked for help - if (cmdl[{"help", "h"}]) { - if (simparams.world_rank == 0) { - MSG("Todo"); - MSG("See README.md for further information."); - } - return poet::PARSER_HELP; + // if user asked for help + if (cmdl[{"help", "h"}]) { + if (simparams.world_rank == 0) { + MSG("Todo"); + MSG("See README.md for further information."); } - // if positional arguments are missing - else if (!cmdl(2)) { - if (simparams.world_rank == 0) { - ERRMSG("Kin needs 2 positional arguments: "); - ERRMSG("1) the R script defining your simulation and"); - ERRMSG("2) the directory prefix where to save results and profiling"); - } - return poet::PARSER_ERROR; + return poet::PARSER_HELP; + } + // if positional arguments are missing + else if (!cmdl(2)) { + if (simparams.world_rank == 0) { + ERRMSG("Kin needs 2 positional arguments: "); + ERRMSG("1) the R script defining your simulation and"); + ERRMSG("2) the directory prefix where to save results and profiling"); } + return poet::PARSER_ERROR; + } - // collect all parameters which are not known, print them to stderr and return - // with PARSER_ERROR - std::list optionsError = validateOptions(cmdl); - if (!optionsError.empty()) { - if (simparams.world_rank == 0) { - ERRMSG("Unrecognized option(s):\n"); - for (auto option : optionsError) { - ERRMSG(std::string(option)); - } - ERRMSG("Make sure to use available options. Exiting!"); - } - return poet::PARSER_ERROR; + // collect all parameters which are not known, print them to stderr and return + // with PARSER_ERROR + std::list optionsError = validateOptions(cmdl); + if (!optionsError.empty()) { + if (simparams.world_rank == 0) { + ERRMSG("Unrecognized option(s):\n"); + for (auto option : optionsError) { + ERRMSG(std::string(option)); + } + ERRMSG("Make sure to use available options. Exiting!"); } - - simparams.print_progressbar = cmdl[{"P", "progress"}]; + return poet::PARSER_ERROR; + } - /*Parse DHT arguments*/ - chem_params.use_dht = cmdl["dht"]; - // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n'; + simparams.print_progressbar = cmdl[{"P", "progress"}]; + + simparams.print_progressbar = cmdl[{"P", "progress"}]; + + /*Parse DHT arguments*/ + chem_params.use_dht = cmdl["dht"]; + chem_params.use_interp = cmdl["interp"]; + // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n'; + + cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> chem_params.dht_size; + // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << + // endl; + + cmdl("dht-snaps", 0) >> chem_params.dht_snaps; + + cmdl("dht-file") >> chem_params.dht_file; + /*Parse work package size*/ + cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; + + cmdl("interp-size", 100) >> chem_params.pht_size; + cmdl("interp-min", 5) >> chem_params.interp_min_entries; + cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; + + /*Parse output options*/ + simparams.store_result = !cmdl["ignore-result"]; + + cout << "CPP: Complete results storage is " + << (simparams.store_result ? "ON" : "OFF") << endl; + cout << "CPP: Work Package Size: " << simparams.wp_size << endl; + cout << "CPP: DHT is " << (chem_params.use_dht ? "ON" : "OFF") << '\n'; + + if (chem_params.use_dht || chem_params.use_interp) { + cout << "CPP: DHT strategy is " << simparams.dht_strategy << endl; + cout << "CPP: DHT key default digits (ignored if 'signif_vector' is " + "defined) = " + << simparams.dht_significant_digits << endl; + cout << "CPP: DHT logarithm before rounding: " + << (simparams.dht_log ? "ON" : "OFF") << endl; + cout << "CPP: DHT size per process (Byte) = " << chem_params.dht_size + << endl; + cout << "CPP: DHT save snapshots is " << chem_params.dht_snaps << endl; + cout << "CPP: DHT load file is " << chem_params.dht_file << endl; + } + /*Parse work package size*/ + cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; + + chem_params.use_interp = cmdl["interp"]; + cmdl("interp-size", 100) >> chem_params.pht_size; + cmdl("interp-min", 5) >> chem_params.interp_min_entries; + cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; + + /*Parse output options*/ + simparams.store_result = !cmdl["ignore-result"]; + + if (simparams.world_rank == 0) { + MSG("Complete results storage is " + BOOL_PRINT(simparams.store_result)); + MSG("Work Package Size: " + std::to_string(simparams.wp_size)); + MSG("DHT is " + BOOL_PRINT(chem_params.use_dht)); if (chem_params.use_dht) { - cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> chem_params.dht_size; - // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << - // endl; - - cmdl("dht-snaps", 0) >> chem_params.dht_snaps; - - cmdl("dht-file") >> chem_params.dht_file; - } - /*Parse work package size*/ - cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; - - chem_params.use_interp = cmdl["interp"]; - cmdl("interp-size", 100) >> chem_params.pht_size; - cmdl("interp-min", 5) >> chem_params.interp_min_entries; - cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; - - /*Parse output options*/ - simparams.store_result = !cmdl["ignore-result"]; - - if (simparams.world_rank == 0) { - MSG("Complete results storage is " + BOOL_PRINT(simparams.store_result)); - MSG("Work Package Size: " + std::to_string(simparams.wp_size)); - MSG("DHT is " + BOOL_PRINT(chem_params.use_dht)); - - if (chem_params.use_dht) { - MSG("DHT strategy is " + std::to_string(simparams.dht_strategy)); - // MDL: these should be outdated (?) - // MSG("DHT key default digits (ignored if 'signif_vector' is " - // "defined) = " - // << simparams.dht_significant_digits); - // MSG("DHT logarithm before rounding: " - // << (simparams.dht_log ? "ON" : "OFF")); - MSG("DHT size per process (Byte) = " + std::to_string(chem_params.dht_size)); - MSG("DHT save snapshots is " + BOOL_PRINT(chem_params.dht_snaps)); - MSG("DHT load file is " + chem_params.dht_file); - } - - if (chem_params.use_interp) { - MSG("PHT interpolation enabled: " + BOOL_PRINT(chem_params.use_interp)); - MSG("PHT interp-size = " + std::to_string(chem_params.pht_size)); - MSG("PHT interp-min = " + std::to_string(chem_params.interp_min_entries)); - MSG("PHT interp-bucket-entries = " + std::to_string(chem_params.pht_max_entries)); - } + MSG("DHT strategy is " + std::to_string(simparams.dht_strategy)); + // MDL: these should be outdated (?) + // MSG("DHT key default digits (ignored if 'signif_vector' is " + // "defined) = " + // << simparams.dht_significant_digits); + // MSG("DHT logarithm before rounding: " + // << (simparams.dht_log ? "ON" : "OFF")); + MSG("DHT size per process (Byte) = " + + std::to_string(chem_params.dht_size)); + MSG("DHT save snapshots is " + BOOL_PRINT(chem_params.dht_snaps)); + MSG("DHT load file is " + chem_params.dht_file); } - cmdl(1) >> filesim; - cmdl(2) >> out_dir; + if (chem_params.use_interp) { + MSG("PHT interpolation enabled: " + BOOL_PRINT(chem_params.use_interp)); + MSG("PHT interp-size = " + std::to_string(chem_params.pht_size)); + MSG("PHT interp-min = " + + std::to_string(chem_params.interp_min_entries)); + MSG("PHT interp-bucket-entries = " + + std::to_string(chem_params.pht_max_entries)); + } + } - chem_params.dht_outdir = out_dir; + cmdl(1) >> filesim; + cmdl(2) >> out_dir; - /* distribute information to R runtime */ - // if local_rank == 0 then master else worker - R["local_rank"] = simparams.world_rank; - // assign a char* (string) to 'filesim' - R["filesim"] = wrap(filesim); - // assign a char* (string) to 'fileout' - R["fileout"] = wrap(out_dir); - // pass the boolean "store_result" to the R process - R["store_result"] = simparams.store_result; - // worker count - R["n_procs"] = simparams.world_size - 1; - // work package size - R["work_package_size"] = simparams.wp_size; - // dht enabled? - R["dht_enabled"] = chem_params.use_dht; - // log before rounding? - R["dht_log"] = simparams.dht_log; + chem_params.dht_outdir = out_dir; - // eval the init string, ignoring any returns - R.parseEvalQ("source(filesim)"); - R.parseEvalQ("mysetup <- setup"); + /* distribute information to R runtime */ + // if local_rank == 0 then master else worker + R["local_rank"] = simparams.world_rank; + // assign a char* (string) to 'filesim' + R["filesim"] = wrap(filesim); + // assign a char* (string) to 'fileout' + R["fileout"] = wrap(out_dir); + // pass the boolean "store_result" to the R process + R["store_result"] = simparams.store_result; + // worker count + R["n_procs"] = simparams.world_size - 1; + // work package size + R["work_package_size"] = simparams.wp_size; + // dht enabled? + R["dht_enabled"] = chem_params.use_dht; + // log before rounding? + R["dht_log"] = simparams.dht_log; - this->chem_params.initFromR(R); + // eval the init string, ignoring any returns + R.parseEvalQ("source(filesim)"); + R.parseEvalQ("mysetup <- setup"); - return poet::PARSER_OK; + this->chem_params.initFromR(R); + + return poet::PARSER_OK; } void SimParams::initVectorParams(RInside &R) {} std::list SimParams::validateOptions(argh::parser cmdl) { - /* store all unknown parameters here */ - std::list retList; + /* store all unknown parameters here */ + std::list retList; - /* loop over all flags and compare to given flaglist*/ - for (auto &flag : cmdl.flags()) { - if (!(flaglist.find(flag) != flaglist.end())) - retList.push_back(flag); - } + /* loop over all flags and compare to given flaglist*/ + for (auto &flag : cmdl.flags()) { + if (!(flaglist.find(flag) != flaglist.end())) + retList.push_back(flag); + } - /* and loop also over params and compare to given paramlist */ - for (auto ¶m : cmdl.params()) { - if (!(paramlist.find(param.first) != paramlist.end())) - retList.push_back(param.first); - } + /* and loop also over params and compare to given paramlist */ + for (auto ¶m : cmdl.params()) { + if (!(paramlist.find(param.first) != paramlist.end())) + retList.push_back(param.first); + } - return retList; + return retList; } From f81fdfa3b41314d0d3d3e2099df4496df4b8acb3 Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Thu, 10 Aug 2023 14:17:17 +0200 Subject: [PATCH 13/52] fix: remove reworked part in output --- src/SimParams.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 4348917f5..761de8f3f 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -160,23 +160,6 @@ int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { /*Parse output options*/ simparams.store_result = !cmdl["ignore-result"]; - cout << "CPP: Complete results storage is " - << (simparams.store_result ? "ON" : "OFF") << endl; - cout << "CPP: Work Package Size: " << simparams.wp_size << endl; - cout << "CPP: DHT is " << (chem_params.use_dht ? "ON" : "OFF") << '\n'; - - if (chem_params.use_dht || chem_params.use_interp) { - cout << "CPP: DHT strategy is " << simparams.dht_strategy << endl; - cout << "CPP: DHT key default digits (ignored if 'signif_vector' is " - "defined) = " - << simparams.dht_significant_digits << endl; - cout << "CPP: DHT logarithm before rounding: " - << (simparams.dht_log ? "ON" : "OFF") << endl; - cout << "CPP: DHT size per process (Byte) = " << chem_params.dht_size - << endl; - cout << "CPP: DHT save snapshots is " << chem_params.dht_snaps << endl; - cout << "CPP: DHT load file is " << chem_params.dht_file << endl; - } /*Parse work package size*/ cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; From 2d22daa8206a3c9acc333a08de48cd1e6295cea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Fri, 11 Aug 2023 10:19:02 +0200 Subject: [PATCH 14/52] ci: always run build and test stage on new commits --- .gitlab-ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 35eabee89..cb7391eb7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -34,8 +34,6 @@ build-poet: # This job runs in the build stage, which runs first. - mkdir build && cd build - cmake -DPOET_ENABLE_TESTING=ON .. - make -j$(nproc) - rules: - - if: $CI_PIPELINE_SOURCE == 'merge_request_event' artifacts: paths: - build @@ -47,8 +45,6 @@ test-poet: script: - cd build - make -j$(nproc) check - rules: - - if: $CI_PIPELINE_SOURCE == 'merge_request_event' archive-sources: # This job runs in the build stage, which runs first. image: python:3 From d4f525ccbff7cf552b5566b34aa4c0809d07e108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Fri, 11 Aug 2023 10:26:40 +0200 Subject: [PATCH 15/52] ci: add expiration date for build artifacts --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cb7391eb7..1a7bf2db0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -37,6 +37,7 @@ build-poet: # This job runs in the build stage, which runs first. artifacts: paths: - build + expire_in: 1 day test-poet: stage: test From fef482f5e0295ab77cb82640d02bedaebf608cf9 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Fri, 11 Aug 2023 11:04:11 +0200 Subject: [PATCH 16/52] fix: rounded_key instead of roundedKey --- src/ChemistryModule/SurrogateModels/InterpolationModule.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp index 0321e6d8a..1de57554b 100644 --- a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp +++ b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-09 13:41:53 mluebke" +// Time-stamp: "Last modified 2023-08-11 11:01:11 delucia" #include "poet/DHT_Wrapper.hpp" #include "poet/HashFunctions.hpp" @@ -105,7 +105,7 @@ void InterpolationModule::tryInterpolation(WorkPackage &work_package) { } #ifdef POET_PHT_ADD - this->pht->incrementReadCounter(roundKey(roundedKey)); + this->pht->incrementReadCounter(roundKey(rounded_key)); #endif double start_fc = MPI_Wtime(); From 344f73af2317a41b7a438be89b092c2a5a2bed5a Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Fri, 11 Aug 2023 14:44:26 +0200 Subject: [PATCH 17/52] fix: unnest pht- and dht-related outputs and stats collection in WorkerPostIter and WorkerPostSim --- src/ChemistryModule/WorkerFunctions.cpp | 45 +++++++++++++------------ 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/ChemistryModule/WorkerFunctions.cpp b/src/ChemistryModule/WorkerFunctions.cpp index eb9df84a2..a46030f79 100644 --- a/src/ChemistryModule/WorkerFunctions.cpp +++ b/src/ChemistryModule/WorkerFunctions.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-10 12:14:24 mluebke" +// Time-stamp: "Last modified 2023-08-11 14:07:03 delucia" #include "poet/ChemistryModule.hpp" #include "poet/DHT_Wrapper.hpp" @@ -216,38 +216,38 @@ void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, uint32_t iteration) { MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm, MPI_STATUS_IGNORE); + if (this->dht_enabled) { dht_hits.push_back(dht->getHits()); dht_evictions.push_back(dht->getEvictions()); - dht->resetCounter(); - - if (this->interp_enabled) { - interp_calls.push_back(interp->getInterpolationCount()); - interp->resetCounter(); - interp->writePHTStats(); - } - + dht->resetCounter(); + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { WorkerWriteDHTDump(iteration); - - if (this->interp_enabled) { - std::stringstream out; - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') - << std::setw(this->file_pad) << iteration << ".pht"; - interp->dumpPHTState(out.str()); - } } } + + if (this->interp_enabled) { + std::stringstream out; + interp_calls.push_back(interp->getInterpolationCount()); + interp->resetCounter(); + interp->writePHTStats(); + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } } + + void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { if (this->dht_enabled && this->dht_snaps_type == DHT_SNAPS_SIMEND) { WorkerWriteDHTDump(iteration); - if (this->interp_enabled) { - std::stringstream out; - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') - << std::setw(this->file_pad) << iteration << ".pht"; - interp->dumpPHTState(out.str()); - } + } + if (this->interp_enabled) { + std::stringstream out; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); } } @@ -263,6 +263,7 @@ void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) { std::cout << "CPP: Worker: Successfully written DHT to file " << out.str() << "\n"; } + void poet::ChemistryModule::WorkerReadDHTDump( const std::string &dht_input_file) { int res = dht->fileToTable((char *)dht_input_file.c_str()); From 74ebe3e1f10116136f728419e020cfcc42ae99cb Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Tue, 15 Aug 2023 17:33:10 +0200 Subject: [PATCH 18/52] fix: only write PHT dump file if advised to do so --- src/ChemistryModule/WorkerFunctions.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/ChemistryModule/WorkerFunctions.cpp b/src/ChemistryModule/WorkerFunctions.cpp index a46030f79..e13641941 100644 --- a/src/ChemistryModule/WorkerFunctions.cpp +++ b/src/ChemistryModule/WorkerFunctions.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-11 14:07:03 delucia" +// Time-stamp: "Last modified 2023-08-15 17:33:00 mluebke" #include "poet/ChemistryModule.hpp" #include "poet/DHT_Wrapper.hpp" @@ -232,18 +232,20 @@ void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, interp_calls.push_back(interp->getInterpolationCount()); interp->resetCounter(); interp->writePHTStats(); - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') - << std::setw(this->file_pad) << iteration << ".pht"; - interp->dumpPHTState(out.str()); + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } } } void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { - if (this->dht_enabled && this->dht_snaps_type == DHT_SNAPS_SIMEND) { + if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { WorkerWriteDHTDump(iteration); } - if (this->interp_enabled) { + if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { std::stringstream out; out << this->dht_file_out_dir << "/iter_" << std::setfill('0') << std::setw(this->file_pad) << iteration << ".pht"; From 4660fd4c4809b2e68f5d9e579e77e096a4610bff Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Wed, 16 Aug 2023 12:21:19 +0200 Subject: [PATCH 19/52] chore: add .cache to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 79e7da401..8cb918a01 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ vignettes/*.pdf # End of https://www.toptal.com/developers/gitignore/api/c,c++,r,cmake build/ +/.cache/ From a5c8982b9838281b98ccb8c7b5118807f57d09a0 Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Thu, 17 Aug 2023 10:51:54 +0200 Subject: [PATCH 20/52] feat: implement SEXP export of Field structure feat: implement NamedVector based on Rcpp::NumericVector feat: remove hard coded checks and substitute by R hook functions, defined in the input script refactor: modify API of DHT_Wrapper/InterpolationModule to expect a work package structure, where input/output values are stored as 2D vectors test: add tests for NamedVector test: extend tests for Field --- app/poet.cpp | 37 +-- bench/dolo/dolo_diffu_inner.R | 106 ++++++--- bench/dolo/dolo_diffu_inner_large.R | 108 ++++++--- bench/dolo/dolo_interp_long.R | 85 +++++-- include/poet/ChemistryModule.hpp | 5 +- include/poet/DHT_Wrapper.hpp | 17 +- include/poet/DataStructures.hpp | 199 ++++++++++++++-- include/poet/DiffusionModule.hpp | 5 +- include/poet/Field.hpp | 159 ------------- include/poet/Interpolation.hpp | 19 +- include/poet/LookupKey.hpp | 4 +- include/poet/RInsidePOET.hpp | 54 +++-- include/poet/SimParams.hpp | 14 +- src/ChemistryModule/ChemistryModule.cpp | 22 +- src/ChemistryModule/MasterFunctions.cpp | 4 +- .../SurrogateModels/DHT_Wrapper.cpp | 52 ++++- .../SurrogateModels/InterpolationModule.cpp | 84 +++---- .../SurrogateModels/ProximityHashTable.cpp | 6 +- src/ChemistryModule/WorkerFunctions.cpp | 15 +- src/DataStructures/Field.cpp | 215 +++++++++++++----- src/DiffusionModule.cpp | 53 ++--- src/SimParams.cpp | 16 +- test/CMakeLists.txt | 4 + test/RInsidePOET_funcs.R | 22 ++ test/testDataStructures.hpp.in | 6 + .../{testDataStructures.cpp => testField.cpp} | 62 ++++- test/testNamedVector.cpp | 63 +++++ 27 files changed, 931 insertions(+), 505 deletions(-) delete mode 100644 include/poet/Field.hpp create mode 100644 test/RInsidePOET_funcs.R create mode 100644 test/testDataStructures.hpp.in rename test/{testDataStructures.cpp => testField.cpp} (71%) create mode 100644 test/testNamedVector.cpp diff --git a/app/poet.cpp b/app/poet.cpp index 51dc51fd9..4095cf858 100644 --- a/app/poet.cpp +++ b/app/poet.cpp @@ -21,12 +21,12 @@ #include #include #include -#include +#include #include #include +#include #include #include -#include #include #include @@ -153,13 +153,15 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, /* displaying iteration number, with C++ and R iterator */ MSG("Going through iteration " + std::to_string(iter)); - MSG("R's $iter: " + std::to_string((uint32_t)(R.parseEval("mysetup$iter"))) + ". Iteration"); + MSG("R's $iter: " + + std::to_string((uint32_t)(R.parseEval("mysetup$iter"))) + + ". Iteration"); /* run transport */ // TODO: transport to diffusion diffusion.simulate(dt); - chem.getField().UpdateFromField(diffusion.getField()); + chem.getField().update(diffusion.getField()); MSG("Chemistry step"); @@ -167,7 +169,7 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, chem.RunCells(); writeFieldsToR(R, diffusion.getField(), chem.GetField()); - diffusion.getField().UpdateFromField(chem.GetField()); + diffusion.getField().update(chem.GetField()); R["req_dt"] = dt; R["simtime"] = (sim_time += dt); @@ -181,7 +183,8 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, // store_result is TRUE) R.parseEvalQ("mysetup <- master_iteration_end(setup=mysetup)"); - MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); + MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + + std::to_string(maxiter)); MSG(); // MPI_Barrier(MPI_COMM_WORLD); @@ -258,15 +261,16 @@ int main(int argc, char *argv[]) { MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + RInsidePOET &R = RInsidePOET::getInstance(); + if (world_rank == 0) { - MSG("Running POET version " + std::string(poet_version)); + MSG("Running POET version " + std::string(poet_version)); } if (world_rank > 0) { { - SimParams params(world_rank, world_size); - int pret = params.parseFromCmdl(argv, RInsidePOET::getInstance()); + int pret = params.parseFromCmdl(argv, R); if (pret == poet::PARSER_ERROR) { MPI_Finalize(); @@ -286,7 +290,7 @@ int main(int argc, char *argv[]) { } MPI_Barrier(MPI_COMM_WORLD); - + MSG("finished, cleanup of process " + std::to_string(world_rank)); MPI_Finalize(); @@ -294,8 +298,6 @@ int main(int argc, char *argv[]) { return EXIT_SUCCESS; } - RInsidePOET &R = RInsidePOET::getInstance(); - /*Loading Dependencies*/ // TODO: kann raus std::string r_load_dependencies = "source('../R_lib/kin_r_library.R');"; @@ -326,12 +328,12 @@ int main(int argc, char *argv[]) { // MDL: store all parameters if (world_rank == 0) { - MSG("Calling R Function to store calling parameters"); - R.parseEvalQ("StoreSetup(setup=mysetup)"); + MSG("Calling R Function to store calling parameters"); + R.parseEvalQ("StoreSetup(setup=mysetup)"); } if (world_rank == 0) { - MSG("Init done on process with rank " + std::to_string(world_rank)); + MSG("Init done on process with rank " + std::to_string(world_rank)); } // MPI_Barrier(MPI_COMM_WORLD); @@ -351,7 +353,8 @@ int main(int argc, char *argv[]) { r_vis_code = "saveRDS(profiling, file=paste0(fileout,'/timings.rds'));"; R.parseEval(r_vis_code); - MSG("Done! Results are stored as R objects into <" + params.getOutDir() + "/timings.rds>"); + MSG("Done! Results are stored as R objects into <" + params.getOutDir() + + "/timings.rds>"); MPI_Barrier(MPI_COMM_WORLD); @@ -359,7 +362,7 @@ int main(int argc, char *argv[]) { MPI_Finalize(); if (world_rank == 0) { - MSG("done, bye!"); + MSG("done, bye!"); } exit(0); diff --git a/bench/dolo/dolo_diffu_inner.R b/bench/dolo/dolo_diffu_inner.R index 071fe36c1..a16f8a49c 100644 --- a/bench/dolo/dolo_diffu_inner.R +++ b/bench/dolo/dolo_diffu_inner.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-08-02 13:59:02 mluebke" +## Time-stamp: "Last modified 2023-08-16 17:04:42 mluebke" database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") @@ -62,34 +62,34 @@ alpha_diffu <- c( ## list of boundary conditions/inner nodes vecinj_diffu <- list( - list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001 - ), - list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0.0, - "Cl" = 0.004, - "Mg" = 0.002 - ) + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0, + "Cl" = 0.002, + "Mg" = 0.001 + ), + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0.0, + "Cl" = 0.004, + "Mg" = 0.002 + ) ) vecinj_inner <- list( - l1 = c(1,20,20), - l2 = c(2,80,80), - l3 = c(2,60,80) + l1 = c(1, 20, 20), + l2 = c(2, 80, 80), + l3 = c(2, 60, 80) ) boundary <- list( -# "N" = c(1, rep(0, n-1)), + # "N" = c(1, rep(0, n-1)), "N" = rep(0, n), "E" = rep(0, n), "S" = rep(0, n), @@ -116,21 +116,59 @@ diffusion <- list( ## # Needed when using DHT -dht_species <- c("H" = 10, - "O" = 10, - "Charge" = 3, - "C(4)" = 5, - "Ca" = 5, - "Cl" = 5, - "Mg" = 5, - "Calcite" = 5, - "Dolomite" =5 - ) +dht_species <- c( + "H" = 10, + "O" = 10, + "Charge" = 3, + "C(4)" = 5, + "Ca" = 5, + "Cl" = 5, + "Mg" = 5, + "Calcite" = 5, + "Dolomite" = 5 +) + +check_sign_cal_dol_dht <- function(old, new) { + if ((old["Calcite"] == 0) != (new["Calcite"] == 0)) { + return(TRUE) + } + if ((old["Dolomite"] == 0) != (new["Dolomite"] == 0)) { + return(TRUE) + } + return(FALSE) +} + +fuzz_input_dht_keys <- function(input) { + return(input[names(dht_species)]) +} + +check_sign_cal_dol_interp <- function(to_interp, data_set) { + data_set <- as.data.frame(do.call(rbind, data_set), check.names = FALSE, optional = TRUE) + names(data_set) <- names(dht_species) + cal <- (data_set$Calcite == 0) == (to_interp["Calcite"] == 0) + dol <- (data_set$Dolomite == 0) == (to_interp["Dolomite"] == 0) + + cal_dol_same_sig <- cal == dol + return(rev(which(!cal_dol_same_sig))) +} + +check_neg_cal_dol <- function(result) { + neg_sign <- (result["Calcite"] <- 0) || (result["Dolomite"] < 0) + return(any(neg_sign)) +} + +hooks <- list( + dht_fill = check_sign_cal_dol_dht, + dht_fuzz = fuzz_input_dht_keys, + interp_pre_func = check_sign_cal_dol_interp, + interp_post_func = check_neg_cal_dol +) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species + dht_species = dht_species, + hooks = hooks ) ################################################################# diff --git a/bench/dolo/dolo_diffu_inner_large.R b/bench/dolo/dolo_diffu_inner_large.R index 4403ae46f..ce1da8246 100644 --- a/bench/dolo/dolo_diffu_inner_large.R +++ b/bench/dolo/dolo_diffu_inner_large.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-08-02 13:59:12 mluebke" +## Time-stamp: "Last modified 2023-08-16 17:05:04 mluebke" database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") @@ -62,34 +62,34 @@ alpha_diffu <- c( ## list of boundary conditions/inner nodes vecinj_diffu <- list( - list( - "H" = 0.0001540445, - "O" = 0.002148006, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001 - ), - list( - "H" = 0.0001610193, - "O" = 0.002386934, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0.0, - "Cl" = 0.004, - "Mg" = 0.002 - ) + list( + "H" = 0.0001540445, + "O" = 0.002148006, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0, + "Cl" = 0.002, + "Mg" = 0.001 + ), + list( + "H" = 0.0001610193, + "O" = 0.002386934, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0.0, + "Cl" = 0.004, + "Mg" = 0.002 + ) ) vecinj_inner <- list( - l1 = c(1,400,200), - l2 = c(2,1400,800), - l3 = c(2,1600,800) + l1 = c(1, 400, 200), + l2 = c(2, 1400, 800), + l3 = c(2, 1600, 800) ) boundary <- list( -# "N" = c(1, rep(0, n-1)), + # "N" = c(1, rep(0, n-1)), "N" = rep(0, n), "E" = rep(0, m), "S" = rep(0, n), @@ -115,21 +115,59 @@ diffusion <- list( ################################################################# ## # Needed when using DHT -dht_species <- c("H" = 10, - "O" = 10, - "Charge" = 3, - "C(4)" = 5, - "Ca" = 5, - "Cl" = 5, - "Mg" = 5, - "Calcite" = 5, - "Dolomite" =5 - ) +dht_species <- c( + "H" = 10, + "O" = 10, + "Charge" = 3, + "C(4)" = 5, + "Ca" = 5, + "Cl" = 5, + "Mg" = 5, + "Calcite" = 5, + "Dolomite" = 5 +) + +check_sign_cal_dol_dht <- function(old, new) { + if ((old["Calcite"] == 0) != (new["Calcite"] == 0)) { + return(TRUE) + } + if ((old["Dolomite"] == 0) != (new["Dolomite"] == 0)) { + return(TRUE) + } + return(FALSE) +} + +fuzz_input_dht_keys <- function(input) { + return(input[names(dht_species)]) +} + +check_sign_cal_dol_interp <- function(to_interp, data_set) { + data_set <- as.data.frame(do.call(rbind, data_set), check.names = FALSE, optional = TRUE) + names(data_set) <- names(dht_species) + cal <- (data_set$Calcite == 0) == (to_interp["Calcite"] == 0) + dol <- (data_set$Dolomite == 0) == (to_interp["Dolomite"] == 0) + + cal_dol_same_sig <- cal == dol + return(rev(which(!cal_dol_same_sig))) +} + +check_neg_cal_dol <- function(result) { + neg_sign <- (result["Calcite"] <- 0) || (result["Dolomite"] < 0) + return(any(neg_sign)) +} + +hooks <- list( + dht_fill = check_sign_cal_dol_dht, + dht_fuzz = fuzz_input_dht_keys, + interp_pre_func = check_sign_cal_dol_interp, + interp_post_func = check_neg_cal_dol +) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species + dht_species = dht_species, + hooks = hooks ) ################################################################# @@ -148,5 +186,5 @@ setup <- list( iterations = iterations, timesteps = rep(dt, iterations), store_result = TRUE, - out_save = seq(5, iterations, by=5) + out_save = seq(5, iterations, by = 5) ) diff --git a/bench/dolo/dolo_interp_long.R b/bench/dolo/dolo_interp_long.R index dedfe1c21..80770f07a 100644 --- a/bench/dolo/dolo_interp_long.R +++ b/bench/dolo/dolo_interp_long.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-08-02 13:47:06 mluebke" +## Time-stamp: "Last modified 2023-08-16 14:57:25 mluebke" database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") @@ -62,25 +62,25 @@ alpha_diffu <- c( ## list of boundary conditions/inner nodes vecinj_diffu <- list( - list( - "H" = 1.110124E+02, - "O" = 5.550796E+01, - "Charge" = -3.230390327801E-08, - "C(4)" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001 - ), - list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0.0, - "Cl" = 0.004, - "Mg" = 0.002 - ), - init_diffu + list( + "H" = 1.110124E+02, + "O" = 5.550796E+01, + "Charge" = -3.230390327801E-08, + "C(4)" = 0, + "Ca" = 0, + "Cl" = 0.002, + "Mg" = 0.001 + ), + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0.0, + "Cl" = 0.004, + "Mg" = 0.002 + ), + init_diffu ) vecinj_inner <- list( @@ -132,19 +132,56 @@ dht_species <- c( ## # Optional when using Interpolation (example with less key species and custom ## # significant digits) -#pht_species <- c( +# pht_species <- c( # "C(4)" = 3, # "Ca" = 3, # "Mg" = 2, # "Calcite" = 2, # "Dolomite" = 2 -#) +# ) + +check_sign_cal_dol_dht <- function(old, new) { + if ((old["Calcite"] == 0) != (new["Calcite"] == 0)) { + return(TRUE) + } + if ((old["Dolomite"] == 0) != (new["Dolomite"] == 0)) { + return(TRUE) + } + return(FALSE) +} + +fuzz_input_dht_keys <- function(input) { + return(input[names(dht_species)]) +} + +check_sign_cal_dol_interp <- function(to_interp, data_set) { + data_set <- as.data.frame(do.call(rbind, data_set), check.names = FALSE, optional = TRUE) + names(data_set) <- names(dht_species) + cal <- (data_set$Calcite == 0) == (to_interp["Calcite"] == 0) + dol <- (data_set$Dolomite == 0) == (to_interp["Dolomite"] == 0) + + cal_dol_same_sig <- cal == dol + return(rev(which(!cal_dol_same_sig))) +} + +check_neg_cal_dol <- function(result) { + neg_sign <- (result["Calcite"] <- 0) || (result["Dolomite"] < 0) + return(any(neg_sign)) +} + +hooks <- list( + dht_fill = check_sign_cal_dol_dht, + dht_fuzz = fuzz_input_dht_keys, + interp_pre_func = check_sign_cal_dol_interp, + interp_post_func = check_neg_cal_dol +) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species -# pht_species = pht_species + dht_species = dht_species, + hooks = hooks + # pht_species = pht_species ) ################################################################# diff --git a/include/poet/ChemistryModule.hpp b/include/poet/ChemistryModule.hpp index f5930184d..c0c7f9083 100644 --- a/include/poet/ChemistryModule.hpp +++ b/include/poet/ChemistryModule.hpp @@ -1,11 +1,10 @@ -// Time-stamp: "Last modified 2023-08-08 13:15:49 mluebke" +// Time-stamp: "Last modified 2023-08-15 14:36:28 mluebke" #ifndef CHEMISTRYMODULE_H_ #define CHEMISTRYMODULE_H_ #include "DHT_Wrapper.hpp" #include "DataStructures.hpp" -#include "Field.hpp" #include "Interpolation.hpp" #include "IrmResult.h" #include "PhreeqcRM.h" @@ -382,7 +381,7 @@ protected: uint32_t prop_count = 0; std::vector prop_names; - Field chem_field{0}; + Field chem_field; static constexpr int MODULE_COUNT = 5; diff --git a/include/poet/DHT_Wrapper.hpp b/include/poet/DHT_Wrapper.hpp index a066d3ccd..a74db4f2d 100644 --- a/include/poet/DHT_Wrapper.hpp +++ b/include/poet/DHT_Wrapper.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-08 18:01:12 mluebke" +// Time-stamp: "Last modified 2023-08-15 14:57:51 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -25,6 +25,8 @@ #include "DataStructures.hpp" #include "LookupKey.hpp" +#include "RInsidePOET.hpp" +#include "SimParams.hpp" #include "enums.hpp" #include "poet/HashFunctions.hpp" #include "poet/LookupKey.hpp" @@ -85,6 +87,8 @@ public: DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, const NamedVector &key_species, const std::vector &key_indices, + const std::vector &output_names, + const ChemistryParams::Chem_Hook_Functions &hooks, uint32_t data_count); /** * @brief Destroy the dht wrapper object @@ -213,7 +217,11 @@ public: double dt) { dht_results.keys.resize(input_values.size()); for (std::size_t i = 0; i < input_values.size(); i++) { - dht_results.keys[i] = fuzzForDHT(input_values[i], dt); + if (this->hooks.dht_fuzz.isValid()) { + dht_results.keys[i] = fuzzForDHT_R(input_values[i], dt); + } else { + dht_results.keys[i] = fuzzForDHT(input_values[i], dt); + } } } @@ -225,6 +233,7 @@ private: MPI_Comm communicator; LookupKey fuzzForDHT(const std::vector &cell, double dt); + LookupKey fuzzForDHT_R(const std::vector &cell, double dt); std::vector outputToInputAndRates(const std::vector &old_results, @@ -242,6 +251,10 @@ private: std::vector dht_prop_type_vector; std::vector input_key_elements; + const std::vector &output_names; + + const ChemistryParams::Chem_Hook_Functions &hooks; + DHT_ResultObject dht_results; std::array base_totals{0}; diff --git a/include/poet/DataStructures.hpp b/include/poet/DataStructures.hpp index c2ab99d77..834ceca50 100644 --- a/include/poet/DataStructures.hpp +++ b/include/poet/DataStructures.hpp @@ -3,12 +3,16 @@ #include "enums.hpp" +#include + +#include +#include #include -#include +#include +#include #include #include #include - namespace poet { struct WorkPackage { @@ -24,28 +28,193 @@ struct WorkPackage { } }; -template class NamedVector { +template class NamedVector : public Rcpp::NumericVector { public: - void insert(std::pair to_insert) { - this->names.push_back(to_insert.first); - this->values.push_back(to_insert.second); - container_size++; + NamedVector() : Rcpp::NumericVector(){}; + + NamedVector(const std::vector &in_names, + const std::vector &in_values) + : Rcpp::NumericVector(Rcpp::wrap(in_values)) { + this->names() = Rcpp::CharacterVector(Rcpp::wrap(in_names)); } - bool empty() const { return this->container_size == 0; } + NamedVector(const SEXP &s) : Rcpp::NumericVector(s){}; - std::size_t size() const { return this->container_size; } + bool empty() const { return (this->size() == 0); } - value_type &operator[](std::size_t i) { return values[i]; } + std::vector getNames() const { + return Rcpp::as>(this->names()); + } + std::vector getValues() const { return Rcpp::as>(*this); } +}; - const std::vector &getNames() const { return this->names; } - const std::vector &getValues() const { return this->values; } +using FieldColumn = std::vector; + +/** + * Stores data for input/output of a module. The class keeps track of all + * defined properties with name and import/export to 1D and 2D STL vectors. + * Also, it eases the update process of a field with an input from another + * field. + * + * It can be seen as an R-like data frame, but with less access to the members. + * Species values are stored as "columns", where column is a STL vector. + */ +class Field : private std::unordered_map { +public: + Field(){}; + + /** + * Creates a new instance of a field with fixed expected vector size. + * + * \param vec_s expected vector size of each component/column. + */ + Field(std::uint32_t vec_s) : req_vec_size(vec_s){}; + + /** + * Initializes instance with a 2D vector and according names for each columnn. + * There is no check if names were given multiple times. The order of name + * vector also defines the ordering of the output. + * + * \param vec_s expected vector size of each component/column. + * \param input 2D vector using STL semantic describing the current state of + * the field. + * \param prop_vec Name of each vector in the input. Shall match + * the count of vectors. + * + * \exception std::runtime_error If prop_vec size doesn't match input vector + * size or column vectors size doesn't match expected vector size. + */ + Field(std::uint32_t vec_s, const std::vector> &input, + const std::vector &prop_vec, bool row_major = false); + + /** + * Initializes instance with a 1D continious memory vector and according names + * for each columnn. There is no check if names were given multiple times. The + * order of name vector also defines the ordering of the output. + * + * \param vec_s expected vector size of each component/column. + * \param input 1D vector using STL semantic describing the current state of + * the field storing each column starting at index *i times requested vector + * size*. + * \param prop_vec Name of each vector in the input. Shall match the + * count of vectors. + * + * \exception std::runtime_error If prop_vec size doesn't match input vector + * size or column vectors size doesn't match expected vector size. + */ + Field(std::uint32_t vec_s, const std::vector &input, + const std::vector &prop_vec); + + Field(const SEXP &s_init) { fromSEXP(s_init); } + + Field &operator=(const Field &rhs) { + this->req_vec_size = rhs.req_vec_size; + this->props = rhs.props; + + this->clear(); + + for (const auto &pair : rhs) { + this->insert(pair); + } + return *this; + } + + /** + * Read in a (previously exported) 1D vector. It has to have the same + * dimensions as the current column count times the requested vector size of + * this instance. Import order is given by the species name vector. + * + * \param cont_field 1D field as vector. + * + * \exception std::runtime_error Input vector does not match the expected + * size. + */ + Field &operator=(const FieldColumn &cont_field); + + /** + * Read in a (previously exported) 2D vector. It has to have the same + * dimensions as the current column count of this instance and each vector + * must have the size of the requested vector size. Import order is given by + * the species name vector. + * + * \param cont_field 2D field as vector. + * + * \exception std::runtime_error Input vector has more or less elements than + * the instance or a column vector does not match expected vector size. + */ + Field &operator=(const std::vector &cont_field); + + Field &operator=(const SEXP &s_rhs) { + fromSEXP(s_rhs); + return *this; + } + + /** + * Returns a reference to the column vector with given name. Creates a new + * vector if prop was not found. The prop name will be added to the end of the + * list. + * + * \param key Name of the prop. + * + * \return Reference to the column vector. + */ + FieldColumn &operator[](const std::string &key); + + /** + * Returns the names of all species defined in the instance. + * + * \return Vector containing all species names in output order. + */ + auto GetProps() const -> std::vector { return this->props; } + + /** + * Return the requested vector size. + * + * \return Requested vector size set in the instanciation of the object. + */ + auto GetRequestedVecSize() const -> std::uint32_t { + return this->req_vec_size; + }; + + /** + * Updates all species with values from another field. If one element of the + * input field doesn't match the names of the calling instance it will get + * skipped. + * + * \param input Field to update the current instance's columns. + */ + void update(const Field &input); + + /** + * Builds a new 1D vector with the current state of the instance. The output + * order is given by the given species name vector set earlier and/or added + * values using the [] operator. + * + * \return 1D STL vector stored each column one after another. + */ + auto AsVector() const -> FieldColumn; + + /** + * Builds a new 2D vector with the current state of the instance. The output + * order is given by the given species name vector set earlier and/or added + * values using the [] operator. + * + * \return 2D STL vector stored each column one after anothe in a new vector + * element. + */ + auto As2DVector() const -> std::vector; + + SEXP asSEXP() const; + + std::size_t cols() const { return this->props.size(); } private: - std::size_t container_size{0}; + std::uint32_t req_vec_size{0}; - std::vector names{}; - std::vector values{}; + std::vector props{{}}; + + void fromSEXP(const SEXP &s_rhs); }; + } // namespace poet #endif // DATASTRUCTURES_H_ diff --git a/include/poet/DiffusionModule.hpp b/include/poet/DiffusionModule.hpp index e9b9c61f5..8ea379027 100644 --- a/include/poet/DiffusionModule.hpp +++ b/include/poet/DiffusionModule.hpp @@ -21,7 +21,7 @@ #ifndef DIFFUSION_MODULE_H #define DIFFUSION_MODULE_H -#include "Field.hpp" +#include "DataStructures.hpp" #include "SimParams.hpp" #include "poet/SimParams.hpp" #include @@ -94,7 +94,8 @@ private: enum { DIM_1D = 1, DIM_2D }; - void initialize(const poet::DiffusionParams &args); + void initialize(const poet::DiffusionParams &args, + std::uint32_t n_grid_cells); uint8_t dim; diff --git a/include/poet/Field.hpp b/include/poet/Field.hpp deleted file mode 100644 index cb4e46996..000000000 --- a/include/poet/Field.hpp +++ /dev/null @@ -1,159 +0,0 @@ -#ifndef FIELD_H_ -#define FIELD_H_ - -#include -#include -#include -#include -#include -#include - -namespace poet { - -using FieldColumn = std::vector; - -/** - * Stores data for input/output of a module. The class keeps track of all - * defined properties with name and import/export to 1D and 2D STL vectors. - * Also, it eases the update process of a field with an input from another - * field. - * - * It can be seen as an R-like data frame, but with less access to the members. - * Species values are stored as "columns", where column is a STL vector. - */ -class Field : private std::unordered_map { -public: - /** - * Creates a new instance of a field with fixed expected vector size. - * - * \param vec_s expected vector size of each component/column. \ - */ - Field(uint32_t vec_s) : req_vec_size(vec_s){}; - - /** - * Initializes instance with a 2D vector and according names for each columnn. - * There is no check if names were given multiple times. The order of name - * vector also defines the ordering of the output. - * - * \param input 2D vector using STL semantic describing the current state of - * the field. - * \param prop_vec Name of each vector in the input. Shall match - * the count of vectors. - * - * \exception std::runtime_error If prop_vec size doesn't match input vector - * size or column vectors size doesn't match expected vector size. - */ - void InitFromVec(const std::vector> &input, - const std::vector &prop_vec); - - /** - * Initializes instance with a 1D continious memory vector and according names - * for each columnn. There is no check if names were given multiple times. The - * order of name vector also defines the ordering of the output. - * - * \param input 1D vector using STL semantic describing the current state of - * the field storing each column starting at index *i times requested vector - * size*. - * \param prop_vec Name of each vector in the input. Shall match the - * count of vectors. - * - * \exception std::runtime_error If prop_vec size doesn't match input vector - * size or column vectors size doesn't match expected vector size. - */ - void InitFromVec(const std::vector &input, - const std::vector &prop_vec); - - Field &operator=(const Field &rhs) { - this->req_vec_size = rhs.req_vec_size; - this->props = rhs.props; - return *this; - } - - /** - * Returns a reference to the column vector with given name. Creates a new - * vector if prop was not found. The prop name will be added to the end of the - * list. - * - * \param key Name of the prop. - * - * \return Reference to the column vector. - */ - FieldColumn &operator[](const std::string &key); - - /** - * Returns the names of all species defined in the instance. - * - * \return Vector containing all species names in output order. - */ - auto GetProps() const -> std::vector { return this->props; } - - /** - * Return the requested vector size. - * - * \return Requested vector size set in the instanciation of the object. - */ - auto GetRequestedVecSize() const -> std::uint32_t { - return this->req_vec_size; - }; - - /** - * Updates all species with values from another field. If one element of the - * input field doesn't match the names of the calling instance it will get - * skipped. - * - * \param input Field to update the current instance's columns. - */ - void UpdateFromField(const Field &input); - - /** - * Builds a new 1D vector with the current state of the instance. The output - * order is given by the given species name vector set earlier and/or added - * values using the [] operator. - * - * \return 1D STL vector stored each column one after another. - */ - auto AsVector() const -> FieldColumn; - - /** - * Builds a new 2D vector with the current state of the instance. The output - * order is given by the given species name vector set earlier and/or added - * values using the [] operator. - * - * \return 2D STL vector stored each column one after anothe in a new vector - * element. - */ - auto As2DVector() const -> std::vector; - - /** - * Read in a (previously exported) 1D vector. It has to have the same - * dimensions as the current column count times the requested vector size of - * this instance. Import order is given by the species name vector. - * - * \param cont_field 1D field as vector. - * - * \exception std::runtime_error Input vector does not match the expected - * size. - */ - void SetFromVector(const FieldColumn &cont_field); - - /** - * Read in a (previously exported) 2D vector. It has to have the same - * dimensions as the current column count of this instance and each vector - * must have the size of the requested vector size. Import order is given by - * the species name vector. - * - * \param cont_field 2D field as vector. - * - * \exception std::runtime_error Input vector has more or less elements than - * the instance or a column vector does not match expected vector size. - */ - void SetFromVector(const std::vector &cont_field); - -private: - std::uint32_t req_vec_size; - - std::vector props; -}; - -} // namespace poet -#endif // FIELD_H_ diff --git a/include/poet/Interpolation.hpp b/include/poet/Interpolation.hpp index 5f85872ca..71b9f9a01 100644 --- a/include/poet/Interpolation.hpp +++ b/include/poet/Interpolation.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-09 12:52:37 mluebke" +// Time-stamp: "Last modified 2023-08-16 16:49:31 mluebke" #ifndef INTERPOLATION_H_ #define INTERPOLATION_H_ @@ -9,6 +9,7 @@ #include "LookupKey.hpp" #include "poet/DHT_Wrapper.hpp" #include "poet/Rounding.hpp" +#include "poet/SimParams.hpp" #include #include #include @@ -46,7 +47,7 @@ public: std::vector> in_values; std::vector> out_values; - std::uint32_t getSize() const { + std::uint32_t getMemSize() const { std::uint32_t sum{0}; for (const auto &results : out_values) { sum += results.size() * sizeof(double); @@ -65,9 +66,9 @@ public: void writeLocationToPHT(LookupKey key, DHT_Location location); const PHT_Result &query(const LookupKey &key, - const std::uint32_t min_entries_needed, - const std::uint32_t input_count, - const std::uint32_t output_count); + std::uint32_t min_entries_needed, + std::uint32_t input_count, + std::uint32_t output_count); std::uint64_t getLocations(const LookupKey &key); @@ -163,7 +164,9 @@ public: std::uint64_t size_per_process, std::uint32_t min_entries_needed, DHT_Wrapper &dht, const NamedVector &interp_key_signifs, - const std::vector &dht_key_indices); + const std::vector &dht_key_indices, + const std::vector &out_names, + const ChemistryParams::Chem_Hook_Functions &hooks); enum result_status { RES_OK, INSUFFICIENT_DATA, NOT_NEEDED }; @@ -258,6 +261,10 @@ private: return out_key; } + + const ChemistryParams::Chem_Hook_Functions &hooks; + const std::vector &out_names; + const std::vector dht_names; }; } // namespace poet diff --git a/include/poet/LookupKey.hpp b/include/poet/LookupKey.hpp index c3366735f..fbb064c53 100644 --- a/include/poet/LookupKey.hpp +++ b/include/poet/LookupKey.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-07-26 10:20:10 mluebke" +// Time-stamp: "Last modified 2023-08-11 10:12:52 mluebke" #ifndef LOOKUPKEY_H_ #define LOOKUPKEY_H_ @@ -27,8 +27,6 @@ union Lookup_Keyelement { class LookupKey : public std::vector { public: - explicit LookupKey(size_type count); - using std::vector::vector; std::vector to_double() const; diff --git a/include/poet/RInsidePOET.hpp b/include/poet/RInsidePOET.hpp index fb2102685..6467e5760 100644 --- a/include/poet/RInsidePOET.hpp +++ b/include/poet/RInsidePOET.hpp @@ -1,11 +1,11 @@ #ifndef RPOET_H_ #define RPOET_H_ -#include "DataStructures.hpp" - #include #include #include +#include +#include #include #include #include @@ -28,34 +28,32 @@ public: this->parseEval("'" + R_name + "' %in% names(" + where + ")")); } - template inline T getSTL(const std::string &R_name) { - return Rcpp::as(RInside::operator[](R_name)); - } - - template - poet::NamedVector wrapNamedVector(const std::string &R_name) { - std::size_t name_size = this->parseEval("length(names(" + R_name + "))"); - if (name_size == 0) { - throw std::runtime_error("wrapNamedVector: " + R_name + - " is not a named vector!"); - } - - auto names = Rcpp::as>( - this->parseEval("names(" + R_name + ")")); - auto values = Rcpp::as(this->parseEval(R_name)); - - poet::NamedVector ret_map; - - for (std::size_t i = 0; i < names.size(); i++) { - ret_map.insert( - std::make_pair(names[i], static_cast(values[i]))); - } - - return ret_map; - } - private: RInsidePOET() : RInside(){}; }; +template class RHookFunction { +public: + RHookFunction() {} + RHookFunction(RInside &R, const std::string &f_name) { + try { + this->func = Rcpp::Function(Rcpp::as(R.parseEval(f_name.c_str()))); + } catch (const std::exception &e) { + } + } + + template T operator()(Args... args) const { + if (func.has_value()) { + return (Rcpp::as(this->func.value()(args...))); + } else { + throw std::exception(); + } + } + + bool isValid() const { return this->func.has_value(); } + +private: + std::optional func; +}; + #endif // RPOET_H_ diff --git a/include/poet/SimParams.hpp b/include/poet/SimParams.hpp index a73669865..9928bcdba 100644 --- a/include/poet/SimParams.hpp +++ b/include/poet/SimParams.hpp @@ -22,13 +22,14 @@ #define PARSER_H #include +#include #include #include #include #include -#include "Macros.hpp" #include "DataStructures.hpp" +#include "Macros.hpp" #include "RInsidePOET.hpp" #include "argh.hpp" // Argument handler https://github.com/adishavit/argh #include @@ -124,6 +125,13 @@ struct ChemistryParams { std::uint32_t interp_min_entries; NamedVector pht_signifs; + struct Chem_Hook_Functions { + RHookFunction dht_fill; + RHookFunction> dht_fuzz; + RHookFunction> interp_pre; + RHookFunction interp_post; + } hooks; + void initFromR(RInsidePOET &R); }; @@ -240,8 +248,8 @@ public: private: std::list validateOptions(argh::parser cmdl); - const std::set flaglist{ - "ignore-result", "dht", "P", "progress", "interp"}; + const std::set flaglist{"ignore-result", "dht", "P", "progress", + "interp"}; const std::set paramlist{ "work-package-size", "dht-strategy", "dht-size", "dht-snaps", diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp index 6f194e8bc..3060e8617 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/ChemistryModule/ChemistryModule.cpp @@ -291,7 +291,6 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { if (is_master) { this->n_cells = trans_field.GetRequestedVecSize(); - chem_field = Field(n_cells); std::vector> phreeqc_dump(this->nxyz); this->getDumpedField(phreeqc_dump); @@ -305,7 +304,7 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { const auto tmp_buffer{init_vec}; this->unshuffleField(tmp_buffer, n_cells, prop_count, 1, init_vec); - chem_field.InitFromVec(init_vec, prop_names); + this->chem_field = Field(n_cells, init_vec, prop_names); return; } @@ -325,7 +324,7 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { initial_values.push_back(std::move(init)); } - chem_field.InitFromVec(initial_values, prop_names); + this->chem_field = Field(n_cells, initial_values, prop_names); } else { ChemBCast(base_totals.data(), 2, MPI_DOUBLE); } @@ -366,11 +365,9 @@ void poet::ChemistryModule::initializeDHT( auto map_copy = key_species; if (key_species.empty()) { - const auto &all_species = this->prop_names; - for (std::size_t i = 0; i < all_species.size(); i++) { - map_copy.insert(std::make_pair(all_species[i], - DHT_Wrapper::DHT_KEY_SIGNIF_DEFAULT)); - } + std::vector default_signif( + this->prop_names.size(), DHT_Wrapper::DHT_KEY_SIGNIF_DEFAULT); + map_copy = NamedVector(this->prop_names, default_signif); } auto key_indices = parseDHTSpeciesVec(key_species, this->prop_names); @@ -381,8 +378,9 @@ void poet::ChemistryModule::initializeDHT( const std::uint64_t dht_size = size_mb * MB_FACTOR; - this->dht = new DHT_Wrapper(dht_comm, dht_size, map_copy, key_indices, - this->prop_count); + this->dht = + new DHT_Wrapper(dht_comm, dht_size, map_copy, key_indices, + this->prop_names, params.hooks, this->prop_count); this->dht->setBaseTotals(base_totals.at(0), base_totals.at(1)); } } @@ -488,8 +486,8 @@ void poet::ChemistryModule::initializeInterp( const uint64_t pht_size = size_mb * MB_FACTOR; interp = std::make_unique( - bucket_size, pht_size, min_entries, *(this->dht), map_copy, - key_indices); + bucket_size, pht_size, min_entries, *(this->dht), map_copy, key_indices, + this->prop_names, this->params.hooks); interp->setInterpolationFunction(inverseDistanceWeighting); } diff --git a/src/ChemistryModule/MasterFunctions.cpp b/src/ChemistryModule/MasterFunctions.cpp index 77bfa6023..51b656da3 100644 --- a/src/ChemistryModule/MasterFunctions.cpp +++ b/src/ChemistryModule/MasterFunctions.cpp @@ -342,7 +342,7 @@ void poet::ChemistryModule::MasterRunSequential() { std::vector out_vec{shuffled_field}; unshuffleField(shuffled_field, n_cells, prop_count, 1, out_vec); - chem_field.SetFromVector(out_vec); + chem_field = out_vec; } void poet::ChemistryModule::MasterRunParallel() { @@ -423,7 +423,7 @@ void poet::ChemistryModule::MasterRunParallel() { std::vector out_vec{mpi_buffer}; unshuffleField(mpi_buffer, this->n_cells, this->prop_count, wp_sizes_vector.size(), out_vec); - chem_field.SetFromVector(out_vec); + chem_field = out_vec; /* do master stuff */ diff --git a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp index bc3e8cfd5..d1ee112c2 100644 --- a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp +++ b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-09 14:05:01 mluebke" +// Time-stamp: "Last modified 2023-08-16 16:44:17 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -27,7 +27,10 @@ #include "poet/Rounding.hpp" #include "poet/enums.hpp" +#include "poet/RInsidePOET.hpp" + #include +#include #include #include #include @@ -43,10 +46,12 @@ namespace poet { DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, const NamedVector &key_species, const std::vector &key_indices, + const std::vector &_output_names, + const ChemistryParams::Chem_Hook_Functions &_hooks, uint32_t data_count) : key_count(key_indices.size()), data_count(data_count), input_key_elements(key_indices), communicator(dht_comm), - key_species(key_species) { + key_species(key_species), output_names(_output_names), hooks(_hooks) { // initialize DHT object // key size = count of key elements + timestep uint32_t key_size = (key_count + 1) * sizeof(Lookup_Keyelement); @@ -128,12 +133,13 @@ void DHT_Wrapper::fillDHT(const WorkPackage &work_package) { // check if calcite or dolomite is absent and present, resp.n and vice // versa in input/output. If this is the case -> Do not write to DHT! // HACK: hardcoded, should be fixed! - if ((work_package.input[i][7] == 0) != (work_package.output[i][7] == 0)) { - continue; - } + if (hooks.dht_fill.isValid()) { + NamedVector old_values(output_names, work_package.input[i]); + NamedVector new_values(output_names, work_package.output[i]); - if ((work_package.input[i][9] == 0) != (work_package.output[i][9] == 0)) { - continue; + if (hooks.dht_fill(old_values, new_values)) { + continue; + } } uint32_t proc, index; @@ -228,6 +234,38 @@ void DHT_Wrapper::printStatistics() { } } +LookupKey DHT_Wrapper::fuzzForDHT_R(const std::vector &cell, + double dt) { + const auto c_zero_val = std::pow(10, AQUEOUS_EXP); + + NamedVector input_nv(this->output_names, cell); + + const std::vector eval_vec = hooks.dht_fuzz(input_nv); + assert(eval_vec.size() == this->key_count); + LookupKey vecFuzz(this->key_count + 1, {.0}); + + DHT_Rounder rounder; + + int totals_i = 0; + // introduce fuzzing to allow more hits in DHT + // loop over every variable of grid cell + for (std::uint32_t i = 0; i < eval_vec.size(); i++) { + double curr_key = eval_vec[i]; + if (curr_key != 0) { + if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { + curr_key -= base_totals[totals_i++]; + } + vecFuzz[i] = + rounder.round(curr_key, dht_signif_vector[i], + this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); + } + } + // add timestep to the end of the key as double value + vecFuzz[this->key_count].fp_element = dt; + + return vecFuzz; +} + LookupKey DHT_Wrapper::fuzzForDHT(const std::vector &cell, double dt) { const auto c_zero_val = std::pow(10, AQUEOUS_EXP); diff --git a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp index 1de57554b..9384731b2 100644 --- a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp +++ b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp @@ -1,10 +1,13 @@ -// Time-stamp: "Last modified 2023-08-11 11:01:11 delucia" +// Time-stamp: "Last modified 2023-08-16 17:02:31 mluebke" #include "poet/DHT_Wrapper.hpp" +#include "poet/DataStructures.hpp" #include "poet/HashFunctions.hpp" #include "poet/Interpolation.hpp" #include "poet/LookupKey.hpp" #include "poet/Rounding.hpp" +#include +#include #include #include #include @@ -27,9 +30,13 @@ InterpolationModule::InterpolationModule( std::uint32_t entries_per_bucket, std::uint64_t size_per_process, std::uint32_t min_entries_needed, DHT_Wrapper &dht, const NamedVector &interp_key_signifs, - const std::vector &dht_key_indices) + const std::vector &dht_key_indices, + const std::vector &_out_names, + const ChemistryParams::Chem_Hook_Functions &_hooks) : dht_instance(dht), key_signifs(interp_key_signifs), - key_indices(dht_key_indices), min_entries_needed(min_entries_needed) { + key_indices(dht_key_indices), min_entries_needed(min_entries_needed), + dht_names(dht.getKeySpecies().getNames()), out_names(_out_names), + hooks(_hooks) { initPHT(this->key_signifs.size(), entries_per_bucket, size_per_process, dht.getCommunicator()); @@ -63,45 +70,41 @@ void InterpolationModule::tryInterpolation(WorkPackage &work_package) { const auto dht_results = this->dht_instance.getDHTResults(); - for (int i = 0; i < work_package.size; i++) { - if (work_package.mapping[i] != CHEM_PQC) { - interp_result.status[i] = NOT_NEEDED; + for (int wp_i = 0; wp_i < work_package.size; wp_i++) { + if (work_package.mapping[wp_i] != CHEM_PQC) { + interp_result.status[wp_i] = NOT_NEEDED; continue; } - const auto rounded_key = roundKey(dht_results.keys[i]); + const auto rounded_key = roundKey(dht_results.keys[wp_i]); auto pht_result = pht->query(rounded_key, this->min_entries_needed, dht_instance.getInputCount(), dht_instance.getOutputCount()); - int pht_i = 0; + if (pht_result.size < this->min_entries_needed) { + interp_result.status[wp_i] = INSUFFICIENT_DATA; + continue; + } + + if (hooks.interp_pre.isValid()) { + NamedVector nv_in(this->out_names, work_package.input[wp_i]); + + auto rm_indices = hooks.interp_pre(nv_in, pht_result.in_values); + + pht_result.size -= rm_indices.size(); - while (pht_i < pht_result.size) { if (pht_result.size < this->min_entries_needed) { - break; - } - - auto in_it = pht_result.in_values.begin() + pht_i; - auto out_it = pht_result.out_values.begin() + pht_i; - - bool same_sig_calcite = (pht_result.in_values[pht_i][7] == 0) == - (work_package.input[i][7] == 0); - bool same_sig_dolomite = (pht_result.in_values[pht_i][8] == 0) == - (work_package.input[i][9] == 0); - if (!same_sig_calcite || !same_sig_dolomite) { - pht_result.size -= 1; - pht_result.in_values.erase(in_it); - pht_result.out_values.erase(out_it); + interp_result.status[wp_i] = INSUFFICIENT_DATA; continue; } - pht_i += 1; - } - - if (pht_result.size < this->min_entries_needed) { - interp_result.status[i] = INSUFFICIENT_DATA; - continue; + for (const auto &index : rm_indices) { + pht_result.in_values.erase( + std::next(pht_result.in_values.begin(), index - 1)); + pht_result.out_values.erase( + std::next(pht_result.out_values.begin(), index - 1)); + } } #ifdef POET_PHT_ADD @@ -109,20 +112,17 @@ void InterpolationModule::tryInterpolation(WorkPackage &work_package) { #endif double start_fc = MPI_Wtime(); - // mean water - // double mean_water = 0; - // for (int out_i = 0; out_i < pht_result.size; out_i++) { - // mean_water += pht_result.out_values[out_i][0]; - // } - // mean_water /= pht_result.size; - work_package.output[i] = - f_interpolate(dht_instance.getKeyElements(), work_package.input[i], + work_package.output[wp_i] = + f_interpolate(dht_instance.getKeyElements(), work_package.input[wp_i], pht_result.in_values, pht_result.out_values); - if (work_package.output[i][7] < 0 || work_package.output[i][9] < 0) { - interp_result.status[i] = INSUFFICIENT_DATA; - continue; + if (hooks.interp_post.isValid()) { + NamedVector nv_result(this->out_names, work_package.output[wp_i]); + if (hooks.interp_post(nv_result)) { + interp_result.status[wp_i] = INSUFFICIENT_DATA; + continue; + } } // interp_result.results[i][0] = mean_water; @@ -130,8 +130,8 @@ void InterpolationModule::tryInterpolation(WorkPackage &work_package) { this->interpolations++; - work_package.mapping[i] = CHEM_INTERP; - interp_result.status[i] = RES_OK; + work_package.mapping[wp_i] = CHEM_INTERP; + interp_result.status[wp_i] = RES_OK; } } diff --git a/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp b/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp index 285ed21d3..3ea44430d 100644 --- a/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp +++ b/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-09 13:32:11 mluebke" +// Time-stamp: "Last modified 2023-08-15 14:50:59 mluebke" #include "poet/DHT_Wrapper.hpp" #include "poet/HashFunctions.hpp" @@ -203,7 +203,7 @@ void ProximityHashTable::Cache::operator()(const LookupKey &key, if (this->free_mem < 0) { const LookupKey &to_del = this->lru_queue.back(); const auto elem_d = this->find(to_del); - this->free_mem += elem_d->second.getSize(); + this->free_mem += elem_d->second.getMemSize(); this->erase(to_del); this->keyfinder.erase(to_del); this->lru_queue.pop_back(); @@ -212,7 +212,7 @@ void ProximityHashTable::Cache::operator()(const LookupKey &key, this->insert({key, val}); this->lru_queue.emplace_front(key); this->keyfinder[key] = lru_queue.begin(); - this->free_mem -= val.getSize(); + this->free_mem -= val.getMemSize(); return; } diff --git a/src/ChemistryModule/WorkerFunctions.cpp b/src/ChemistryModule/WorkerFunctions.cpp index e13641941..624a78b4f 100644 --- a/src/ChemistryModule/WorkerFunctions.cpp +++ b/src/ChemistryModule/WorkerFunctions.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-15 17:33:00 mluebke" +// Time-stamp: "Last modified 2023-08-16 14:50:04 mluebke" #include "poet/ChemistryModule.hpp" #include "poet/DHT_Wrapper.hpp" @@ -52,7 +52,7 @@ void poet::ChemistryModule::WorkerLoop() { break; } case CHEM_INIT_SPECIES: { - Field dummy{0}; + Field dummy; initializeField(dummy); break; } @@ -220,8 +220,8 @@ void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, if (this->dht_enabled) { dht_hits.push_back(dht->getHits()); dht_evictions.push_back(dht->getEvictions()); - dht->resetCounter(); - + dht->resetCounter(); + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { WorkerWriteDHTDump(iteration); } @@ -238,8 +238,9 @@ void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, interp->dumpPHTState(out.str()); } } -} + RInsidePOET::getInstance().parseEvalQ("gc()"); +} void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { @@ -248,7 +249,7 @@ void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { std::stringstream out; out << this->dht_file_out_dir << "/iter_" << std::setfill('0') - << std::setw(this->file_pad) << iteration << ".pht"; + << std::setw(this->file_pad) << iteration << ".pht"; interp->dumpPHTState(out.str()); } } @@ -265,7 +266,7 @@ void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) { std::cout << "CPP: Worker: Successfully written DHT to file " << out.str() << "\n"; } - + void poet::ChemistryModule::WorkerReadDHTDump( const std::string &dht_input_file) { int res = dht->fileToTable((char *)dht_input_file.c_str()); diff --git a/src/DataStructures/Field.cpp b/src/DataStructures/Field.cpp index 13b143c1e..de364a599 100644 --- a/src/DataStructures/Field.cpp +++ b/src/DataStructures/Field.cpp @@ -1,31 +1,53 @@ -#include "poet/Field.hpp" +#include "poet/DataStructures.hpp" +#include +#include +#include #include #include #include #include #include -void poet::Field::InitFromVec(const std::vector> &input, - const std::vector &prop_vec) { - if (prop_vec.size() != input.size()) { - throw std::runtime_error("Prop vector shall name all elements."); - } - - auto name_it = prop_vec.begin(); - - for (const auto &in_vec : input) { - if (in_vec.size() != req_vec_size) { - throw std::runtime_error( - "Input vector doesn't match expected vector size."); +poet::Field::Field(std::uint32_t vec_s, + const std::vector> &input, + const std::vector &prop_vec, bool row_major) + : req_vec_size(vec_s), props(prop_vec) { + if (row_major) { + if (this->props.size() != input[0].size()) { + throw std::runtime_error("Prop vector shall name all elements."); } - this->insert({*(name_it++), in_vec}); - } - this->props = prop_vec; + const std::size_t n_input = input.size(); + + for (std::size_t i = 0; i < this->props.size(); i++) { + std::vector curr_col(n_input); + + for (std::size_t j = 0; j < n_input; j++) { + curr_col[j] = input[j][i]; + } + + this->insert({this->props[i], std::move(curr_col)}); + } + } else { + if (this->props.size() != input.size()) { + throw std::runtime_error("Prop vector shall name all elements."); + } + + auto name_it = this->props.begin(); + + for (const auto &in_vec : input) { + if (in_vec.size() != req_vec_size) { + throw std::runtime_error( + "Input vector doesn't match expected vector size."); + } + this->insert({*(name_it++), in_vec}); + } + } } -void poet::Field::InitFromVec(const std::vector &input, - const std::vector &prop_vec) { +poet::Field::Field(std::uint32_t vec_s, const std::vector &input, + const std::vector &prop_vec) + : Field(vec_s) { const uint32_t expected_size = prop_vec.size() * req_vec_size; if (expected_size != input.size()) { @@ -62,47 +84,7 @@ auto poet::Field::AsVector() const -> poet::FieldColumn { return output; } -void poet::Field::SetFromVector(const poet::FieldColumn &cont_field) { - if (cont_field.size() != this->size() * req_vec_size) { - throw std::runtime_error( - "Field::SetFromVector: vector does not match expected size"); - } - - uint32_t vec_p = 0; - for (const auto &elem : props) { - const auto start = cont_field.begin() + vec_p; - const auto end = start + req_vec_size; - - const auto map_it = this->find(elem); - - map_it->second = FieldColumn(start, end); - - vec_p += req_vec_size; - } -} - -void poet::Field::SetFromVector( - const std::vector &cont_field) { - if (cont_field.size() != this->size()) { - throw std::runtime_error( - "Input field contains more or less elements than this container."); - } - - auto in_field_it = cont_field.begin(); - - for (const auto &elem : props) { - if (in_field_it->size() != req_vec_size) { - throw std::runtime_error( - "One vector contains more or less elements than expected."); - } - - const auto map_it = this->find(elem); - - map_it->second = *(in_field_it++); - } -} - -void poet::Field::UpdateFromField(const poet::Field &input) { +void poet::Field::update(const poet::Field &input) { for (const auto &input_elem : input) { auto it_self = this->find(input_elem.first); if (it_self == this->end()) { @@ -132,3 +114,118 @@ poet::FieldColumn &poet::Field::operator[](const std::string &key) { return std::unordered_map::operator[](key); } + +SEXP poet::Field::asSEXP() const { + const std::size_t cols = this->props.size(); + + SEXP s_names = PROTECT(Rf_allocVector(STRSXP, cols)); + SEXP s_output = PROTECT(Rf_allocVector(VECSXP, cols)); + + for (std::size_t prop_i = 0; prop_i < this->props.size(); prop_i++) { + const auto &name = this->props[prop_i]; + SEXP s_values = PROTECT(Rf_allocVector(REALSXP, this->req_vec_size)); + const auto values = this->find(name)->second; + + SET_STRING_ELT(s_names, prop_i, Rf_mkChar(name.c_str())); + + for (std::size_t i = 0; i < this->req_vec_size; i++) { + REAL(s_values)[i] = values[i]; + } + + SET_VECTOR_ELT(s_output, prop_i, s_values); + + UNPROTECT(1); + } + + SEXP s_rownames = PROTECT(Rf_allocVector(INTSXP, this->req_vec_size)); + for (std::size_t i = 0; i < this->req_vec_size; i++) { + INTEGER(s_rownames)[i] = static_cast(i + 1); + } + + Rf_setAttrib(s_output, R_ClassSymbol, + Rf_ScalarString(Rf_mkChar("data.frame"))); + Rf_setAttrib(s_output, R_NamesSymbol, s_names); + Rf_setAttrib(s_output, R_RowNamesSymbol, s_rownames); + + UNPROTECT(3); + + return s_output; +} + +poet::Field &poet::Field::operator=(const FieldColumn &cont_field) { + if (cont_field.size() != this->size() * req_vec_size) { + throw std::runtime_error( + "Field::SetFromVector: vector does not match expected size"); + } + + uint32_t vec_p = 0; + for (const auto &elem : props) { + const auto start = cont_field.begin() + vec_p; + const auto end = start + req_vec_size; + + const auto map_it = this->find(elem); + + map_it->second = FieldColumn(start, end); + + vec_p += req_vec_size; + } + + return *this; +} + +poet::Field & +poet::Field::operator=(const std::vector &cont_field) { + if (cont_field.size() != this->size()) { + throw std::runtime_error( + "Input field contains more or less elements than this container."); + } + + auto in_field_it = cont_field.begin(); + + for (const auto &elem : props) { + if (in_field_it->size() != req_vec_size) { + throw std::runtime_error( + "One vector contains more or less elements than expected."); + } + + const auto map_it = this->find(elem); + + map_it->second = *(in_field_it++); + } + return *this; +} + +void poet::Field::fromSEXP(const SEXP &s_rhs) { + this->clear(); + + SEXP s_vec = PROTECT(Rf_coerceVector(s_rhs, VECSXP)); + SEXP s_names = PROTECT(Rf_getAttrib(s_vec, R_NamesSymbol)); + + std::size_t cols = static_cast(Rf_length(s_vec)); + + this->props.clear(); + this->props.reserve(cols); + + for (std::size_t i = 0; i < cols; i++) { + const std::string prop_name(CHAR(STRING_ELT(s_names, i))); + this->props.push_back(prop_name); + + SEXP s_values = PROTECT(VECTOR_ELT(s_vec, i)); + + if (i == 0) { + this->req_vec_size = static_cast(Rf_length(s_values)); + } + + FieldColumn input(this->req_vec_size); + + for (std::size_t j = 0; j < this->req_vec_size; j++) { + input[j] = static_cast(REAL(s_values)[j]); + } + + UNPROTECT(1); + + this->insert({prop_name, input}); + } + + UNPROTECT(2); +} diff --git a/src/DiffusionModule.cpp b/src/DiffusionModule.cpp index 576785461..e02c42e03 100644 --- a/src/DiffusionModule.cpp +++ b/src/DiffusionModule.cpp @@ -18,15 +18,15 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include -#include -#include -#include #include #include #include #include #include +#include +#include +#include +#include #include #include @@ -58,17 +58,18 @@ inline const char *convert_bc_to_config_file(uint8_t in) { DiffusionModule::DiffusionModule(const poet::DiffusionParams &diffu_args, const poet::GridParams &grid_params) - : t_field{grid_params.total_n}, n_cells_per_prop(grid_params.total_n) { + : n_cells_per_prop(grid_params.total_n) { this->diff_input.setGridCellN(grid_params.n_cells[0], grid_params.n_cells[1]); this->diff_input.setDomainSize(grid_params.s_cells[0], grid_params.s_cells[1]); this->dim = grid_params.dim; - this->initialize(diffu_args); + this->initialize(diffu_args, grid_params.total_n); } -void DiffusionModule::initialize(const poet::DiffusionParams &args) { +void DiffusionModule::initialize(const poet::DiffusionParams &args, + std::uint32_t n_grid_cells) { // const poet::DiffusionParams args(this->R); // name of props @@ -87,7 +88,7 @@ void DiffusionModule::initialize(const poet::DiffusionParams &args) { this->alpha.push_back(args.alpha[this->prop_names[i]]); const double val = args.initial_t[prop_names[i]]; - std::vector init_val(t_field.GetRequestedVecSize(), val); + std::vector init_val(n_grid_cells, val); initial_values.push_back(std::move(init_val)); if (this->dim == this->DIM_2D) { @@ -100,7 +101,7 @@ void DiffusionModule::initialize(const poet::DiffusionParams &args) { } } - t_field.InitFromVec(initial_values, prop_names); + this->t_field = Field(n_grid_cells, initial_values, prop_names); // apply boundary conditions to each ghost node uint8_t bc_size = (this->dim == this->DIM_1D ? 2 : 4); @@ -156,30 +157,30 @@ void DiffusionModule::simulate(double dt) { MSG_NOENDL("DiffusionModule::simulate(): Starting diffusion ..."); std::cout << std::flush; - + std::vector> field_2d = t_field.As2DVector(); this->diff_input.setTimestep(dt); for (int i = 0; i < field_2d.size(); i++) { - std::vector in_alpha(this->n_cells_per_prop, this->alpha[i]); - this->diff_input.setBoundaryCondition(this->bc_vec[i]); - - if (this->dim == this->DIM_1D) { - tug::diffusion::BTCS_1D(this->diff_input, field_2d[i].data(), - in_alpha.data()); - } else { - tug::diffusion::ADI_2D(this->diff_input, field_2d[i].data(), - in_alpha.data()); - } + std::vector in_alpha(this->n_cells_per_prop, this->alpha[i]); + this->diff_input.setBoundaryCondition(this->bc_vec[i]); + + if (this->dim == this->DIM_1D) { + tug::diffusion::BTCS_1D(this->diff_input, field_2d[i].data(), + in_alpha.data()); + } else { + tug::diffusion::ADI_2D(this->diff_input, field_2d[i].data(), + in_alpha.data()); + } } - - t_field.SetFromVector(field_2d); - - std::cout << " done!" << std::endl; - + + t_field = field_2d; + + std::cout << " done!\n"; + sim_a_transport = MPI_Wtime(); - + transport_t += sim_a_transport - sim_b_transport; } diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 761de8f3f..fd77dde20 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -83,14 +83,22 @@ void poet::ChemistryParams::initFromR(RInsidePOET &R) { Rcpp::as(R.parseEval("mysetup$chemistry$input_script")); if (R.checkIfExists("dht_species", "mysetup$chemistry")) { - this->dht_signifs = - R.wrapNamedVector("mysetup$chemistry$dht_species"); + this->dht_signifs = Rcpp::as>( + R.parseEval(("mysetup$chemistry$dht_species"))); } if (R.checkIfExists("pht_species", "mysetup$chemistry")) { - this->pht_signifs = - R.wrapNamedVector("mysetup$chemistry$pht_species"); + this->pht_signifs = Rcpp::as>( + R.parseEval(("mysetup$chemistry$pht_species"))); } + this->hooks.dht_fill = + RHookFunction(R, "mysetup$chemistry$hooks$dht_fill"); + this->hooks.dht_fuzz = + RHookFunction>(R, "mysetup$chemistry$hooks$dht_fuzz"); + this->hooks.interp_pre = RHookFunction>( + R, "mysetup$chemistry$hooks$interp_pre_func"); + this->hooks.interp_post = + RHookFunction(R, "mysetup$chemistry$hooks$interp_post_func"); } SimParams::SimParams(int world_rank_, int world_size_) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9d3a5ac43..32078d388 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,6 +5,10 @@ file(GLOB test_SRC add_executable(testPOET ${test_SRC}) target_link_libraries(testPOET doctest poet_lib) +get_filename_component(TEST_RInsideSourceFile "RInsidePOET_funcs.R" REALPATH) +configure_file(testDataStructures.hpp.in testDataStructures.hpp) +target_include_directories(testPOET PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") + add_custom_target(check COMMAND $ DEPENDS testPOET diff --git a/test/RInsidePOET_funcs.R b/test/RInsidePOET_funcs.R new file mode 100644 index 000000000..668b05169 --- /dev/null +++ b/test/RInsidePOET_funcs.R @@ -0,0 +1,22 @@ +simple_named_vec <- function(input) { + input["Ca"] <- 0 + + return(input) +} + +extended_named_vec <- function(input, additional) { + return(input["H"] + additional["H"]) +} + +bool_named_vec <- function(input) { + return(input["H"] == 0) +} + +simple_field <- function(field) { + field$Na <- 0 + return(field) +} + +extended_field <- function(field, additional) { + return(field + additional) +} diff --git a/test/testDataStructures.hpp.in b/test/testDataStructures.hpp.in new file mode 100644 index 000000000..c1d10271a --- /dev/null +++ b/test/testDataStructures.hpp.in @@ -0,0 +1,6 @@ +#ifndef TESTRINSIDEPOET_H_ +#define TESTRINSIDEPOET_H_ + +inline const char *RInside_source_file = "@TEST_RInsideSourceFile@"; + +#endif // TESTRINSIDEPOET_H_ diff --git a/test/testDataStructures.cpp b/test/testField.cpp similarity index 71% rename from test/testDataStructures.cpp rename to test/testField.cpp index c3046f275..8ee1bf5d6 100644 --- a/test/testDataStructures.cpp +++ b/test/testField.cpp @@ -1,12 +1,17 @@ +#include #include #include +#include #include #include #include -#include +#include +#include #include #include +#include "testDataStructures.hpp" + using namespace poet; #define CHECK_AND_FAIL_LOOP(val1, val2) \ @@ -19,14 +24,17 @@ TEST_CASE("Field") { constexpr uint32_t VEC_COUNT = 3; constexpr double INIT_VAL = 1; - Field dut(VEC_SIZE); - std::vector names = {"C", "Ca", "Na"}; std::vector init_values(names.size(), FieldColumn(VEC_SIZE, INIT_VAL)); + RInsidePOET &R = RInsidePOET::getInstance(); + + R["sourcefile"] = RInside_source_file; + R.parseEval("source(sourcefile)"); + SUBCASE("Initialize field with 2D vector") { - dut.InitFromVec(init_values, names); + Field dut(VEC_SIZE, init_values, names); auto props = dut.GetProps(); @@ -43,7 +51,7 @@ TEST_CASE("Field") { SUBCASE("Initialize field with 2D vector") { std::vector init_values(VEC_SIZE * VEC_COUNT, 1); - dut.InitFromVec(init_values, names); + Field dut(VEC_SIZE, init_values, names); auto props = dut.GetProps(); @@ -58,7 +66,7 @@ TEST_CASE("Field") { } } - dut.InitFromVec(init_values, names); + Field dut(VEC_SIZE, init_values, names); SUBCASE("Return vector") { std::vector output = dut.AsVector(); @@ -66,6 +74,37 @@ TEST_CASE("Field") { CHECK(output.size() == VEC_SIZE * VEC_COUNT); } + SUBCASE("SEXP return") { + R["test"] = dut.asSEXP(); + + Field field_constructed = R.parseEval("test"); + + Rcpp::DataFrame R_df = R.parseEval("test"); + Rcpp::CharacterVector R_names = R_df.names(); + + for (std::size_t i = 0; i < VEC_COUNT; i++) { + const auto r_values = Rcpp::as>(R_df[i]); + CHECK_EQ(r_values, dut[names[i]]); + CHECK_EQ(R_names[i], names[i]); + CHECK_EQ(field_constructed[names[i]], dut[names[i]]); + } + } + + SUBCASE("Apply R function (set Na to zero)") { + RHookFunction to_call(R, "simple_field"); + Field field_proc = to_call(dut.asSEXP()); + + CHECK_EQ(field_proc["Na"], FieldColumn(dut.GetRequestedVecSize(), 0)); + } + + SUBCASE("Apply R function (add two fields)") { + RHookFunction to_call(R, "extended_field"); + Field field_proc = to_call(dut.asSEXP(), dut.asSEXP()); + + CHECK_EQ(field_proc["Na"], + FieldColumn(dut.GetRequestedVecSize(), INIT_VAL + INIT_VAL)); + } + constexpr double NEW_VAL = 2.; std::vector new_val_vec(VEC_SIZE, NEW_VAL); @@ -102,7 +141,7 @@ TEST_CASE("Field") { } } - dut.SetFromVector(new_field); + dut = new_field; SUBCASE("SetFromVector return correct field vector") { auto ret_vec = dut.AsVector(); @@ -147,19 +186,18 @@ TEST_CASE("Field") { // reset field names = dut.GetProps(); - dut.SetFromVector( - std::vector(names.size(), FieldColumn(VEC_SIZE, INIT_VAL))); + + dut = std::vector(names.size(), FieldColumn(VEC_SIZE, INIT_VAL)); constexpr double SOME_OTHER_VAL = -0.5; - Field some_other_field(VEC_SIZE); std::vector some_other_props = {"Na", "Cl"}; std::vector some_other_values(VEC_SIZE * some_other_props.size(), SOME_OTHER_VAL); - some_other_field.InitFromVec(some_other_values, some_other_props); + Field some_other_field(VEC_SIZE, some_other_values, some_other_props); SUBCASE("Update existing field from another field") { - dut.UpdateFromField(some_other_field); + dut.update(some_other_field); auto ret_vec = dut.As2DVector(); auto ret_it = ret_vec.begin(); diff --git a/test/testNamedVector.cpp b/test/testNamedVector.cpp new file mode 100644 index 000000000..713f2729b --- /dev/null +++ b/test/testNamedVector.cpp @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "testDataStructures.hpp" + +TEST_CASE("NamedVector") { + RInsidePOET &R = RInsidePOET::getInstance(); + + R["sourcefile"] = RInside_source_file; + R.parseEval("source(sourcefile)"); + + const std::vector names{{"H", "O", "Ca"}}; + const std::vector values{{0.1, 0.2, 0.3}}; + + poet::NamedVector nv(names, values); + + SUBCASE("SEXP conversion") { + R["test"] = nv; + + const Rcpp::NumericVector R_value = R.parseEval("test"); + const Rcpp::CharacterVector R_names = R_value.names(); + + const poet::NamedVector nv_constructed = R["test"]; + + for (std::size_t i = 0; i < R_value.size(); i++) { + CHECK_EQ(R_value[i], values[i]); + CHECK_EQ(R_names[i], names[i]); + CHECK_EQ(nv_constructed[i], values[i]); + CHECK_EQ(nv_constructed.getNames()[i], names[i]); + } + } + + SUBCASE("Apply R function (set to zero)") { + RHookFunction> to_call(R, "simple_named_vec"); + nv = to_call(nv); + + CHECK_EQ(nv[2], 0); + } + + SUBCASE("Apply R function (second NamedVector)") { + RHookFunction> to_call(R, "extended_named_vec"); + + const std::vector names{{"C", "H", "Mg"}}; + const std::vector values{{0, 1, 2}}; + + poet::NamedVector second_nv(names, values); + + nv = to_call(nv, second_nv); + + CHECK_EQ(nv[0], 1.1); + } + + SUBCASE("Apply R function (check if zero)") { + RHookFunction to_call(R, "bool_named_vec"); + + CHECK_FALSE(to_call(nv)); + } +} From b961c26a8551bca1522e4cdc7d4560f699835b25 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Thu, 17 Aug 2023 14:29:09 +0200 Subject: [PATCH 21/52] testing staged changes --- R_lib/kin_r_library.R | 390 +++++++++++++++++++++--------------------- src/SimParams.cpp | 8 +- 2 files changed, 203 insertions(+), 195 deletions(-) diff --git a/R_lib/kin_r_library.R b/R_lib/kin_r_library.R index c759bf9e1..273fe6c4f 100644 --- a/R_lib/kin_r_library.R +++ b/R_lib/kin_r_library.R @@ -1,4 +1,6 @@ -### Copyright (C) 2018-2022 Marco De Lucia, Max Luebke (GFZ Potsdam) +## Time-stamp: "Last modified 2023-08-15 11:58:23 delucia" + +### Copyright (C) 2018-2023 Marco De Lucia, Max Luebke (GFZ Potsdam) ### ### POET is free software; you can redistribute it and/or modify it under the ### terms of the GNU General Public License as published by the Free Software @@ -13,51 +15,52 @@ ### this program; if not, write to the Free Software Foundation, Inc., 51 ### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + ## Simple function to check file extension. It is needed to check if ## the GridFile is SUM (MUFITS format) or rds/RData FileExt <- function(x) { - pos <- regexpr("\\.([[:alnum:]]+)$", x) - ifelse(pos > -1L, substring(x, pos + 1L), "") + pos <- regexpr("\\.([[:alnum:]]+)$", x) + ifelse(pos > -1L, substring(x, pos + 1L), "") } master_init <- function(setup) { - msgm("Process with rank 0 reading GRID properties") + msgm("Process with rank 0 reading GRID properties") - ## Setup the directory where we will store the results - verb <- FALSE - if (local_rank == 0) { - verb <- TRUE ## verbosity loading MUFITS results - if (!dir.exists(fileout)) { - dir.create(fileout) - msgm("created directory ", fileout) + ## Setup the directory where we will store the results + verb <- FALSE + if (local_rank == 0) { + verb <- TRUE ## verbosity loading MUFITS results + if (!dir.exists(fileout)) { + dir.create(fileout) + msgm("created directory ", fileout) + } else { + msgm("dir ", fileout, " already exists, I will overwrite!") + } + if (!exists("store_result")) { + msgm("store_result doesn't exist!") + } else { + msgm("store_result is ", store_result) + } } else { - msgm("dir ", fileout, " already exists, I will overwrite!") + } - if (!exists("store_result")) { - msgm("store_result doesn't exist!") - } else { - msgm("store_result is ", store_result) + + setup$iter <- 1 + setup$maxiter <- setup$iterations + setup$timesteps <- setup$timesteps + setup$simulation_time <- 0 + + if (is.null(setup[["store_result"]])) { + setup$store_result <- TRUE } - } else { - - } - - setup$iter <- 1 - setup$maxiter <- setup$iterations - setup$timesteps <- setup$timesteps - setup$simulation_time <- 0 - - if (is.null(setup[["store_result"]])) { - setup$store_result <- TRUE - } - - if (setup$store_result) { - if (is.null(setup[["out_save"]])) { - setup$out_save <- seq(1, setup$iterations) + + if (setup$store_result) { + if (is.null(setup[["out_save"]])) { + setup$out_save <- seq(1, setup$iterations) + } } - } - - return(setup) + + return(setup) } ## This function, called only by master, stores on disk the last @@ -95,125 +98,125 @@ master_iteration_end <- function(setup) { ## function for the workers to compute chemistry through PHREEQC slave_chemistry <- function(setup, data) { - base <- setup$base - first <- setup$first - prop <- setup$prop - immobile <- setup$immobile - kin <- setup$kin - ann <- setup$ann - - iter <- setup$iter - timesteps <- setup$timesteps - dt <- timesteps[iter] - - state_T <- data ## not the global field, but the work-package - - ## treat special H+/pH, e-/pe cases - state_T <- RedModRphree::Act2pH(state_T) - - ## reduction of the problem - if (setup$reduce) { - reduced <- ReduceStateOmit(state_T, omit = setup$ann) - } else { - reduced <- state_T - } - - ## form the PHREEQC input script for the current work package - inplist <- SplitMultiKin( - data = reduced, procs = 1, base = base, first = first, - ann = ann, prop = prop, minerals = immobile, kin = kin, dt = dt - ) - - ## if (local_rank==1 & iter==1) - ## RPhreeWriteInp("FirstInp", inplist) - - tmpC <- RunPQC(inplist, procs = 1, second = TRUE) - - ## recompose after the reduction - if (setup$reduce) { - state_C <- RecomposeState(tmpC, reduced) - } else { - state_C <- tmpC - } - - ## the next line is needed since we don't need all columns of - ## PHREEQC output - return(state_C[, prop]) + base <- setup$base + first <- setup$first + prop <- setup$prop + immobile <- setup$immobile + kin <- setup$kin + ann <- setup$ann + + iter <- setup$iter + timesteps <- setup$timesteps + dt <- timesteps[iter] + + state_T <- data ## not the global field, but the work-package + + ## treat special H+/pH, e-/pe cases + state_T <- RedModRphree::Act2pH(state_T) + + ## reduction of the problem + if (setup$reduce) { + reduced <- ReduceStateOmit(state_T, omit = setup$ann) + } else { + reduced <- state_T + } + + ## form the PHREEQC input script for the current work package + inplist <- SplitMultiKin( + data = reduced, procs = 1, base = base, first = first, + ann = ann, prop = prop, minerals = immobile, kin = kin, dt = dt + ) + + ## if (local_rank==1 & iter==1) + ## RPhreeWriteInp("FirstInp", inplist) + + tmpC <- RunPQC(inplist, procs = 1, second = TRUE) + + ## recompose after the reduction + if (setup$reduce) { + state_C <- RecomposeState(tmpC, reduced) + } else { + state_C <- tmpC + } + + ## the next line is needed since we don't need all columns of + ## PHREEQC output + return(state_C[, prop]) } ## This function, called by master master_chemistry <- function(setup, data) { - state_T <- setup$state_T - - msgm(" chemistry iteration", setup$iter) - - ## treat special H+/pH, e-/pe cases - state_T <- RedModRphree::Act2pH(state_T) - - ## reduction of the problem - if (setup$reduce) { - reduced <- ReduceStateOmit(state_T, omit = setup$ann) - } else { - reduced <- state_T - } - - ### inject data from workers - res_C <- data - - rownames(res_C) <- NULL - - ## print(res_C) - - if (nrow(res_C) > nrow(reduced)) { - res_C <- res_C[seq(2, nrow(res_C), by = 2), ] - } - - ## recompose after the reduction - if (setup$reduce) { - state_C <- RecomposeState(res_C, reduced) - } else { - state_C <- res_C - } - - setup$state_C <- state_C - setup$reduced <- reduced - - return(setup) + state_T <- setup$state_T + + msgm(" chemistry iteration", setup$iter) + + ## treat special H+/pH, e-/pe cases + state_T <- RedModRphree::Act2pH(state_T) + + ## reduction of the problem + if (setup$reduce) { + reduced <- ReduceStateOmit(state_T, omit = setup$ann) + } else { + reduced <- state_T + } + + ## inject data from workers + res_C <- data + + rownames(res_C) <- NULL + + ## print(res_C) + + if (nrow(res_C) > nrow(reduced)) { + res_C <- res_C[seq(2, nrow(res_C), by = 2), ] + } + + ## recompose after the reduction + if (setup$reduce) { + state_C <- RecomposeState(res_C, reduced) + } else { + state_C <- res_C + } + + setup$state_C <- state_C + setup$reduced <- reduced + + return(setup) } ## Adapted version for "reduction" ReduceStateOmit <- function(data, omit = NULL, sign = 6) { - require(mgcv) + require(mgcv) + + rem <- colnames(data) + if (is.list(omit)) { + indomi <- match(names(omit), colnames(data)) + datao <- data[, -indomi] + } else { + datao <- data + } + + datao <- signif(datao, sign) + red <- mgcv::uniquecombs(datao) + inds <- attr(red, "index") + now <- ncol(red) + + + ## reattach the omitted column(s) + ## FIXME: control if more than one ann is present + if (is.list(omit)) { + red <- cbind(red, rep(data[1, indomi], nrow(red))) - rem <- colnames(data) - if (is.list(omit)) { - indomi <- match(names(omit), colnames(data)) - datao <- data[, -indomi] - } else { - datao <- data - } + colnames(red)[now + 1] <- names(omit) - datao <- signif(datao, sign) - red <- mgcv::uniquecombs(datao) - inds <- attr(red, "index") - now <- ncol(red) - - - ## reattach the omitted column(s) - ## FIXME: control if more than one ann is present - if (is.list(omit)) { - red <- cbind(red, rep(data[1, indomi], nrow(red))) - - colnames(red)[now + 1] <- names(omit) - - ret <- red[, colnames(data)] - } else { - ret <- red - } - rownames(ret) <- NULL - attr(ret, "index") <- inds - return(ret) + ret <- red[, colnames(data)] + } else { + ret <- red + } + rownames(ret) <- NULL + attr(ret, "index") <- inds + return(ret) } @@ -221,67 +224,70 @@ ReduceStateOmit <- function(data, omit = NULL, sign = 6) { ## Attach the name of the calling function to the message displayed on ## R's stdout msgm <- function(...) { - if (local_rank == 0) { - fname <- as.list(sys.call(-1))[[1]] - prefix <- paste0("R: ", fname, " ::") - cat(paste(prefix, ..., "\n")) - } - invisible() + if (local_rank == 0) { + fname <- as.list(sys.call(-1))[[1]] + prefix <- paste0("R: ", fname, " ::") + cat(paste(prefix, ..., "\n")) + } + invisible() } ## Function called by master R process to store on disk all relevant ## parameters for the simulation StoreSetup <- function(setup) { - to_store <- vector(mode = "list", length = 3) - # names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT") - names(to_store) <- c("Sim", "Transport", "DHT") - ## read the setup R file, which is sourced in kin.cpp - tmpbuff <- file(filesim, "r") - setupfile <- readLines(tmpbuff) - close.connection(tmpbuff) + to_store <- vector(mode = "list", length = 4) + ## names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT") + names(to_store) <- c("Sim", "Transport", "DHT", "Cmdline") + + ## read the setup R file, which is sourced in kin.cpp + tmpbuff <- file(filesim, "r") + setupfile <- readLines(tmpbuff) + close.connection(tmpbuff) + + to_store$Sim <- setupfile + + ## to_store$Flow <- list( + ## snapshots = setup$snapshots, + ## gridfile = setup$gridfile, + ## phase = setup$phase, + ## density = setup$density, + ## dt_differ = setup$dt_differ, + ## prolong = setup$prolong, + ## maxiter = setup$maxiter, + ## saved_iter = setup$iter_output, + ## out_save = setup$out_save ) - to_store$Sim <- setupfile + to_store$Transport <- setup$diffusion - # to_store$Flow <- list( - # snapshots = setup$snapshots, - # gridfile = setup$gridfile, - # phase = setup$phase, - # density = setup$density, - # dt_differ = setup$dt_differ, - # prolong = setup$prolong, - # maxiter = setup$maxiter, - # saved_iter = setup$iter_output, - # out_save = setup$out_save ) + ## to_store$Chemistry <- list( + ## nprocs = n_procs, + ## wp_size = work_package_size, + ## base = setup$base, + ## first = setup$first, + ## init = setup$initsim, + ## db = db, + ## kin = setup$kin, + ## ann = setup$ann) - to_store$Transport <- setup$diffusion - # to_store$Chemistry <- list( - # nprocs = n_procs, - # wp_size = work_package_size, - # base = setup$base, - # first = setup$first, - # init = setup$initsim, - # db = db, - # kin = setup$kin, - # ann = setup$ann) + if (dht_enabled) { + to_store$DHT <- list( + enabled = dht_enabled, + log = dht_log + ## signif = dht_final_signif, + ## proptype = dht_final_proptype + ) + } else { + to_store$DHT <- FALSE + } - if (dht_enabled) { - to_store$DHT <- list( - enabled = dht_enabled, - log = dht_log - #signif = dht_final_signif, - #proptype = dht_final_proptype - ) - } else { - to_store$DHT <- FALSE - } - - saveRDS(to_store, file = paste0(fileout, "/setup.rds")) - msgm("initialization stored in ", paste0(fileout, "/setup.rds")) + to_store$Cmdline <- commandArgs(trailingOnly=FALSE) + saveRDS(to_store, file = paste0(fileout, "/setup.rds")) + msgm("initialization stored in ", paste0(fileout, "/setup.rds")) } GetWorkPackageSizesVector <- function(n_packages, package_size, len) { - ids <- rep(1:n_packages, times=package_size, each = 1)[1:len] - return(as.integer(table(ids))) + ids <- rep(1:n_packages, times=package_size, each = 1)[1:len] + return(as.integer(table(ids))) } diff --git a/src/SimParams.cpp b/src/SimParams.cpp index fd77dde20..0a81c5b73 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -1,8 +1,10 @@ +// Time-stamp: "Last modified 2023-08-15 12:12:36 delucia" + /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of ** Potsdam) ** -** Copyright (C) 2018-2022 Marco De Lucia, Max Luebke (GFZ Potsdam) +** Copyright (C) 2018-2023 Marco De Lucia, Max Luebke (GFZ Potsdam) ** ** POET is free software; you can redistribute it and/or modify it under the ** terms of the GNU General Public License as published by the Free Software @@ -144,9 +146,9 @@ int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { simparams.print_progressbar = cmdl[{"P", "progress"}]; - simparams.print_progressbar = cmdl[{"P", "progress"}]; + // simparams.print_progressbar = cmdl[{"P", "progress"}]; - /*Parse DHT arguments*/ + /* Parse DHT arguments */ chem_params.use_dht = cmdl["dht"]; chem_params.use_interp = cmdl["interp"]; // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n'; From 7269b6fc81ebdd61f89bff4ff8be612ad138ee19 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 12:26:52 +0200 Subject: [PATCH 22/52] doc: added bench/barite/README.org --- bench/barite/README.org | 47 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 bench/barite/README.org diff --git a/bench/barite/README.org b/bench/barite/README.org new file mode 100644 index 000000000..a9e8f94e7 --- /dev/null +++ b/bench/barite/README.org @@ -0,0 +1,47 @@ +#+TITLE: Description of \texttt{barite} benchmark +#+AUTHOR: MDL +#+DATE: 2023-08-26 +#+STARTUP: inlineimages +#+LATEX_CLASS_OPTIONS: [a4paper,9pt] +#+LATEX_HEADER: \usepackage{fullpage} +#+LATEX_HEADER: \usepackage{amsmath, systeme} +#+LATEX_HEADER: \usepackage{graphicx} +#+LATEX_HEADER: \usepackage{charter} +#+OPTIONS: toc:nil + +* Quick start + +#+begin_src sh :language sh :frame single +mpirun -np 4 ./poet barite.R barite_results +#+end_src + + +* Chemical system + +The benchmark accounts for reaction kinetics for celestite dissolution +and barite precipitation. The system is initially at equilibrium with +celestite; following diffusion of $BaCl_2$ celestite dissolution +occurs Dissolution of celestite and the successive release of +$SO_4^{2-}$ into solution causes barite to precipitate: + + +$ \mathrm{Ba}^{2+}_{\mathrm{(aq)}} + \mathrm{SrSO}_{4, \mathrm{(s)}} \rightarrow \mathrm{BaSO}_{4,\mathrm{(s)}} + \mathrm{Sr}^{2+}_{\mathrm{(s)}} $ + +Reaction rates are calculated using a general kinetics rate law for +both dissolution and precipitation based on transition state +theory: + +$ \frac{\mathrm{d}m_{m}}{\mathrm{d}t} = -\mathrm{SA}_m k_{\mathrm{r},m} (1-\mathrm{SR}_{m})$ + + +where $\mathrm{d}m\,(\mathrm{mol/s})$ is the rate of a mineral phase +$m$, $\mathrm{SA}\,\mathrm{(m^2)}$ is the reactive surface area, +$k_{\mathrm{r}}\,\mathrm{(mol/m^2/s)}$ is the rate constant, and +$\mathrm{SR}\, {(\text{--})}$ is the saturation ratio, i.e., the ratio +of the ion activity product of the reacting species and the solubility +constant. + + +* List of Files + + From a5e68fbf34054406d8581368a6470076be185324 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 12:29:31 +0200 Subject: [PATCH 23/52] doc: testing src_latex in README.org --- bench/barite/README.org | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index a9e8f94e7..bce368b0f 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -24,8 +24,9 @@ celestite; following diffusion of $BaCl_2$ celestite dissolution occurs Dissolution of celestite and the successive release of $SO_4^{2-}$ into solution causes barite to precipitate: - +#+begin_src tex $ \mathrm{Ba}^{2+}_{\mathrm{(aq)}} + \mathrm{SrSO}_{4, \mathrm{(s)}} \rightarrow \mathrm{BaSO}_{4,\mathrm{(s)}} + \mathrm{Sr}^{2+}_{\mathrm{(s)}} $ +#+end_src Reaction rates are calculated using a general kinetics rate law for both dissolution and precipitation based on transition state From 79ef735be72bfa3461efbef9c1b2b80af0d177f6 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 12:32:47 +0200 Subject: [PATCH 24/52] Update file README.org --- bench/barite/README.org | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index bce368b0f..180c3c4de 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -24,15 +24,13 @@ celestite; following diffusion of $BaCl_2$ celestite dissolution occurs Dissolution of celestite and the successive release of $SO_4^{2-}$ into solution causes barite to precipitate: -#+begin_src tex -$ \mathrm{Ba}^{2+}_{\mathrm{(aq)}} + \mathrm{SrSO}_{4, \mathrm{(s)}} \rightarrow \mathrm{BaSO}_{4,\mathrm{(s)}} + \mathrm{Sr}^{2+}_{\mathrm{(s)}} $ -#+end_src +```\mathrm{Ba}^{2+}_{\mathrm{(aq)}} + \mathrm{SrSO}_{4, \mathrm{(s)}} \rightarrow \mathrm{BaSO}_{4,\mathrm{(s)}} + \mathrm{Sr}^{2+}_{\mathrm{(s)}}``` Reaction rates are calculated using a general kinetics rate law for both dissolution and precipitation based on transition state theory: -$ \frac{\mathrm{d}m_{m}}{\mathrm{d}t} = -\mathrm{SA}_m k_{\mathrm{r},m} (1-\mathrm{SR}_{m})$ +```\frac{\mathrm{d}m_{m}}{\mathrm{d}t} = -\mathrm{SA}_m k_{\mathrm{r},m} (1-\mathrm{SR}_{m})``` where $\mathrm{d}m\,(\mathrm{mol/s})$ is the rate of a mineral phase From 3c530a68d0d68ed39dd0616189f31ee5f07da9a0 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 12:49:31 +0200 Subject: [PATCH 25/52] Revert "Update file README.org" This reverts commit 79ef735be72bfa3461efbef9c1b2b80af0d177f6 --- bench/barite/README.org | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index 180c3c4de..bce368b0f 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -24,13 +24,15 @@ celestite; following diffusion of $BaCl_2$ celestite dissolution occurs Dissolution of celestite and the successive release of $SO_4^{2-}$ into solution causes barite to precipitate: -```\mathrm{Ba}^{2+}_{\mathrm{(aq)}} + \mathrm{SrSO}_{4, \mathrm{(s)}} \rightarrow \mathrm{BaSO}_{4,\mathrm{(s)}} + \mathrm{Sr}^{2+}_{\mathrm{(s)}}``` +#+begin_src tex +$ \mathrm{Ba}^{2+}_{\mathrm{(aq)}} + \mathrm{SrSO}_{4, \mathrm{(s)}} \rightarrow \mathrm{BaSO}_{4,\mathrm{(s)}} + \mathrm{Sr}^{2+}_{\mathrm{(s)}} $ +#+end_src Reaction rates are calculated using a general kinetics rate law for both dissolution and precipitation based on transition state theory: -```\frac{\mathrm{d}m_{m}}{\mathrm{d}t} = -\mathrm{SA}_m k_{\mathrm{r},m} (1-\mathrm{SR}_{m})``` +$ \frac{\mathrm{d}m_{m}}{\mathrm{d}t} = -\mathrm{SA}_m k_{\mathrm{r},m} (1-\mathrm{SR}_{m})$ where $\mathrm{d}m\,(\mathrm{mol/s})$ is the rate of a mineral phase From 44c6e45fefac13d3ad993ad000ba6a5931aa0300 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 12:46:27 +0200 Subject: [PATCH 26/52] some fixes --- bench/barite/README.org | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index bce368b0f..f417d1b32 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -18,21 +18,21 @@ mpirun -np 4 ./poet barite.R barite_results * Chemical system -The benchmark accounts for reaction kinetics for celestite dissolution -and barite precipitation. The system is initially at equilibrium with -celestite; following diffusion of $BaCl_2$ celestite dissolution -occurs Dissolution of celestite and the successive release of -$SO_4^{2-}$ into solution causes barite to precipitate: +The benchmark depicts a porous system where pure water is initially at +equilibrium with the *celestite* (strontium sulfate; brute formula: +SrSO_4). A solution containing only dissolved Ba^{2+} and Cl^- +diffuses into the system causing celestite dissolution. The resulting +increased concentration of dissolved sulfate SO_4^{2-} induces +precipitation of *barite* (barium sulfate; brute formula: +BaSO_4^{2-}). The overall reaction can be written: -#+begin_src tex -$ \mathrm{Ba}^{2+}_{\mathrm{(aq)}} + \mathrm{SrSO}_{4, \mathrm{(s)}} \rightarrow \mathrm{BaSO}_{4,\mathrm{(s)}} + \mathrm{Sr}^{2+}_{\mathrm{(s)}} $ -#+end_src +Ba^{2+} + SrSO_4 \rightarrow BaSO_4 + Sr^{2+} -Reaction rates are calculated using a general kinetics rate law for -both dissolution and precipitation based on transition state -theory: +Both celestite dissolution and barite precipitation are calculated +using a general kinetics rate law based on transition state theory: -$ \frac{\mathrm{d}m_{m}}{\mathrm{d}t} = -\mathrm{SA}_m k_{\mathrm{r},m} (1-\mathrm{SR}_{m})$ +\frac{\mathrm{d}m_{m}}{\mathrm{d}t} = -\mathrm{SA}_m k_{\mathrm{r},m} +(1-\mathrm{SR}_{m}) where $\mathrm{d}m\,(\mathrm{mol/s})$ is the rate of a mineral phase From a8d23a9563b0cf08c967c92cbfc647e9e3de9ae9 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 13:10:17 +0200 Subject: [PATCH 27/52] eqs and text --- bench/barite/README.org | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index f417d1b32..caf4635e4 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -1,4 +1,4 @@ -#+TITLE: Description of \texttt{barite} benchmark +#+TITLE: Description of =barite= benchmark #+AUTHOR: MDL #+DATE: 2023-08-26 #+STARTUP: inlineimages @@ -19,28 +19,27 @@ mpirun -np 4 ./poet barite.R barite_results * Chemical system The benchmark depicts a porous system where pure water is initially at -equilibrium with the *celestite* (strontium sulfate; brute formula: -SrSO_4). A solution containing only dissolved Ba^{2+} and Cl^- -diffuses into the system causing celestite dissolution. The resulting -increased concentration of dissolved sulfate SO_4^{2-} induces -precipitation of *barite* (barium sulfate; brute formula: -BaSO_4^{2-}). The overall reaction can be written: +equilibrium with *celestite* (strontium sulfate; brute formula: +SrSO_4). -Ba^{2+} + SrSO_4 \rightarrow BaSO_4 + Sr^{2+} +A solution containing only dissolved Ba^{2+} and Cl^- diffuses into +the system causing celestite dissolution. The increased concentration +of dissolved sulfate SO_{4}^{2-} induces precipitation of *barite* +(barium sulfate; brute formula: BaSO_{4}^{2-}). The overall reaction +can be written: + +Ba^{2+} + celestite \rightarrow barite + Sr^{2+} Both celestite dissolution and barite precipitation are calculated -using a general kinetics rate law based on transition state theory: +using a kinetics rate law based on transition state theory: -\frac{\mathrm{d}m_{m}}{\mathrm{d}t} = -\mathrm{SA}_m k_{\mathrm{r},m} -(1-\mathrm{SR}_{m}) +rate = -S_{m} K (1-SR_{m}) +where the reaction rate has units mol/s, S_m (m^2) is the reactive +surface area, K (mol/m^2/s) is the rate constant, and SR is the +saturation ratio, i.e., the ratio of the ion activity product of the +reacting species and the solubility constant. -where $\mathrm{d}m\,(\mathrm{mol/s})$ is the rate of a mineral phase -$m$, $\mathrm{SA}\,\mathrm{(m^2)}$ is the reactive surface area, -$k_{\mathrm{r}}\,\mathrm{(mol/m^2/s)}$ is the rate constant, and -$\mathrm{SR}\, {(\text{--})}$ is the saturation ratio, i.e., the ratio -of the ion activity product of the reacting species and the solubility -constant. * List of Files From dbb25dc0afe788b076ab2582aa7c15ea346e97f0 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 13:41:00 +0200 Subject: [PATCH 28/52] more eqs and text --- bench/barite/README.org | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index caf4635e4..b732ad9b0 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -13,8 +13,17 @@ #+begin_src sh :language sh :frame single mpirun -np 4 ./poet barite.R barite_results +mpirun -np 4 ./poet --interp barite_interp_eval.R barite_results #+end_src +* List of Files + +- =barite.R=: POET input script for a 20x20 simulation grid +- =barite_interp_eval.R=: POET input script for a 400x200 simulation + grid +- =db_barite.dat=: PHREEQC database containing the kinetic expressions + for barite and celestite, stripped down from =phreeqc.dat= +- =barite.pqi=: PHREEQC input script defining the chemical system * Chemical system @@ -35,13 +44,34 @@ using a kinetics rate law based on transition state theory: rate = -S_{m} K (1-SR_{m}) -where the reaction rate has units mol/s, S_m (m^2) is the reactive -surface area, K (mol/m^2/s) is the rate constant, and SR is the +where the reaction rate has units mol/s, S_{m} (m^{2}) is the reactive +surface area, K (mol/m^{2}/s) is the rate constant, and SR is the saturation ratio, i.e., the ratio of the ion activity product of the reacting species and the solubility constant. +For barite, the reaction rate is computed as sum of two mechanisms, +/acid/ and /neutral/: + rate_{barite} = S_{m} (acid + neutral) * (1 - SR_{barite}) -* List of Files +where + acid = 10^{-6.9} \exp(\frac {-30800 \cdot T'} R) * act(H^{+})^{0.22} +and + + neutral = 10^{-7.9} \exp( \frac {-30800 * T'} R) + +R is the gas constant (8.314462 J / K / mol) and T' (K^{-1}) accounts +for temperature dependence of the kinetic coefficients which are +experimentally determined at 25 °C or 298.15 K + + T' = (1 / K) - (1 / 298.15) + + +The kinetic rate law as implemented in the =db_barite.dat= file +accepts one parameter which represents reactive surface area in m^{2}. +For this benchmarks the surface areas are set to + +- S_{barite}: 50 m^{2} +- S_{celestite}: 10 m^{2} From b86931bffecd8649657ea6652bfc3b7c8b0096c2 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 13:55:06 +0200 Subject: [PATCH 29/52] refs, eqs and text --- bench/barite/README.org | 48 +++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index b732ad9b0..589ca25a7 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -27,10 +27,9 @@ mpirun -np 4 ./poet --interp barite_interp_eval.R barite_results * Chemical system -The benchmark depicts a porous system where pure water is initially at -equilibrium with *celestite* (strontium sulfate; brute formula: -SrSO_4). - +The benchmark depicts an isotherm porous system at *25 °C* where pure +water is initially at equilibrium with *celestite* (strontium sulfate; +brute formula: SrSO_4). A solution containing only dissolved Ba^{2+} and Cl^- diffuses into the system causing celestite dissolution. The increased concentration of dissolved sulfate SO_{4}^{2-} induces precipitation of *barite* @@ -50,28 +49,41 @@ saturation ratio, i.e., the ratio of the ion activity product of the reacting species and the solubility constant. For barite, the reaction rate is computed as sum of two mechanisms, -/acid/ and /neutral/: +r_{/acid/} and r_{/neutral/}: - rate_{barite} = S_{m} (acid + neutral) * (1 - SR_{barite}) +rate_{barite} = S_{barite} (r_{/acid/} + r_{/neutral/}) * (1 - SR_{barite}) -where +where: - acid = 10^{-6.9} \exp(\frac {-30800 \cdot T'} R) * act(H^{+})^{0.22} +r_{/acid/} = 10^{-6.9} e^{-30800 / R} \cdot act(H^{+})^{0.22} -and +r_{/neutral/} = 10^{-7.9} e^{-30800 / R} - neutral = 10^{-7.9} \exp( \frac {-30800 * T'} R) +R (8.314462 J K^{-1} mol^{-1}) is the gas constant. -R is the gas constant (8.314462 J / K / mol) and T' (K^{-1}) accounts -for temperature dependence of the kinetic coefficients which are -experimentally determined at 25 °C or 298.15 K +For celestite the kinetic law considers only the acidic term and +reads: - T' = (1 / K) - (1 / 298.15) - +rate_{celestite} = S_{celestite} 10^{-5.66} e^{-23800 / R} \cdot +act(H^{+})^{0.109} \cdot (1 - SR_{celestite}) -The kinetic rate law as implemented in the =db_barite.dat= file +The kinetic rate laws as implemented in the =db_barite.dat= file accepts one parameter which represents reactive surface area in m^{2}. -For this benchmarks the surface areas are set to +For the benchmarks the surface areas are set to - S_{barite}: 50 m^{2} -- S_{celestite}: 10 m^{2} +- S_{celestite}: 10 m^{2} + + +** Initial conditions + +The parametrization + + +* References + +- Tranter, Wetzel, De Lucia and Kühn (2021): Reactive transport model + of kinetically controlled celestite to barite replacement, Adv. + Geosci., 1, 1–9 ,https://doi.org/10.5194/adgeo-1-1-2021 + + From f0c9c3ea7147577c7863a00403177441cbc2efb8 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 14:24:26 +0200 Subject: [PATCH 30/52] ICs, BCs, fixes --- bench/barite/README.org | 86 +++++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 21 deletions(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index 589ca25a7..6378516b5 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -29,61 +29,105 @@ mpirun -np 4 ./poet --interp barite_interp_eval.R barite_results The benchmark depicts an isotherm porous system at *25 °C* where pure water is initially at equilibrium with *celestite* (strontium sulfate; -brute formula: SrSO_4). -A solution containing only dissolved Ba^{2+} and Cl^- diffuses into -the system causing celestite dissolution. The increased concentration -of dissolved sulfate SO_{4}^{2-} induces precipitation of *barite* -(barium sulfate; brute formula: BaSO_{4}^{2-}). The overall reaction -can be written: +brute formula: SrSO_{4}). A solution containing only dissolved Ba^{2+} +and Cl^- diffuses into the system causing celestite dissolution. The +increased concentration of dissolved sulfate SO_{4}^{2-} induces +precipitation of *barite* (barium sulfate; brute formula: +BaSO_{4}^{2-}). The overall reaction can be written: Ba^{2+} + celestite \rightarrow barite + Sr^{2+} Both celestite dissolution and barite precipitation are calculated using a kinetics rate law based on transition state theory: -rate = -S_{m} K (1-SR_{m}) +rate = -S_{m} k_{barite} (1-SR_{m}) where the reaction rate has units mol/s, S_{m} (m^{2}) is the reactive -surface area, K (mol/m^{2}/s) is the rate constant, and SR is the -saturation ratio, i.e., the ratio of the ion activity product of the -reacting species and the solubility constant. +surface area, k (mol/m^{2}/s) is the kinetic coefficient, and SR is +the saturation ratio, i.e., the ratio of the ion activity product of +the reacting species and the solubility constant, calculated +internally by PHREEQC from the speciated solution. For barite, the reaction rate is computed as sum of two mechanisms, r_{/acid/} and r_{/neutral/}: -rate_{barite} = S_{barite} (r_{/acid/} + r_{/neutral/}) * (1 - SR_{barite}) +rate_{barite} = S_{barite} (k_{/acid/} + k_{/neutral/}) * (1 - SR_{barite}) where: -r_{/acid/} = 10^{-6.9} e^{-30800 / R} \cdot act(H^{+})^{0.22} +k_{/acid/} = 10^{-6.9} e^{-30800 / R} \cdot act(H^{+})^{0.22} -r_{/neutral/} = 10^{-7.9} e^{-30800 / R} +k_{/neutral/} = 10^{-7.9} e^{-30800 / R} R (8.314462 J K^{-1} mol^{-1}) is the gas constant. -For celestite the kinetic law considers only the acidic term and +For celestite the kinetic law considers only the acidic mechanism and reads: rate_{celestite} = S_{celestite} 10^{-5.66} e^{-23800 / R} \cdot act(H^{+})^{0.109} \cdot (1 - SR_{celestite}) -The kinetic rate laws as implemented in the =db_barite.dat= file -accepts one parameter which represents reactive surface area in m^{2}. -For the benchmarks the surface areas are set to +The kinetic rates as implemented in the =db_barite.dat= file accepts +one parameter which represents reactive surface area in m^{2}. For the +benchmarks the surface areas are set to - S_{barite}: 50 m^{2} - S_{celestite}: 10 m^{2} +A starting seed for barite is given at 0.001 mol in each domain +element. -** Initial conditions +* TODO Nucleation -The parametrization +Geochemical benchmark inspired by Tranter et al. (2021) without +nucleation. +* POET simulations + +Currently these benchmarks are pure diffusion simulations. There are 7 +transported species: H, O, Charge, Ba, Cl, S(6), Sr. + +** =barite.R= + +- Grid discretization: square domain of 1 \cdot 1 m^{2} discretized in + 20x20 cells +- Boundary conditions: E, S and W sides of the domain are closed; the + N boundary has a *fixed concentration* (Dirichlet) of 0.1 molal + BaCl_{2} +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 20 iteration with \Delta t = 250 s +- *DHT* parameters: +| H | 10 | +| O | 10 | +| Charge | 3 | +| Ba | 5 | +| Cl | 5 | +| S(6) | 5 | +| Sr | 5 | + + + +** =barite_interp_eval.R= +- Grid discretization: rectangular domain of 40 \cdot 20 m^{2} + discretized in 400 \cdot 200 cells +- Boundary conditions: all boundaries are closed. The center of the + domain at indeces (200, 100) has fixed concentration of 0.1 molal of + BaCl_{2} +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 200 iterations with \Delta t = 250 s +- *PHT* parameters: +| H | 10 | +| O | 10 | +| Charge | 3 | +| Ba | 5 | +| Cl | 5 | +| S(6) | 5 | +| Sr | 5 | * References - Tranter, Wetzel, De Lucia and Kühn (2021): Reactive transport model - of kinetically controlled celestite to barite replacement, Adv. - Geosci., 1, 1–9 ,https://doi.org/10.5194/adgeo-1-1-2021 + of kinetically controlled celestite to barite replacement, Advances + in Geosciences, 1, 1–9, https://doi.org/10.5194/adgeo-1-1-2021 From bd5be576a7986f0e33c06bd28b15324d7e1eaee2 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 17:22:22 +0200 Subject: [PATCH 31/52] docs: readmes for dolo & barite --- bench/barite/README.org | 3 +- bench/dolo/README.org | 176 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 bench/dolo/README.org diff --git a/bench/barite/README.org b/bench/barite/README.org index 6378516b5..cc42b8c32 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -128,6 +128,7 @@ transported species: H, O, Charge, Ba, Cl, S(6), Sr. - Tranter, Wetzel, De Lucia and Kühn (2021): Reactive transport model of kinetically controlled celestite to barite replacement, Advances - in Geosciences, 1, 1–9, https://doi.org/10.5194/adgeo-1-1-2021 + in Geosciences, 56, 57-–65, 2021. + https://doi.org/10.5194/adgeo-56-57-20211 diff --git a/bench/dolo/README.org b/bench/dolo/README.org new file mode 100644 index 000000000..72259f1db --- /dev/null +++ b/bench/dolo/README.org @@ -0,0 +1,176 @@ +#+TITLE: Description of =barite= benchmark +#+AUTHOR: MDL +#+DATE: 2023-08-26 +#+STARTUP: inlineimages +#+LATEX_CLASS_OPTIONS: [a4paper,9pt] +#+LATEX_HEADER: \usepackage{fullpage} +#+LATEX_HEADER: \usepackage{amsmath, systeme} +#+LATEX_HEADER: \usepackage{graphicx} +#+LATEX_HEADER: \usepackage{charter} +#+OPTIONS: toc:nil + +* Quick start + +#+begin_src sh :language sh :frame single +mpirun -np 4 ./poet dolomite.R dolomite_results +mpirun -np 4 ./poet --interp dolomite_interp_eval.R dolomite_results +#+end_src + +* List of Files + +- =dolo.R=: POET input script for a 20x20 simulation grid +- =dolo_diffu_inner.R=: POET input script for a 400x200 simulation + grid +- =dolo_diffu_inner_large.R=: POET input script for a 400x200 + simulation grid +- =dolo_interp_long.R=: POET input script for a 400x200 simulation + grid +- =phreeqc_kin.dat=: PHREEQC database containing the kinetic expressions + for dolomite and celestite, stripped down from =phreeqc.dat= +- =dol.pqi=: PHREEQC input script for the chemical system + +* Chemical system + +This model describes a simplified version of /dolomitization/ of +calcite, itself a complex and not yet fully understood natural process +which is observed naturally at higher temperatures (Möller and De +Lucia, 2020). Variants of such model have been widely used in many +instances especially for the purpose of benchmarking numerical codes +(De Lucia et al., 2021 and references therein). + +We consider an isotherm porous system at *25 °C* in which pure water +is initially at equilibrium with *calcite* (calcium carbonate; brute +formula: CaCO_{3}). An MgCl_{2} solution enters the system causing +calcite dissolution. The released excess concentration of dissolved +calcium Ca^{2+} and carbonate (CO_{3}^{2-}) induces after a while +supersaturation and hence precipitation of *dolomite* +(calcium-magnesium carbonate; brute formula: CaMg(CO_{3})_{2}). The +overall /dolomitization/ reaction can be written: + +Mg^{2+} + 2 \cdot calcite \rightarrow dolomite + 2 \cdot Ca^{2+} + +The precipitation of dolomite continues until enough Ca^{2+} is +present in solution. Further injection of MgCl_{2} changes its +saturation state causing its dissolution too. After enough time, the +whole system has depleted all minerals and the injected MgCl_{2} +solution fills up the domain. + +Both calcite dissolution and dolomite precipitation/dissolution follow +a kinetics rate law based on transition state theory (Palandri and +Karhaka, 2004; De Lucia et al., 2021). + +rate = -S_{m} k_{m} (1-SR_{m}) + +where the reaction rate has units mol/s, S_{m} (m^{2}) is the reactive +surface area, k_{m} (mol/m^{2}/s) is the kinetic coefficient, and SR +is the saturation ratio, i.e., the ratio of the ion activity product +of the reacting species and the solubility constant, calculated +internally by PHREEQC from the speciated solution. + +For dolomite, the kinetic coefficient results from the sum of two +mechanisms, r_{/acid/} and r_{/neutral/}: + +rate_{dolomite} = S_{dolomite} (k_{/acid/} + k_{/neutral/}) * (1 - SR_{dolomite}) + +where: + +k_{/acid/} = 10^{-3.19} e^{-36100 / R} \cdot act(H^{+})^{0.5} + +k_{/neutral/} = 10^{-7.53} e^{-52200 / R} + +R (8.314462 J K^{-1} mol^{-1}) is the gas constant. + +Similarly, the kinetic law for calcite reads: + +k_{/acid/} = 10^{-0.3} e^{-14400 / R} \cdot act(H^{+})^{0.5} + +k_{/neutral/} = 10^{-5.81} e^{-23500 / R} + +The kinetic laws as implemented in the =phreeqc_kin.dat= file accepts +one parameter which represents reactive surface area in m^{2}. For the +benchmarks the surface areas are set to + +- S_{dolomite}: 0.005 m^{2} +- S_{calcite}: 0.05 m^{2} + +The initial content of calcite in the domain is of 0.0002 mol per kg +of water. A constant partial pressure of 10^{-1675} atm of O_{2(g)} is +maintained at any time in the domain in order to fix the redox +potential of the solution to an oxidizing state (pe around 9). + +Note that Cl is unreactive in this system and only effects the +computation of the activities in solution. + +* POET simulations + +Several benchmarks based on the same chemical system are defined here +with different grid sizes, resolution and boundary conditions. The +transported elemental concentrations are 7: C(4), Ca, Cl, Mg and the +implicit total H, total O and Charge as required by PHREEQC_RM. + +** =dolo_diffu_inner.R= + +- Grid discretization: square domain of 1 \cdot 1 m^{2} discretized in + 100x100 cells +- Boundary conditions: All sides of the domain are closed. *Fixed + concentration* of 0.001 molal of MgCl_{2} is defined in the domain + cell (20, 20) and of 0.002 molal MgCl_{2} at cells (60, 60) and + (80, 80) +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 10 iterations with \Delta t of 200 s +- *DHT* parameters: + | H | 10 | + | O | 10 | + | Charge | 3 | + | C(4) | 5 | + | Ca | 5 | + | Cl | 5 | + | Mg | 5 | + | Calcite | 5 | + | Dolomite | 5 | +- Hooks: the following hooks are defined: + 1. =dht_fill=: + 2. =dht_fuzz=: + 3. =interp_pre_func=: + 4. =interp_post_func=: + + +** =dolo_interp_long.R= + +- Grid discretization: rectangular domain of 5 \cdot 2.5 m^{2} + discretized in 400 \times 200 cells +- Boundary conditions: *Fixed concentrations* equal to the initial + state are imposed at all four sides of the domain. *Fixed + concentration* of 0.001 molal of MgCl_{2} is defined in the domain + center at cell (100, 50) +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 20000 iterations with \Delta t of 200 s +- *DHT* parameters: + | H | 10 | + | O | 10 | + | Charge | 3 | + | C(4) | 5 | + | Ca | 5 | + | Cl | 5 | + | Mg | 5 | + | Calcite | 5 | + | Dolomite | 5 | +- Hooks: the following hooks are defined: + 1. =dht_fill=: + 2. =dht_fuzz=: + 3. =interp_pre_func=: + 4. =interp_post_func=: + + +* References + +- De Lucia, Kühn, Lindemann, Lübke, Schnor: POET (v0.1): speedup of + many-core parallel reactive transport simulations with fast DHT + lookups, Geosci. Model Dev., 14, 7391–7409, 2021. + https://doi.org/10.5194/gmd-14-7391-2021 +- Möller, Marco De Lucia: The impact of Mg^{2+} ions on equilibration + of Mg-Ca carbonates in groundwater and brines, Geochemistry + 80, 2020. https://doi.org/10.1016/j.chemer.2020.125611 +- Palandri, Kharaka: A Compilation of Rate Parameters of Water-Mineral + Interaction Kinetics for Application to Geochemical Modeling, Report + 2004-1068, USGS, 2004. From e9ce4eeb1dcfe8c5440bccac93d9c1b1b908e097 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 17:34:21 +0200 Subject: [PATCH 32/52] fix: stdout time for diffusion step. Fixes in READMEs --- bench/barite/README.org | 22 ++++++---------------- bench/dolo/README.org | 36 +++++++++++------------------------- src/DiffusionModule.cpp | 2 +- 3 files changed, 18 insertions(+), 42 deletions(-) diff --git a/bench/barite/README.org b/bench/barite/README.org index cc42b8c32..e7b316052 100644 --- a/bench/barite/README.org +++ b/bench/barite/README.org @@ -77,7 +77,7 @@ benchmarks the surface areas are set to A starting seed for barite is given at 0.001 mol in each domain element. -* TODO Nucleation +* Nucleation (TODO) Geochemical benchmark inspired by Tranter et al. (2021) without nucleation. @@ -97,13 +97,8 @@ transported species: H, O, Charge, Ba, Cl, S(6), Sr. - Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 - Time steps & iterations: 20 iteration with \Delta t = 250 s - *DHT* parameters: -| H | 10 | -| O | 10 | -| Charge | 3 | -| Ba | 5 | -| Cl | 5 | -| S(6) | 5 | -| Sr | 5 | + | H | O | Charge | Ba | Cl | S(6) | Sr | + | 10 | 10 | 3 | 5 | 5 | 5 | 5 | @@ -115,14 +110,9 @@ transported species: H, O, Charge, Ba, Cl, S(6), Sr. BaCl_{2} - Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 - Time steps & iterations: 200 iterations with \Delta t = 250 s -- *PHT* parameters: -| H | 10 | -| O | 10 | -| Charge | 3 | -| Ba | 5 | -| Cl | 5 | -| S(6) | 5 | -| Sr | 5 | +- *DHT* parameters: + | H | O | Charge | Ba | Cl | S(6) | Sr | + | 10 | 10 | 3 | 5 | 5 | 5 | 5 | * References diff --git a/bench/dolo/README.org b/bench/dolo/README.org index 72259f1db..67caf6e12 100644 --- a/bench/dolo/README.org +++ b/bench/dolo/README.org @@ -1,4 +1,4 @@ -#+TITLE: Description of =barite= benchmark +#+TITLE: Description of =dolo= benchmark #+AUTHOR: MDL #+DATE: 2023-08-26 #+STARTUP: inlineimages @@ -12,22 +12,22 @@ * Quick start #+begin_src sh :language sh :frame single -mpirun -np 4 ./poet dolomite.R dolomite_results -mpirun -np 4 ./poet --interp dolomite_interp_eval.R dolomite_results +mpirun -np 4 ./poet dolo_diffu_inner.R dolo_diffu_inner_res +mpirun -np 4 ./poet --dht --interp dolo_interp_long.R dolo_interp_long_res #+end_src * List of Files -- =dolo.R=: POET input script for a 20x20 simulation grid -- =dolo_diffu_inner.R=: POET input script for a 400x200 simulation +- =dolo_diffu_inner.R=: POET input script for a 100x100 simulation grid -- =dolo_diffu_inner_large.R=: POET input script for a 400x200 - simulation grid - =dolo_interp_long.R=: POET input script for a 400x200 simulation grid - =phreeqc_kin.dat=: PHREEQC database containing the kinetic expressions for dolomite and celestite, stripped down from =phreeqc.dat= - =dol.pqi=: PHREEQC input script for the chemical system +# - =dolo.R=: POET input script for a 20x20 simulation grid +# - =dolo_diffu_inner_large.R=: POET input script for a 400x200 +# simulation grid * Chemical system @@ -119,15 +119,8 @@ implicit total H, total O and Charge as required by PHREEQC_RM. - Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 - Time steps & iterations: 10 iterations with \Delta t of 200 s - *DHT* parameters: - | H | 10 | - | O | 10 | - | Charge | 3 | - | C(4) | 5 | - | Ca | 5 | - | Cl | 5 | - | Mg | 5 | - | Calcite | 5 | - | Dolomite | 5 | + | H | O | Charge | C(4) | Ca | Cl | Mg | Calcite | Dolomite | + | 10 | 10 | 3 | 5 | 5 | 5 | 5 | 5 | 5 | - Hooks: the following hooks are defined: 1. =dht_fill=: 2. =dht_fuzz=: @@ -146,15 +139,8 @@ implicit total H, total O and Charge as required by PHREEQC_RM. - Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 - Time steps & iterations: 20000 iterations with \Delta t of 200 s - *DHT* parameters: - | H | 10 | - | O | 10 | - | Charge | 3 | - | C(4) | 5 | - | Ca | 5 | - | Cl | 5 | - | Mg | 5 | - | Calcite | 5 | - | Dolomite | 5 | + | H | O | Charge | C(4) | Ca | Cl | Mg | Calcite | Dolomite | + | 10 | 10 | 3 | 5 | 5 | 5 | 5 | 5 | 5 | - Hooks: the following hooks are defined: 1. =dht_fill=: 2. =dht_fuzz=: diff --git a/src/DiffusionModule.cpp b/src/DiffusionModule.cpp index e02c42e03..ba7a60e62 100644 --- a/src/DiffusionModule.cpp +++ b/src/DiffusionModule.cpp @@ -177,11 +177,11 @@ void DiffusionModule::simulate(double dt) { t_field = field_2d; - std::cout << " done!\n"; sim_a_transport = MPI_Wtime(); transport_t += sim_a_transport - sim_b_transport; + std::cout << " done in "<< sim_a_transport - sim_b_transport << "sec" << std::endl; } void DiffusionModule::end() { From 2b896fa070ba58597a4110ebc9ad872ab7e681d1 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Sat, 26 Aug 2023 18:39:50 +0200 Subject: [PATCH 33/52] docs: README for surfex bench --- bench/surfex/README.org | 100 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 bench/surfex/README.org diff --git a/bench/surfex/README.org b/bench/surfex/README.org new file mode 100644 index 000000000..b765f4ef5 --- /dev/null +++ b/bench/surfex/README.org @@ -0,0 +1,100 @@ +#+TITLE: Description of =surfex= benchmark +#+AUTHOR: MDL +#+DATE: 2023-08-26 +#+STARTUP: inlineimages +#+LATEX_CLASS_OPTIONS: [a4paper,9pt] +#+LATEX_HEADER: \usepackage{fullpage} +#+LATEX_HEADER: \usepackage{amsmath, systeme} +#+LATEX_HEADER: \usepackage{graphicx} +#+LATEX_HEADER: \usepackage{charter} +#+OPTIONS: toc:nil + +* Quick start + +#+begin_src sh :language sh :frame single +mpirun -np 4 ./poet ex.R ex_res +mpirun -np 4 ./poet surfex.R surfex_res +#+end_src + +* List of Files +- =ex.R=: POET input script for a 100x100 simulation grid, only + exchange +- =ExBase.pqi=: PHREEQC input script for the =ex.R= model +- =surfex.R=: POET input script for a 100x100 simulation grid + considering both cation exchange and surface complexation +- =SurfExBase.pqi=: PHREEQC input script for the =surfex.R= model +- =SMILE_2021_11_01_TH.dat=: PHREEQC database containing the + parametrized data for Surface and Exchange, based on the SMILE + Thermodynamic Database (Version 01-November-2021) + +* Chemical system + +This model describes migration of Uranium radionuclide in Opalinus +clay subject to surface complexation and cation exchange on the +surface of clay minerals. These two processes account for the binding +of aqueous complexes to the surfaces of minerals, which may have a +significant impact on safety of underground nuclear waste repository. +Namely, they can act as retardation buffer for uranium complexes +entering into a natural system. The system is kindly provided by Dr. +T. Hennig and is inspired to the sandy facies BWS-A3 sample from the +Mont Terri underground lab (Hennig and Kühn, 2021). + +This chemical system is highly redox-sensitive, and several elements +are defined in significant amounts in different valence states. In +total, 20 elemental concentrations and valences are transported: +C(-4), C(4), Ca, Cl, Fe(2), Fe(3), K, Mg, Na, S(-2), S(2), S(4), S(6), +Sr , U(4), U(5), U(6); plus the total H, total O and Charge implicitly +required by PHREEQC_RM. + +** Exchange + +The SMILE database defines thermodynamical data for exchange of all +major cations and uranyl-ions on Illite and Montmorillonite. In +PHREEQC terms: +- *Y* for Montmorillonite, with a total amount of 1.2585 + milliequivalents and +- *Z* for Illite, with a total amount of 0.9418 meq + +** Surface + +Here we consider a Donnan diffuse double layer of 0.49 nm. Six +distinct sorption sites are defined: +- Kln_aOH (aluminol site) and Kln_siOH (silanol) for Kaolinite +- For Illite, strong and weak sites Ill_sOH and Ill_wOH respectively +- For Montmorillonite, strong and weak sites Mll_sOH and Mll_wOH + respectively + +Refer to the =SurfExBase.pqi= script for the actual numerical values +of the parameters. + +* POET simulations + +** =ex.R= + +This benchmark only considers EXCHANGE, no mineral or SURFACE +complexation is involved. + +- Grid discretization: square domain of 1 \cdot 1 m^{2} discretized in + 100x100 cells +- Boundary conditions: E, S and W sides of the domain are closed. + *Fixed concentrations* are fixed at the N boundary. +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 10 iterations with \Delta t of 200 s +- *DHT* is not implemented as of yet for models including SURFACE and + EXCHANGE geochemical processes *TODO* +- Hooks: no hooks defined *TODO* + +** =surfex.R= + +- Grid discretization: rectangular domain of 1 \cdot 1 m^{2} + discretized in 10 \times 10 cells +- Boundary conditions: E, S and W sides of the domain are closed. + *Fixed concentrations* are fixed at the N boundary. +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 10 iterations with \Delta t of 200 s + +* References + +- Hennig, T.; Kühn, M.Surrogate Model for Multi-Component Diffusion of + Uranium through Opalinus Clay on the Host Rock Scale. Appl. Sci. + 2021, 11, 786. https://doi.org/10.3390/app11020786 From 5cd517510f22befad3d85ef042cbd4624c89f709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Thu, 7 Sep 2023 09:12:44 +0200 Subject: [PATCH 34/52] fix: output DHT size in megabyte --- src/SimParams.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 0a81c5b73..e02046392 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -194,7 +194,7 @@ int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { // << simparams.dht_significant_digits); // MSG("DHT logarithm before rounding: " // << (simparams.dht_log ? "ON" : "OFF")); - MSG("DHT size per process (Byte) = " + + MSG("DHT size per process (Megabyte) = " + std::to_string(chem_params.dht_size)); MSG("DHT save snapshots is " + BOOL_PRINT(chem_params.dht_snaps)); MSG("DHT load file is " + chem_params.dht_file); From 384f5bd336169c9530cb82324ab0ab34bc10db36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Fri, 8 Sep 2023 22:43:52 +0200 Subject: [PATCH 35/52] feat: make storage of input values in DHT optional --- include/poet/DHT_Wrapper.hpp | 14 ++++- src/ChemistryModule/ChemistryModule.cpp | 6 +- .../SurrogateModels/DHT_Wrapper.cpp | 57 +++++++++++++++---- 3 files changed, 59 insertions(+), 18 deletions(-) diff --git a/include/poet/DHT_Wrapper.hpp b/include/poet/DHT_Wrapper.hpp index a74db4f2d..115d5e769 100644 --- a/include/poet/DHT_Wrapper.hpp +++ b/include/poet/DHT_Wrapper.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-15 14:57:51 mluebke" +// Time-stamp: "Last modified 2023-09-08 14:43:02 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -89,7 +89,7 @@ public: const std::vector &key_indices, const std::vector &output_names, const ChemistryParams::Chem_Hook_Functions &hooks, - uint32_t data_count); + uint32_t data_count, bool with_interp); /** * @brief Destroy the dht wrapper object * @@ -240,7 +240,14 @@ private: const std::vector &new_results); std::vector - inputAndRatesToOutput(const std::vector &dht_data); + inputAndRatesToOutput(const std::vector &dht_data, + const std::vector &input_values); + + std::vector outputToRates(const std::vector &old_results, + const std::vector &new_results); + + std::vector ratesToOutput(const std::vector &dht_data, + const std::vector &input_values); uint32_t dht_hits = 0; uint32_t dht_evictions = 0; @@ -254,6 +261,7 @@ private: const std::vector &output_names; const ChemistryParams::Chem_Hook_Functions &hooks; + const bool with_interp; DHT_ResultObject dht_results; diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp index 3060e8617..a1b1a8837 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/ChemistryModule/ChemistryModule.cpp @@ -378,9 +378,9 @@ void poet::ChemistryModule::initializeDHT( const std::uint64_t dht_size = size_mb * MB_FACTOR; - this->dht = - new DHT_Wrapper(dht_comm, dht_size, map_copy, key_indices, - this->prop_names, params.hooks, this->prop_count); + this->dht = new DHT_Wrapper(dht_comm, dht_size, map_copy, key_indices, + this->prop_names, params.hooks, + this->prop_count, params.use_interp); this->dht->setBaseTotals(base_totals.at(0), base_totals.at(1)); } } diff --git a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp index d1ee112c2..af5fbf12e 100644 --- a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp +++ b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-08-16 16:44:17 mluebke" +// Time-stamp: "Last modified 2023-09-08 22:09:03 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -48,15 +48,17 @@ DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, const std::vector &key_indices, const std::vector &_output_names, const ChemistryParams::Chem_Hook_Functions &_hooks, - uint32_t data_count) + uint32_t data_count, bool _with_interp) : key_count(key_indices.size()), data_count(data_count), input_key_elements(key_indices), communicator(dht_comm), - key_species(key_species), output_names(_output_names), hooks(_hooks) { + key_species(key_species), output_names(_output_names), hooks(_hooks), + with_interp(_with_interp) { // initialize DHT object // key size = count of key elements + timestep uint32_t key_size = (key_count + 1) * sizeof(Lookup_Keyelement); uint32_t data_size = - (data_count + input_key_elements.size()) * sizeof(double); + (data_count + (with_interp ? input_key_elements.size() : 0)) * + sizeof(double); uint32_t buckets_per_process = static_cast(dht_size / (data_size + key_size)); dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, @@ -93,8 +95,8 @@ auto DHT_Wrapper::checkDHT(WorkPackage &work_package) const auto length = work_package.size; - std::vector bucket_writer(this->data_count + - input_key_elements.size()); + std::vector bucket_writer( + this->data_count + (with_interp ? input_key_elements.size() : 0)); // loop over every grid cell contained in work package for (int i = 0; i < length; i++) { @@ -107,7 +109,10 @@ auto DHT_Wrapper::checkDHT(WorkPackage &work_package) switch (res) { case DHT_SUCCESS: - work_package.output[i] = inputAndRatesToOutput(bucket_writer); + work_package.output[i] = + (with_interp + ? inputAndRatesToOutput(bucket_writer, work_package.input[i]) + : bucket_writer); work_package.mapping[i] = CHEM_DHT; this->dht_hits++; break; @@ -145,7 +150,10 @@ void DHT_Wrapper::fillDHT(const WorkPackage &work_package) { uint32_t proc, index; auto &key = dht_results.keys[i]; const auto data = - outputToInputAndRates(work_package.input[i], work_package.output[i]); + (with_interp + ? outputToInputAndRates(work_package.input[i], + work_package.output[i]) + : work_package.output[i]); // void *data = (void *)&(work_package[i * this->data_count]); // fuzz data (round, logarithm etc.) @@ -165,7 +173,7 @@ void DHT_Wrapper::fillDHT(const WorkPackage &work_package) { } } -std::vector +inline std::vector DHT_Wrapper::outputToInputAndRates(const std::vector &old_results, const std::vector &new_results) { const int prefix_size = this->input_key_elements.size(); @@ -183,11 +191,12 @@ DHT_Wrapper::outputToInputAndRates(const std::vector &old_results, return output; } -std::vector -DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data) { +inline std::vector +DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data, + const std::vector &input_values) { const int prefix_size = this->input_key_elements.size(); - std::vector output{dht_data.begin() + prefix_size, dht_data.end()}; + std::vector output(input_values); for (int i = 0; i < prefix_size; i++) { const int data_elem_i = input_key_elements[i]; @@ -197,6 +206,30 @@ DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data) { return output; } +inline std::vector +DHT_Wrapper::outputToRates(const std::vector &old_results, + const std::vector &new_results) { + std::vector output(new_results); + + for (const auto &data_elem_i : input_key_elements) { + output[data_elem_i] -= old_results[data_elem_i]; + } + + return output; +} + +inline std::vector +DHT_Wrapper::ratesToOutput(const std::vector &dht_data, + const std::vector &input_values) { + std::vector output(input_values); + + for (const auto &data_elem_i : input_key_elements) { + output[data_elem_i] += dht_data[data_elem_i]; + } + + return output; +} + // void DHT_Wrapper::resultsToWP(std::vector &work_package) { // for (int i = 0; i < dht_results.length; i++) { // if (!dht_results.needPhreeqc[i]) { From bb12c1bb815aab8db29e72259838b493dcd7cd88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Fri, 8 Sep 2023 12:44:27 +0200 Subject: [PATCH 36/52] feat: instead of always storing exact input and output make it dependent on the usage of interpolation --- src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp index af5fbf12e..861375c04 100644 --- a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp +++ b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-09-08 22:09:03 mluebke" +// Time-stamp: "Last modified 2023-11-01 10:54:45 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of From 75702bb3515b7f8658addb0afe819fb0dfbdfb73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 1 Nov 2023 11:01:38 +0100 Subject: [PATCH 37/52] docs: improve main SVG --- docs/20221216_Scheme_PORT_en.svg | 572 --------------- docs/20230720_Scheme_POET_en.svg | 1117 ++++++++++++++++-------------- 2 files changed, 593 insertions(+), 1096 deletions(-) delete mode 100644 docs/20221216_Scheme_PORT_en.svg diff --git a/docs/20221216_Scheme_PORT_en.svg b/docs/20221216_Scheme_PORT_en.svg deleted file mode 100644 index d5c0c2208..000000000 --- a/docs/20221216_Scheme_PORT_en.svg +++ /dev/null @@ -1,572 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TUG - - - - - - - - - - - In DHT?Next Iteration - NoYes - - - - - Store in DHTChemistry - - - - - RetrieveResult - - - - - - Flow - - - - - Transport - - - - - PHREEQC - - AI Surrogate - - - - - - Accurate? - - - - - - - - - Yes - No - - Update - PORT - - - - - - - - diff --git a/docs/20230720_Scheme_POET_en.svg b/docs/20230720_Scheme_POET_en.svg index 89e339751..dd29d9b9e 100644 --- a/docs/20230720_Scheme_POET_en.svg +++ b/docs/20230720_Scheme_POET_en.svg @@ -1,568 +1,637 @@ + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TUG - - - - - - - - - - - In DHT?Next Iteration - NoYes - - - - - Store in DHTChemistry - - - - - RetrieveResult - - - - - - Flow - - - - - Transport - - - - - PHREEQC - - AI Surrogate - - - - - - Accurate? - - - - - - - - - Yes - No - - Update - POET - tugFlowTransportPOET - - - - - - - + transform="matrix(1.0545,0,0,1,0.0745073,-0.0682484)" + id="g182">ChemistryIn DHT?RetrieveResultPHREEQCStore in DHTAI SurrogateAccurate?YesNoUpdateNoYesWorkerDistribute WorkGather ResultsMasterNext Iteration From 433f095f585348b1c886e87ae7a6c645e4075ced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Tue, 21 Nov 2023 16:25:50 +0100 Subject: [PATCH 38/52] Update file dolo_interp_long.R --- bench/dolo/dolo_interp_long.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/dolo/dolo_interp_long.R b/bench/dolo/dolo_interp_long.R index 80770f07a..7acca4298 100644 --- a/bench/dolo/dolo_interp_long.R +++ b/bench/dolo/dolo_interp_long.R @@ -165,8 +165,8 @@ check_sign_cal_dol_interp <- function(to_interp, data_set) { } check_neg_cal_dol <- function(result) { - neg_sign <- (result["Calcite"] <- 0) || (result["Dolomite"] < 0) - return(any(neg_sign)) + neg_sign <- (result["Calcite"] < 0) || (result["Dolomite"] < 0) + return(neg_sign) } hooks <- list( From 0b14de480bfa75e03e61b2b79ee1a3551a7ce4fb Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Fri, 12 Jan 2024 12:50:54 +0100 Subject: [PATCH 39/52] fix: updated bench/barite/{barite.R, barite_interp_eval.R}, added bench/barite/barite_200.R --- bench/barite/barite.R | 68 ++++++++------- bench/barite/barite_200.R | 138 ++++++++++++++++++++++++++++++ bench/barite/barite_interp_eval.R | 87 ++++++++----------- 3 files changed, 209 insertions(+), 84 deletions(-) create mode 100644 bench/barite/barite_200.R diff --git a/bench/barite/barite.R b/bench/barite/barite.R index 0543ff39a..7d73de75b 100644 --- a/bench/barite/barite.R +++ b/bench/barite/barite.R @@ -1,5 +1,4 @@ -## Time-stamp: "Last modified 2023-08-02 13:59:22 mluebke" - +## Time-stamp: "Last modified 2024-01-12 12:39:14 delucia" database <- normalizePath("../share/poet/bench/barite/db_barite.dat") input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") @@ -16,11 +15,11 @@ types <- c("scratch", "phreeqc", "rds") init_cell <- list( "H" = 110.0124, "O" = 55.5087, - "Charge" = -1.217E-09, - "Ba" = 1.E-10, - "Cl" = 2.E-10, - "S" = 6.205E-4, - "Sr" = 6.205E-4, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04, "Barite" = 0.001, "Celestite" = 1 ) @@ -28,7 +27,11 @@ init_cell <- list( grid <- list( n_cells = c(n, m), s_cells = c(1, 1), - type = types[1] + type = types[1], + init_cell = as.data.frame(init_cell, check.names = FALSE), + props = names(init_cell), + database = database, + input_script = input_script ) @@ -40,26 +43,20 @@ grid <- list( ## initial conditions init_diffu <- list( - #"H" = 110.0124, - "H" = 0.00000028904, - #"O" = 55.5087, - "O" = 0.000000165205, - #"Charge" = -1.217E-09, - "Charge" = -3.337E-08, - "Ba" = 1.E-10, - "Cl" = 1.E-10, - "S(6)" = 6.205E-4, - "Sr" = 6.205E-4 + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04 ) injection_diff <- list( list( - #"H" = 111.0124, - "H" = 0.0000002890408, - #"O" = 55.50622, - "O" = 0.00002014464, - #"Charge" = -3.337E-08, - "Charge" = -3.337000004885E-08, + "H" = 111.0124, + "O" = 55.50622, + "Charge" = -3.336970273297e-08, "Ba" = 0.1, "Cl" = 0.2, "S(6)" = 0, @@ -84,11 +81,12 @@ alpha_diffu <- c( ## ) boundary <- list( - "N" = rep(1, n), + "N" = c(1,1, rep(0, n-2)), ## "N" = rep(0, n), "E" = rep(0, n), "S" = rep(0, n), - "W" = rep(0, n) +# "W" = rep(0, n) + "W" = c(1,1, rep(0, n-2)) ) diffu_list <- names(alpha_diffu) @@ -112,15 +110,19 @@ diffusion <- list( ## # Needed when using DHT dht_species <- c( - "H" = 10, - "O" = 10, - "Charge" = 3, - "Ba" = 5, - "Cl" = 5, - "S(6)" = 5, - "Sr" = 5 + "H" = 7, + "O" = 7, + "Charge" = 4, + "Ba" = 7, + "Cl" = 7, + "S(6)" = 7, + "Sr" = 7, + "Barite" = 4, + "Celestite" = 4 ) + + chemistry <- list( database = database, input_script = input_script, diff --git a/bench/barite/barite_200.R b/bench/barite/barite_200.R new file mode 100644 index 000000000..76526b00e --- /dev/null +++ b/bench/barite/barite_200.R @@ -0,0 +1,138 @@ +## Time-stamp: "Last modified 2024-01-12 12:49:03 delucia" + +database <- normalizePath("../share/poet/bench/barite/db_barite.dat") +input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") +## database <- normalizePath("/home/work/simR/Rphree/poetsims/Sims/Hans/db_barite.dat") +## input_script <- normalizePath("/home/work/simR/Rphree/poetsims/Sims/Hans/barite.pqi") + +################################################################# +## Section 1 ## +## Grid initialization ## +################################################################# + +n <- 200 +m <- 200 + +init_cell <- list( + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04, + "Barite" = 0.001, + "Celestite" = 1 +) + +grid <- list( + n_cells = c(n, m), + s_cells = c(1, 1), + type = "scratch", + init_cell = as.data.frame(init_cell, check.names = FALSE), + props = names(init_cell), + database = database, + input_script = input_script +) + + +################################################################## +## Section 2 ## +## Diffusion parameters and boundary conditions ## +################################################################## + +## initial conditions + +init_diffu <- list( + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04 +) + +injection_diff <- list( + list( + "H" = 111.0124, + "O" = 55.50622, + "Charge" = -3.336970273297e-08, + "Ba" = 0.1, + "Cl" = 0.2, + "S(6)" = 0, + "Sr" = 0) +) + +## diffusion coefficients +alpha_diffu <- c( + "H" = 1E-06, + "O" = 1E-06, + "Charge" = 1E-06, + "Ba" = 1E-06, + "Cl" = 1E-06, + "S(6)" = 1E-06, + "Sr" = 1E-06 +) + +boundary <- list( + "N" = c(1,1, rep(0, n-2)), + "E" = rep(0, n), + "S" = rep(0, n), + "W" = c(1,1, rep(0, n-2)) +) + +diffu_list <- names(alpha_diffu) + +vecinj <- do.call(rbind.data.frame, injection_diff) +names(vecinj) <- names(init_diffu) + +diffusion <- list( + init = as.data.frame(init_diffu, check.names = FALSE), + vecinj = vecinj, + vecinj_index = boundary, + alpha = alpha_diffu +) + +################################################################# +## Section 3 ## +## Chemistry module (Phreeqc) ## +################################################################# + +## DHT significant digits +dht_species <- c( + "H" = 7, + "O" = 7, + "Charge" = 4, + "Ba" = 7, + "Cl" = 7, + "S(6)" = 7, + "Sr" = 7, + "Barite" = 4, + "Celestite" = 4 +) + +chemistry <- list( + database = database, + input_script = input_script, + dht_species = dht_species +) + +################################################################# +## Section 4 ## +## Putting all those things together ## +################################################################# + +iterations <- 50 +dt <- 100 + +setup <- list( + grid = grid, + diffusion = diffusion, + chemistry = chemistry, + iterations = iterations, + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = seq(1, iterations) + ## out_save = c(1, 5, 10, seq(50, iterations, by=50)) +) diff --git a/bench/barite/barite_interp_eval.R b/bench/barite/barite_interp_eval.R index fcbe4884b..6f2b79a34 100644 --- a/bench/barite/barite_interp_eval.R +++ b/bench/barite/barite_interp_eval.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-07-21 15:04:49 mluebke" +## Time-stamp: "Last modified 2024-01-12 11:35:11 delucia" database <- normalizePath("../share/poet/bench/barite/db_barite.dat") input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") @@ -16,18 +16,18 @@ types <- c("scratch", "phreeqc", "rds") init_cell <- list( "H" = 110.0124, "O" = 55.5087, - "Charge" = -1.217E-09, - "Ba" = 1.E-10, - "Cl" = 2.E-10, - "S" = 6.205E-4, - "Sr" = 6.205E-4, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04, "Barite" = 0.001, "Celestite" = 1 ) grid <- list( n_cells = c(n, m), - s_cells = c(n / 10, m / 10), + s_cells = c(1, 1), type = types[1], init_cell = as.data.frame(init_cell, check.names = FALSE), props = names(init_cell), @@ -44,56 +44,42 @@ grid <- list( ## initial conditions init_diffu <- list( - # "H" = 110.0124, - "H" = 0.00000028904, - # "O" = 55.5087, - "O" = 0.000000165205, - # "Charge" = -1.217E-09, - "Charge" = -3.337E-08, - "Ba" = 1.E-10, - "Cl" = 1.E-10, - "S(6)" = 6.205E-4, - "Sr" = 6.205E-4 + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04 ) injection_diff <- list( - list( - # "H" = 111.0124, - "H" = 0.0000002890408, - # "O" = 55.50622, - "O" = 0.00002014464, - # "Charge" = -3.337E-08, - "Charge" = -3.337000004885E-08, - "Ba" = 0.1, - "Cl" = 0.2, - "S(6)" = 0, - "Sr" = 0 - ) + list( + "H" = 111.0124, + "O" = 55.50622, + "Charge" = -3.336970273297e-08, + "Ba" = 0.1, + "Cl" = 0.2, + "S(6)" = 0, + "Sr" = 0) ) ## diffusion coefficients alpha_diffu <- c( - "H" = 1E-06, - "O" = 1E-06, + "H" = 1E-06, + "O" = 1E-06, "Charge" = 1E-06, "Ba" = 1E-06, "Cl" = 1E-06, - "S(6)" = 1E-06, + "S(6)" = 1E-06, "Sr" = 1E-06 ) -vecinj_inner <- list( - l1 = c(1, floor(n / 2), floor(m / 2)) - ## l2 = c(2,80,80), - ## l3 = c(2,60,80) -) - boundary <- list( - # "N" = rep(1, n), - "N" = rep(0, n), + "N" = c(1,1, rep(0, n-2)), "E" = rep(0, n), "S" = rep(0, n), - "W" = rep(0, n) + "W" = c(1,1, rep(0, n-2)) ) diffu_list <- names(alpha_diffu) @@ -104,7 +90,6 @@ names(vecinj) <- names(init_diffu) diffusion <- list( init = as.data.frame(init_diffu, check.names = FALSE), vecinj = vecinj, - vecinj_inner = vecinj_inner, vecinj_index = boundary, alpha = alpha_diffu ) @@ -116,13 +101,15 @@ diffusion <- list( ## # Needed when using DHT dht_species <- c( - "H" = 10, - "O" = 10, - "Charge" = 3, - "Ba" = 5, - "Cl" = 5, - "S(6)" = 5, - "Sr" = 5 + "H" = 7, + "O" = 7, + "Charge" = 4, + "Ba" = 7, + "Cl" = 7, + "S(6)" = 7, + "Sr" = 7, + "Barite" = 4, + "Celestite" = 4 ) chemistry <- list( @@ -135,8 +122,6 @@ chemistry <- list( ## Section 4 ## ## Putting all those things together ## ################################################################# - - iterations <- 200 dt <- 250 From caa54ebf5d35bd4b05a9eff0ddeb1571d0f24730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Thu, 22 Feb 2024 09:47:05 +0100 Subject: [PATCH 40/52] Add .vscode to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 8cb918a01..1e597b203 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,5 @@ vignettes/*.pdf build/ /.cache/ + +.vscode \ No newline at end of file From ee0bd9167cedd133c18c0d815b0ebb049c671930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Mon, 26 Feb 2024 12:34:42 +0100 Subject: [PATCH 41/52] Update initial and boundary conditions for dolo large model --- bench/dolo/dolo_diffu_inner_large.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bench/dolo/dolo_diffu_inner_large.R b/bench/dolo/dolo_diffu_inner_large.R index ce1da8246..c69803f80 100644 --- a/bench/dolo/dolo_diffu_inner_large.R +++ b/bench/dolo/dolo_diffu_inner_large.R @@ -40,8 +40,8 @@ grid <- list( ## initial conditions init_diffu <- list( - "H" = 0.000211313883539788, - "O" = 0.00398302904424952, + "H" = 110.683, + "O" = 55.3413, "Charge" = -5.0822e-19, "C(4)" = 1.2279E-4, "Ca" = 1.2279E-4, @@ -63,8 +63,8 @@ alpha_diffu <- c( ## list of boundary conditions/inner nodes vecinj_diffu <- list( list( - "H" = 0.0001540445, - "O" = 0.002148006, + "H" = 110.683, + "O" = 55.3413, "Charge" = 1.90431e-16, "C(4)" = 0, "Ca" = 0, @@ -72,8 +72,8 @@ vecinj_diffu <- list( "Mg" = 0.001 ), list( - "H" = 0.0001610193, - "O" = 0.002386934, + "H" = 110.683, + "O" = 55.3413, "Charge" = 1.90431e-16, "C(4)" = 0, "Ca" = 0.0, From e5ed6f9f8751f09ff7ff23ba0b0089fe0a08d760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Fri, 1 Mar 2024 12:57:42 +0100 Subject: [PATCH 42/52] Fix broken Dolo large benchmark Update surfex to a larger grid size --- bench/dolo/dolo_diffu_inner_large.R | 2 +- bench/surfex/surfex.R | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bench/dolo/dolo_diffu_inner_large.R b/bench/dolo/dolo_diffu_inner_large.R index c69803f80..c9c70c6a4 100644 --- a/bench/dolo/dolo_diffu_inner_large.R +++ b/bench/dolo/dolo_diffu_inner_large.R @@ -28,7 +28,7 @@ init_cell <- list( grid <- list( n_cells = c(n, m), - s_cells = c(2, 1), + s_cells = c(20, 10), type = types[1] ) diff --git a/bench/surfex/surfex.R b/bench/surfex/surfex.R index 409b0f277..27409b97f 100644 --- a/bench/surfex/surfex.R +++ b/bench/surfex/surfex.R @@ -10,8 +10,8 @@ cat(paste(":: R This is a test 1\n")) ## Grid initialization ## ################################################################# -n <- 10 -m <- 10 +n <- 1000 +m <- 1000 types <- c("scratch", "phreeqc", "rds") @@ -39,7 +39,7 @@ init_cell <- list(H = 1.476571028625e-01, grid <- list( n_cells = c(n, m), - s_cells = c(1, 1), + s_cells = c(n/10, m/10), type = "scratch" ) @@ -127,7 +127,7 @@ chemistry <- list( ################################################################# -iterations <- 10 +iterations <- 100 dt <- 200 setup <- list( From 62412e8d87e5b6ec4984d92598d963712168a65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 11:26:52 +0100 Subject: [PATCH 43/52] Refactor build process Now include/app paths are combined in the src dir. --- app/CMakeLists.txt | 9 ++++---- app/poet.h.in | 11 --------- src/{ => Base}/Grid.cpp | 5 ++-- {include/poet => src/Base}/Grid.hpp | 3 ++- src/{ => Base}/SimParams.cpp | 6 +++-- {include/poet => src/Base}/SimParams.hpp | 10 ++++---- {include/poet => src/Base}/argh.hpp | 0 src/CMakeLists.txt | 23 +++++++++++++------ .../ChemistryModule.cpp | 10 ++++---- .../Chemistry}/ChemistryModule.hpp | 13 ++++++----- .../MasterFunctions.cpp | 5 ++-- .../SurrogateModels/CMakeLists.txt | 0 .../SurrogateModels/DHT.c | 3 ++- .../Chemistry/SurrogateModels}/DHT.h | 0 .../SurrogateModels/DHT_Wrapper.cpp | 16 ++++--------- .../SurrogateModels}/DHT_Wrapper.hpp | 14 +++++------ .../SurrogateModels/HashFunctions.cpp | 2 +- .../SurrogateModels}/HashFunctions.hpp | 0 .../SurrogateModels}/Interpolation.hpp | 13 +++++------ .../SurrogateModels/InterpolationModule.cpp | 19 ++++++++------- .../Chemistry/SurrogateModels}/LookupKey.hpp | 3 ++- .../SurrogateModels/ProximityHashTable.cpp | 14 ++++++----- .../Chemistry/SurrogateModels}/Rounding.hpp | 0 .../WorkerFunctions.cpp | 6 ++--- {include/poet => src/Chemistry}/enums.hpp | 0 .../DataStructures}/DataStructures.hpp | 3 ++- src/DataStructures/Field.cpp | 3 ++- {include/poet => src}/Macros.hpp | 0 {include/poet => src}/RInsidePOET.hpp | 0 src/{ => Transport}/DiffusionModule.cpp | 16 ++++++------- .../Transport}/DiffusionModule.hpp | 13 ++++++----- {app => src}/poet.cpp | 16 +++++++------ src/poet.h.in | 8 +++++++ 33 files changed, 131 insertions(+), 113 deletions(-) delete mode 100644 app/poet.h.in rename src/{ => Base}/Grid.cpp (99%) rename {include/poet => src/Base}/Grid.hpp (99%) rename src/{ => Base}/SimParams.cpp (99%) rename {include/poet => src/Base}/SimParams.hpp (98%) rename {include/poet => src/Base}/argh.hpp (100%) rename src/{ChemistryModule => Chemistry}/ChemistryModule.cpp (99%) rename {include/poet => src/Chemistry}/ChemistryModule.hpp (98%) rename src/{ChemistryModule => Chemistry}/MasterFunctions.cpp (99%) rename src/{ChemistryModule => Chemistry}/SurrogateModels/CMakeLists.txt (100%) rename src/{ChemistryModule => Chemistry}/SurrogateModels/DHT.c (99%) rename {include/poet => src/Chemistry/SurrogateModels}/DHT.h (100%) rename src/{ChemistryModule => Chemistry}/SurrogateModels/DHT_Wrapper.cpp (96%) rename {include/poet => src/Chemistry/SurrogateModels}/DHT_Wrapper.hpp (97%) rename src/{ChemistryModule => Chemistry}/SurrogateModels/HashFunctions.cpp (98%) rename {include/poet => src/Chemistry/SurrogateModels}/HashFunctions.hpp (100%) rename {include/poet => src/Chemistry/SurrogateModels}/Interpolation.hpp (97%) rename src/{ChemistryModule => Chemistry}/SurrogateModels/InterpolationModule.cpp (94%) rename {include/poet => src/Chemistry/SurrogateModels}/LookupKey.hpp (97%) rename src/{ChemistryModule => Chemistry}/SurrogateModels/ProximityHashTable.cpp (97%) rename {include/poet => src/Chemistry/SurrogateModels}/Rounding.hpp (100%) rename src/{ChemistryModule => Chemistry}/WorkerFunctions.cpp (98%) rename {include/poet => src/Chemistry}/enums.hpp (100%) rename {include/poet => src/DataStructures}/DataStructures.hpp (99%) rename {include/poet => src}/Macros.hpp (100%) rename {include/poet => src}/RInsidePOET.hpp (100%) rename src/{ => Transport}/DiffusionModule.cpp (96%) rename {include/poet => src/Transport}/DiffusionModule.hpp (96%) rename {app => src}/poet.cpp (98%) create mode 100644 src/poet.h.in diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 1aa791dd4..bd4fc4bf1 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -1,7 +1,6 @@ -configure_file(poet.h.in poet.h) -add_executable(poet poet.cpp) -target_include_directories(poet PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") -target_link_libraries(poet PUBLIC poet_lib MPI::MPI_CXX) +# add_executable(poet poet.cpp) +# target_include_directories(poet PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") +# target_link_libraries(poet PUBLIC poet_lib MPI::MPI_CXX) -install(TARGETS poet DESTINATION bin) +# install(TARGETS poet DESTINATION bin) diff --git a/app/poet.h.in b/app/poet.h.in deleted file mode 100644 index 96a25a2c1..000000000 --- a/app/poet.h.in +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef POET_H -#define POET_H - -#include "poet/ChemistryModule.hpp" -#include -const char *poet_version = "@POET_VERSION@"; - -const char *CHEMISTRY_MODULE_NAME = "state_C"; - - -#endif // POET_H diff --git a/src/Grid.cpp b/src/Base/Grid.cpp similarity index 99% rename from src/Grid.cpp rename to src/Base/Grid.cpp index 64561b5ae..e51b114ba 100644 --- a/src/Grid.cpp +++ b/src/Base/Grid.cpp @@ -18,7 +18,9 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/SimParams.hpp" +#include "Grid.hpp" +#include "SimParams.hpp" + #include #include #include @@ -26,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/include/poet/Grid.hpp b/src/Base/Grid.hpp similarity index 99% rename from include/poet/Grid.hpp rename to src/Base/Grid.hpp index 2ea2068b9..deec5c04a 100644 --- a/include/poet/Grid.hpp +++ b/src/Base/Grid.hpp @@ -21,7 +21,8 @@ #ifndef GRID_H #define GRID_H -#include "poet/SimParams.hpp" +#include "SimParams.hpp" + #include #include #include diff --git a/src/SimParams.cpp b/src/Base/SimParams.cpp similarity index 99% rename from src/SimParams.cpp rename to src/Base/SimParams.cpp index e02046392..f711e796a 100644 --- a/src/SimParams.cpp +++ b/src/Base/SimParams.cpp @@ -20,11 +20,13 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/enums.hpp" +#include "SimParams.hpp" + +#include "../Chemistry/enums.hpp" + #include #include #include -#include #include #include diff --git a/include/poet/SimParams.hpp b/src/Base/SimParams.hpp similarity index 98% rename from include/poet/SimParams.hpp rename to src/Base/SimParams.hpp index 9928bcdba..212ec9948 100644 --- a/include/poet/SimParams.hpp +++ b/src/Base/SimParams.hpp @@ -21,6 +21,12 @@ #ifndef PARSER_H #define PARSER_H +#include "../DataStructures/DataStructures.hpp" +#include "../Macros.hpp" +#include "../RInsidePOET.hpp" + +#include "argh.hpp" // Argument handler https://github.com/adishavit/argh + #include #include #include @@ -28,10 +34,6 @@ #include #include -#include "DataStructures.hpp" -#include "Macros.hpp" -#include "RInsidePOET.hpp" -#include "argh.hpp" // Argument handler https://github.com/adishavit/argh #include #include // BSD-licenced diff --git a/include/poet/argh.hpp b/src/Base/argh.hpp similarity index 100% rename from include/poet/argh.hpp rename to src/Base/argh.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 74a61c8c7..addb7c11b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,12 +2,19 @@ file(GLOB_RECURSE poet_lib_SRC CONFIGURE_DEPENDS "*.cpp" "*.c") -add_library(poet_lib ${poet_lib_SRC}) -target_include_directories(poet_lib PUBLIC ${PROJECT_SOURCE_DIR}/include) -target_link_libraries(poet_lib PUBLIC - MPI::MPI_CXX ${MATH_LIBRARY} RRuntime PhreeqcRM tug) +configure_file(poet.h.in poet.h) -target_compile_definitions(poet_lib PUBLIC STRICT_R_HEADERS OMPI_SKIP_MPICXX) +add_executable(poet ${poet_lib_SRC}) +target_link_libraries(poet + MPI::MPI_CXX + ${MATH_LIBRARY} + RRuntime + PhreeqcRM + tug +) +target_include_directories(poet PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") + +target_compile_definitions(poet PRIVATE STRICT_R_HEADERS OMPI_SKIP_MPICXX) mark_as_advanced(PHREEQCRM_BUILD_MPI PHREEQCRM_DISABLE_OPENMP) set(PHREEQCRM_DISABLE_OPENMP ON CACHE BOOL "" FORCE) @@ -15,11 +22,13 @@ set(PHREEQCRM_DISABLE_OPENMP ON CACHE BOOL "" FORCE) option(POET_DHT_DEBUG "Build with DHT debug info" OFF) if(POET_DHT_DEBUG) - target_compile_definitions(poet_lib PRIVATE DHT_STATISTICS) + target_compile_definitions(poet PRIVATE DHT_STATISTICS) endif() option(POET_PHT_ADDITIONAL_INFO "Enables additional information in the PHT" OFF) if (POET_PHT_ADDITIONAL_INFO) - target_compile_definitions(poet_lib PRIVATE POET_PHT_ADD) + target_compile_definitions(poet PRIVATE POET_PHT_ADD) endif() + +install(TARGETS poet DESTINATION bin) \ No newline at end of file diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/Chemistry/ChemistryModule.cpp similarity index 99% rename from src/ChemistryModule/ChemistryModule.cpp rename to src/Chemistry/ChemistryModule.cpp index a1b1a8837..46531f2c6 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/Chemistry/ChemistryModule.cpp @@ -1,7 +1,9 @@ -#include "poet/ChemistryModule.hpp" -#include "PhreeqcRM.h" -#include "poet/DHT_Wrapper.hpp" -#include "poet/Interpolation.hpp" +#include "ChemistryModule.hpp" + +#include "SurrogateModels/DHT_Wrapper.hpp" +#include "SurrogateModels/Interpolation.hpp" + +#include #include #include diff --git a/include/poet/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp similarity index 98% rename from include/poet/ChemistryModule.hpp rename to src/Chemistry/ChemistryModule.hpp index c0c7f9083..a17069d53 100644 --- a/include/poet/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -3,12 +3,13 @@ #ifndef CHEMISTRYMODULE_H_ #define CHEMISTRYMODULE_H_ -#include "DHT_Wrapper.hpp" -#include "DataStructures.hpp" -#include "Interpolation.hpp" -#include "IrmResult.h" -#include "PhreeqcRM.h" -#include "SimParams.hpp" +#include "../Base/SimParams.hpp" +#include "../DataStructures/DataStructures.hpp" +#include "SurrogateModels/DHT_Wrapper.hpp" +#include "SurrogateModels/Interpolation.hpp" + +#include +#include #include #include diff --git a/src/ChemistryModule/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp similarity index 99% rename from src/ChemistryModule/MasterFunctions.cpp rename to src/Chemistry/MasterFunctions.cpp index 51b656da3..543cd21a8 100644 --- a/src/ChemistryModule/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -1,5 +1,6 @@ -#include "PhreeqcRM.h" -#include "poet/ChemistryModule.hpp" +#include "ChemistryModule.hpp" + +#include #include #include diff --git a/src/ChemistryModule/SurrogateModels/CMakeLists.txt b/src/Chemistry/SurrogateModels/CMakeLists.txt similarity index 100% rename from src/ChemistryModule/SurrogateModels/CMakeLists.txt rename to src/Chemistry/SurrogateModels/CMakeLists.txt diff --git a/src/ChemistryModule/SurrogateModels/DHT.c b/src/Chemistry/SurrogateModels/DHT.c similarity index 99% rename from src/ChemistryModule/SurrogateModels/DHT.c rename to src/Chemistry/SurrogateModels/DHT.c index 0a7261049..b0d5b3caa 100644 --- a/src/ChemistryModule/SurrogateModels/DHT.c +++ b/src/Chemistry/SurrogateModels/DHT.c @@ -16,8 +16,9 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include "DHT.h" + #include -#include #include #include diff --git a/include/poet/DHT.h b/src/Chemistry/SurrogateModels/DHT.h similarity index 100% rename from include/poet/DHT.h rename to src/Chemistry/SurrogateModels/DHT.h diff --git a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp b/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp similarity index 96% rename from src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp rename to src/Chemistry/SurrogateModels/DHT_Wrapper.cpp index 861375c04..69491b2c2 100644 --- a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp +++ b/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp @@ -20,14 +20,7 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/DHT_Wrapper.hpp" -#include "poet/HashFunctions.hpp" -#include "poet/Interpolation.hpp" -#include "poet/LookupKey.hpp" -#include "poet/Rounding.hpp" -#include "poet/enums.hpp" - -#include "poet/RInsidePOET.hpp" +#include "DHT_Wrapper.hpp" #include #include @@ -150,10 +143,9 @@ void DHT_Wrapper::fillDHT(const WorkPackage &work_package) { uint32_t proc, index; auto &key = dht_results.keys[i]; const auto data = - (with_interp - ? outputToInputAndRates(work_package.input[i], - work_package.output[i]) - : work_package.output[i]); + (with_interp ? outputToInputAndRates(work_package.input[i], + work_package.output[i]) + : work_package.output[i]); // void *data = (void *)&(work_package[i * this->data_count]); // fuzz data (round, logarithm etc.) diff --git a/include/poet/DHT_Wrapper.hpp b/src/Chemistry/SurrogateModels/DHT_Wrapper.hpp similarity index 97% rename from include/poet/DHT_Wrapper.hpp rename to src/Chemistry/SurrogateModels/DHT_Wrapper.hpp index 115d5e769..083fe9d25 100644 --- a/include/poet/DHT_Wrapper.hpp +++ b/src/Chemistry/SurrogateModels/DHT_Wrapper.hpp @@ -23,14 +23,14 @@ #ifndef DHT_WRAPPER_H #define DHT_WRAPPER_H -#include "DataStructures.hpp" +#include "../../Base/SimParams.hpp" +#include "../../DataStructures/DataStructures.hpp" +#include "../../RInsidePOET.hpp" +#include "../enums.hpp" +#include "HashFunctions.hpp" #include "LookupKey.hpp" -#include "RInsidePOET.hpp" -#include "SimParams.hpp" -#include "enums.hpp" -#include "poet/HashFunctions.hpp" -#include "poet/LookupKey.hpp" -#include "poet/Rounding.hpp" +#include "Rounding.hpp" + #include #include #include diff --git a/src/ChemistryModule/SurrogateModels/HashFunctions.cpp b/src/Chemistry/SurrogateModels/HashFunctions.cpp similarity index 98% rename from src/ChemistryModule/SurrogateModels/HashFunctions.cpp rename to src/Chemistry/SurrogateModels/HashFunctions.cpp index 431cd286f..d93702990 100644 --- a/src/ChemistryModule/SurrogateModels/HashFunctions.cpp +++ b/src/Chemistry/SurrogateModels/HashFunctions.cpp @@ -24,7 +24,7 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/HashFunctions.hpp" +#include "HashFunctions.hpp" #if defined(_MSC_VER) diff --git a/include/poet/HashFunctions.hpp b/src/Chemistry/SurrogateModels/HashFunctions.hpp similarity index 100% rename from include/poet/HashFunctions.hpp rename to src/Chemistry/SurrogateModels/HashFunctions.hpp diff --git a/include/poet/Interpolation.hpp b/src/Chemistry/SurrogateModels/Interpolation.hpp similarity index 97% rename from include/poet/Interpolation.hpp rename to src/Chemistry/SurrogateModels/Interpolation.hpp index 71b9f9a01..15361f4bf 100644 --- a/include/poet/Interpolation.hpp +++ b/src/Chemistry/SurrogateModels/Interpolation.hpp @@ -3,13 +3,12 @@ #ifndef INTERPOLATION_H_ #define INTERPOLATION_H_ -#include "DHT.h" +#include "../../Base/SimParams.hpp" +#include "../../DataStructures/DataStructures.hpp" #include "DHT_Wrapper.hpp" -#include "DataStructures.hpp" #include "LookupKey.hpp" -#include "poet/DHT_Wrapper.hpp" -#include "poet/Rounding.hpp" -#include "poet/SimParams.hpp" +#include "Rounding.hpp" + #include #include #include @@ -17,11 +16,11 @@ #include #include #include + extern "C" { -#include "poet/DHT.h" +#include "DHT.h" } -#include "poet/LookupKey.hpp" #include #include #include diff --git a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp b/src/Chemistry/SurrogateModels/InterpolationModule.cpp similarity index 94% rename from src/ChemistryModule/SurrogateModels/InterpolationModule.cpp rename to src/Chemistry/SurrogateModels/InterpolationModule.cpp index 9384731b2..26bfd4ab0 100644 --- a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp +++ b/src/Chemistry/SurrogateModels/InterpolationModule.cpp @@ -1,13 +1,15 @@ // Time-stamp: "Last modified 2023-08-16 17:02:31 mluebke" +#include "Interpolation.hpp" -#include "poet/DHT_Wrapper.hpp" -#include "poet/DataStructures.hpp" -#include "poet/HashFunctions.hpp" -#include "poet/Interpolation.hpp" -#include "poet/LookupKey.hpp" -#include "poet/Rounding.hpp" -#include +#include "../../DataStructures/DataStructures.hpp" +#include "DHT_Wrapper.hpp" +#include "HashFunctions.hpp" +#include "LookupKey.hpp" +#include "Rounding.hpp" + +#include #include + #include #include #include @@ -20,8 +22,9 @@ #include #include #include + extern "C" { -#include "poet/DHT.h" +#include "DHT.h" } namespace poet { diff --git a/include/poet/LookupKey.hpp b/src/Chemistry/SurrogateModels/LookupKey.hpp similarity index 97% rename from include/poet/LookupKey.hpp rename to src/Chemistry/SurrogateModels/LookupKey.hpp index fbb064c53..f64b92721 100644 --- a/include/poet/LookupKey.hpp +++ b/src/Chemistry/SurrogateModels/LookupKey.hpp @@ -3,7 +3,8 @@ #ifndef LOOKUPKEY_H_ #define LOOKUPKEY_H_ -#include "poet/HashFunctions.hpp" +#include "HashFunctions.hpp" + #include #include #include diff --git a/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp b/src/Chemistry/SurrogateModels/ProximityHashTable.cpp similarity index 97% rename from src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp rename to src/Chemistry/SurrogateModels/ProximityHashTable.cpp index 3ea44430d..f1589fa21 100644 --- a/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp +++ b/src/Chemistry/SurrogateModels/ProximityHashTable.cpp @@ -1,10 +1,11 @@ // Time-stamp: "Last modified 2023-08-15 14:50:59 mluebke" +#include "Interpolation.hpp" + +#include "DHT_Wrapper.hpp" +#include "HashFunctions.hpp" +#include "LookupKey.hpp" +#include "Rounding.hpp" -#include "poet/DHT_Wrapper.hpp" -#include "poet/HashFunctions.hpp" -#include "poet/Interpolation.hpp" -#include "poet/LookupKey.hpp" -#include "poet/Rounding.hpp" #include #include #include @@ -12,8 +13,9 @@ #include #include #include + extern "C" { -#include "poet/DHT.h" +#include "DHT.h" } namespace poet { diff --git a/include/poet/Rounding.hpp b/src/Chemistry/SurrogateModels/Rounding.hpp similarity index 100% rename from include/poet/Rounding.hpp rename to src/Chemistry/SurrogateModels/Rounding.hpp diff --git a/src/ChemistryModule/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp similarity index 98% rename from src/ChemistryModule/WorkerFunctions.cpp rename to src/Chemistry/WorkerFunctions.cpp index 624a78b4f..faa99834b 100644 --- a/src/ChemistryModule/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -1,8 +1,8 @@ // Time-stamp: "Last modified 2023-08-16 14:50:04 mluebke" -#include "poet/ChemistryModule.hpp" -#include "poet/DHT_Wrapper.hpp" -#include "poet/Interpolation.hpp" +#include "ChemistryModule.hpp" +#include "SurrogateModels/DHT_Wrapper.hpp" +#include "SurrogateModels/Interpolation.hpp" #include #include diff --git a/include/poet/enums.hpp b/src/Chemistry/enums.hpp similarity index 100% rename from include/poet/enums.hpp rename to src/Chemistry/enums.hpp diff --git a/include/poet/DataStructures.hpp b/src/DataStructures/DataStructures.hpp similarity index 99% rename from include/poet/DataStructures.hpp rename to src/DataStructures/DataStructures.hpp index 834ceca50..30f69e5dd 100644 --- a/include/poet/DataStructures.hpp +++ b/src/DataStructures/DataStructures.hpp @@ -1,7 +1,7 @@ #ifndef DATASTRUCTURES_H_ #define DATASTRUCTURES_H_ -#include "enums.hpp" +#include "../Chemistry/enums.hpp" #include @@ -13,6 +13,7 @@ #include #include #include + namespace poet { struct WorkPackage { diff --git a/src/DataStructures/Field.cpp b/src/DataStructures/Field.cpp index de364a599..71d51dea4 100644 --- a/src/DataStructures/Field.cpp +++ b/src/DataStructures/Field.cpp @@ -1,4 +1,5 @@ -#include "poet/DataStructures.hpp" +#include "DataStructures.hpp" + #include #include #include diff --git a/include/poet/Macros.hpp b/src/Macros.hpp similarity index 100% rename from include/poet/Macros.hpp rename to src/Macros.hpp diff --git a/include/poet/RInsidePOET.hpp b/src/RInsidePOET.hpp similarity index 100% rename from include/poet/RInsidePOET.hpp rename to src/RInsidePOET.hpp diff --git a/src/DiffusionModule.cpp b/src/Transport/DiffusionModule.cpp similarity index 96% rename from src/DiffusionModule.cpp rename to src/Transport/DiffusionModule.cpp index ba7a60e62..b02f5ca0b 100644 --- a/src/DiffusionModule.cpp +++ b/src/Transport/DiffusionModule.cpp @@ -18,15 +18,15 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include "DiffusionModule.hpp" +#include "../Macros.hpp" + +#include +#include + #include #include #include -#include -#include -#include -#include -#include -#include #include #include @@ -177,11 +177,11 @@ void DiffusionModule::simulate(double dt) { t_field = field_2d; - sim_a_transport = MPI_Wtime(); transport_t += sim_a_transport - sim_b_transport; - std::cout << " done in "<< sim_a_transport - sim_b_transport << "sec" << std::endl; + std::cout << " done in " << sim_a_transport - sim_b_transport << "sec" + << std::endl; } void DiffusionModule::end() { diff --git a/include/poet/DiffusionModule.hpp b/src/Transport/DiffusionModule.hpp similarity index 96% rename from include/poet/DiffusionModule.hpp rename to src/Transport/DiffusionModule.hpp index 8ea379027..a31bc0738 100644 --- a/include/poet/DiffusionModule.hpp +++ b/src/Transport/DiffusionModule.hpp @@ -21,16 +21,17 @@ #ifndef DIFFUSION_MODULE_H #define DIFFUSION_MODULE_H -#include "DataStructures.hpp" -#include "SimParams.hpp" -#include "poet/SimParams.hpp" +#include "../Base/Grid.hpp" +#include "../Base/SimParams.hpp" +#include "../DataStructures/DataStructures.hpp" + +#include +#include + #include #include #include -#include #include -#include -#include #include namespace poet { diff --git a/app/poet.cpp b/src/poet.cpp similarity index 98% rename from app/poet.cpp rename to src/poet.cpp index 4095cf858..aa6c22cb6 100644 --- a/app/poet.cpp +++ b/src/poet.cpp @@ -18,15 +18,18 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include "Base/Grid.hpp" +#include "Base/SimParams.hpp" +#include "Chemistry/ChemistryModule.hpp" +#include "Macros.hpp" +#include "RInsidePOET.hpp" +#include "Transport/DiffusionModule.hpp" + +#include + #include #include #include -#include -#include -#include -#include -#include -#include #include #include @@ -34,7 +37,6 @@ #include #include -#include using namespace std; using namespace poet; diff --git a/src/poet.h.in b/src/poet.h.in new file mode 100644 index 000000000..6a85179d4 --- /dev/null +++ b/src/poet.h.in @@ -0,0 +1,8 @@ +#ifndef POET_H +#define POET_H + +static const char *poet_version = "@POET_VERSION@"; + +static const char *CHEMISTRY_MODULE_NAME = "state_C"; + +#endif // POET_H From fbff818e486a5c21d0517221f35c063981e95dc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 11:32:45 +0100 Subject: [PATCH 44/52] Remove unused 'app' directory --- CMakeLists.txt | 1 - app/CMakeLists.txt | 6 ------ 2 files changed, 7 deletions(-) delete mode 100644 app/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fd34afeb..68cc4db1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,6 @@ find_package(RRuntime REQUIRED) add_subdirectory(src) add_subdirectory(R_lib) -add_subdirectory(app) add_subdirectory(bench) # as tug will also pull in doctest as a dependency diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt deleted file mode 100644 index bd4fc4bf1..000000000 --- a/app/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ - -# add_executable(poet poet.cpp) -# target_include_directories(poet PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") -# target_link_libraries(poet PUBLIC poet_lib MPI::MPI_CXX) - -# install(TARGETS poet DESTINATION bin) From 7abf98712354b044837e694f98e27e3de3f45925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 11:59:12 +0100 Subject: [PATCH 45/52] Add Macros.hpp and RInsidePOET.hpp*** ***Add RInsidePOET class and RHookFunction template*** ***Update include paths in SimParams.hpp*** ***Update CMakeLists.txt to link poetlib*** ***Update DHT_Wrapper.hpp include path*** ***Update DiffusionModule.cpp include path*** ***Update poet.cpp include paths*** ***Add poet.hpp.in --- src/{ => Base}/Macros.hpp | 0 src/{ => Base}/RInsidePOET.hpp | 0 src/Base/SimParams.hpp | 4 +- src/CMakeLists.txt | 39 ++++++++++++------- src/Chemistry/SurrogateModels/DHT_Wrapper.hpp | 2 +- src/Transport/DiffusionModule.cpp | 2 +- src/poet.cpp | 6 +-- src/{poet.h.in => poet.hpp.in} | 0 8 files changed, 33 insertions(+), 20 deletions(-) rename src/{ => Base}/Macros.hpp (100%) rename src/{ => Base}/RInsidePOET.hpp (100%) rename src/{poet.h.in => poet.hpp.in} (100%) diff --git a/src/Macros.hpp b/src/Base/Macros.hpp similarity index 100% rename from src/Macros.hpp rename to src/Base/Macros.hpp diff --git a/src/RInsidePOET.hpp b/src/Base/RInsidePOET.hpp similarity index 100% rename from src/RInsidePOET.hpp rename to src/Base/RInsidePOET.hpp diff --git a/src/Base/SimParams.hpp b/src/Base/SimParams.hpp index 212ec9948..523b5b105 100644 --- a/src/Base/SimParams.hpp +++ b/src/Base/SimParams.hpp @@ -22,8 +22,8 @@ #define PARSER_H #include "../DataStructures/DataStructures.hpp" -#include "../Macros.hpp" -#include "../RInsidePOET.hpp" +#include "Macros.hpp" +#include "RInsidePOET.hpp" #include "argh.hpp" // Argument handler https://github.com/adishavit/argh diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index addb7c11b..51261c39a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,20 +1,27 @@ -file(GLOB_RECURSE poet_lib_SRC - CONFIGURE_DEPENDS - "*.cpp" "*.c") +add_library(poetlib + Base/Grid.cpp + Base/SimParams.cpp + Chemistry/ChemistryModule.cpp + Chemistry/MasterFunctions.cpp + Chemistry/WorkerFunctions.cpp + Chemistry/SurrogateModels/DHT_Wrapper.cpp + Chemistry/SurrogateModels/DHT.c + Chemistry/SurrogateModels/HashFunctions.cpp + Chemistry/SurrogateModels/InterpolationModule.cpp + Chemistry/SurrogateModels/ProximityHashTable.cpp + DataStructures/Field.cpp + Transport/DiffusionModule.cpp +) -configure_file(poet.h.in poet.h) - -add_executable(poet ${poet_lib_SRC}) -target_link_libraries(poet - MPI::MPI_CXX +target_link_libraries(poetlib PUBLIC + MPI::MPI_C ${MATH_LIBRARY} RRuntime PhreeqcRM tug ) -target_include_directories(poet PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") -target_compile_definitions(poet PRIVATE STRICT_R_HEADERS OMPI_SKIP_MPICXX) +target_compile_definitions(poetlib PUBLIC STRICT_R_HEADERS) mark_as_advanced(PHREEQCRM_BUILD_MPI PHREEQCRM_DISABLE_OPENMP) set(PHREEQCRM_DISABLE_OPENMP ON CACHE BOOL "" FORCE) @@ -22,13 +29,19 @@ set(PHREEQCRM_DISABLE_OPENMP ON CACHE BOOL "" FORCE) option(POET_DHT_DEBUG "Build with DHT debug info" OFF) if(POET_DHT_DEBUG) - target_compile_definitions(poet PRIVATE DHT_STATISTICS) + target_compile_definitions(poetlib PRIVATE DHT_STATISTICS) endif() option(POET_PHT_ADDITIONAL_INFO "Enables additional information in the PHT" OFF) if (POET_PHT_ADDITIONAL_INFO) - target_compile_definitions(poet PRIVATE POET_PHT_ADD) + target_compile_definitions(poetlib PRIVATE POET_PHT_ADD) endif() -install(TARGETS poet DESTINATION bin) \ No newline at end of file +configure_file(poet.hpp.in poet.hpp) + +add_executable(poet poet.cpp) +target_link_libraries(poet PRIVATE poetlib MPI::MPI_C RRuntime) +target_include_directories(poet PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") + +install(TARGETS poet DESTINATION bin) diff --git a/src/Chemistry/SurrogateModels/DHT_Wrapper.hpp b/src/Chemistry/SurrogateModels/DHT_Wrapper.hpp index 083fe9d25..791be2d1b 100644 --- a/src/Chemistry/SurrogateModels/DHT_Wrapper.hpp +++ b/src/Chemistry/SurrogateModels/DHT_Wrapper.hpp @@ -23,9 +23,9 @@ #ifndef DHT_WRAPPER_H #define DHT_WRAPPER_H +#include "../../Base/RInsidePOET.hpp" #include "../../Base/SimParams.hpp" #include "../../DataStructures/DataStructures.hpp" -#include "../../RInsidePOET.hpp" #include "../enums.hpp" #include "HashFunctions.hpp" #include "LookupKey.hpp" diff --git a/src/Transport/DiffusionModule.cpp b/src/Transport/DiffusionModule.cpp index b02f5ca0b..abbecce95 100644 --- a/src/Transport/DiffusionModule.cpp +++ b/src/Transport/DiffusionModule.cpp @@ -19,7 +19,7 @@ */ #include "DiffusionModule.hpp" -#include "../Macros.hpp" +#include "../Base/Macros.hpp" #include #include diff --git a/src/poet.cpp b/src/poet.cpp index aa6c22cb6..c4996dc97 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -19,13 +19,13 @@ */ #include "Base/Grid.hpp" +#include "Base/Macros.hpp" +#include "Base/RInsidePOET.hpp" #include "Base/SimParams.hpp" #include "Chemistry/ChemistryModule.hpp" -#include "Macros.hpp" -#include "RInsidePOET.hpp" #include "Transport/DiffusionModule.hpp" -#include +#include #include #include diff --git a/src/poet.h.in b/src/poet.hpp.in similarity index 100% rename from src/poet.h.in rename to src/poet.hpp.in From a2ebc3f1d7753e91f74f767b2b05035f5b15a35c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 11:59:18 +0100 Subject: [PATCH 46/52] Update CMakeLists.txt and include headers in test files --- test/CMakeLists.txt | 3 ++- test/testField.cpp | 5 +++-- test/testNamedVector.cpp | 5 +++-- test/testRounding.cpp | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 32078d388..884750735 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,7 +3,8 @@ file(GLOB test_SRC "*.cpp" "*.c") add_executable(testPOET ${test_SRC}) -target_link_libraries(testPOET doctest poet_lib) +target_link_libraries(testPOET doctest poetlib) +target_include_directories(testPOET PRIVATE "${PROJECT_SOURCE_DIR}/src") get_filename_component(TEST_RInsideSourceFile "RInsidePOET_funcs.R" REALPATH) configure_file(testDataStructures.hpp.in testDataStructures.hpp) diff --git a/test/testField.cpp b/test/testField.cpp index 8ee1bf5d6..acfe8d1a8 100644 --- a/test/testField.cpp +++ b/test/testField.cpp @@ -5,11 +5,12 @@ #include #include #include -#include -#include #include #include +#include +#include + #include "testDataStructures.hpp" using namespace poet; diff --git a/test/testNamedVector.cpp b/test/testNamedVector.cpp index 713f2729b..d5ee908f5 100644 --- a/test/testNamedVector.cpp +++ b/test/testNamedVector.cpp @@ -1,11 +1,12 @@ #include #include #include -#include -#include #include #include +#include +#include + #include "testDataStructures.hpp" TEST_CASE("NamedVector") { diff --git a/test/testRounding.cpp b/test/testRounding.cpp index 06deef193..be50fac99 100644 --- a/test/testRounding.cpp +++ b/test/testRounding.cpp @@ -1,6 +1,6 @@ #include -#include "poet/Rounding.hpp" +#include TEST_CASE("Rounding") { constexpr int signif = 3; From ffb4fe6bc6361efb09bb6645a18259405bfe2d9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 12:01:19 +0100 Subject: [PATCH 47/52] Add OMPI_SKIP_MPICXX to target_compile_definitions in CMakeLists.txt --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 51261c39a..984e2c2d0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,7 +21,7 @@ target_link_libraries(poetlib PUBLIC tug ) -target_compile_definitions(poetlib PUBLIC STRICT_R_HEADERS) +target_compile_definitions(poetlib PUBLIC STRICT_R_HEADERS OMPI_SKIP_MPICXX) mark_as_advanced(PHREEQCRM_BUILD_MPI PHREEQCRM_DISABLE_OPENMP) set(PHREEQCRM_DISABLE_OPENMP ON CACHE BOOL "" FORCE) From 0fa0aa699bf9022488c1dd45e9f8ece67c150e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 12:03:02 +0100 Subject: [PATCH 48/52] Remove unused CMakeLists.txt file for SurrogateModels --- src/Chemistry/SurrogateModels/CMakeLists.txt | 21 -------------------- 1 file changed, 21 deletions(-) delete mode 100644 src/Chemistry/SurrogateModels/CMakeLists.txt diff --git a/src/Chemistry/SurrogateModels/CMakeLists.txt b/src/Chemistry/SurrogateModels/CMakeLists.txt deleted file mode 100644 index 334ec98b5..000000000 --- a/src/Chemistry/SurrogateModels/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -file(GLOB surrogate_models_SRC - CONFIGURE_DEPENDS - "*.cpp" "*.c") - -find_library(MATH_LIBRARY m) - -add_library(surrogate_models ${surrogate_models_SRC}) -target_include_directories(surrogate_models PUBLIC ${PROJECT_SOURCE_DIR}/include) -target_link_libraries(surrogate_models PUBLIC - MPI::MPI_CXX ${MATH_LIBRARY}) -target_compile_definitions(surrogate_models PUBLIC OMPI_SKIP_MPICXX) - -option(POET_PHT_ADDITIONAL_INFO "Enables additional information in the PHT" OFF) - -if (POET_PHT_ADDITIONAL_INFO) - target_compile_definitions(surrogate_models PRIVATE POET_PHT_ADD) -endif() - -if(POET_DHT_DEBUG) - target_compile_definitions(surrogate_models PRIVATE DHT_STATISTICS) -endif() From ab83d8c3502d7cf0354529d1a39cff92057f60d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 12:34:02 +0100 Subject: [PATCH 49/52] Add R library file as string to executable --- src/CMakeLists.txt | 4 +++- src/poet.cpp | 3 +-- src/poet.hpp.in | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 984e2c2d0..1494c0849 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -38,7 +38,9 @@ if (POET_PHT_ADDITIONAL_INFO) target_compile_definitions(poetlib PRIVATE POET_PHT_ADD) endif() -configure_file(poet.hpp.in poet.hpp) +file(READ "${PROJECT_SOURCE_DIR}/R_lib/kin_r_library.R" R_KIN_LIB ) + +configure_file(poet.hpp.in poet.hpp @ONLY) add_executable(poet poet.cpp) target_link_libraries(poet PRIVATE poetlib MPI::MPI_C RRuntime) diff --git a/src/poet.cpp b/src/poet.cpp index c4996dc97..10c706a0d 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -302,8 +302,7 @@ int main(int argc, char *argv[]) { /*Loading Dependencies*/ // TODO: kann raus - std::string r_load_dependencies = "source('../R_lib/kin_r_library.R');"; - R.parseEvalQ(r_load_dependencies); + R.parseEvalQ(kin_r_library); SimParams params(world_rank, world_size); int pret = params.parseFromCmdl(argv, R); diff --git a/src/poet.hpp.in b/src/poet.hpp.in index 6a85179d4..666fbaa08 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -1,8 +1,10 @@ #ifndef POET_H #define POET_H +#include static const char *poet_version = "@POET_VERSION@"; -static const char *CHEMISTRY_MODULE_NAME = "state_C"; +// using the Raw string literal to avoid escaping the quotes +static inline std::string kin_r_library = R"(@R_KIN_LIB@)"; #endif // POET_H From 263f23534d964e21107a4618fdba95a407be38dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 12:20:27 +0000 Subject: [PATCH 50/52] Remove R_lib subdirectory from CMakeLists.txt --- CMakeLists.txt | 1 - R_lib/CMakeLists.txt | 1 - 2 files changed, 2 deletions(-) delete mode 100644 R_lib/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 68cc4db1f..8faf514e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,6 @@ find_package(MPI REQUIRED) find_package(RRuntime REQUIRED) add_subdirectory(src) -add_subdirectory(R_lib) add_subdirectory(bench) # as tug will also pull in doctest as a dependency diff --git a/R_lib/CMakeLists.txt b/R_lib/CMakeLists.txt deleted file mode 100644 index 24bab1690..000000000 --- a/R_lib/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -install(FILES kin_r_library.R DESTINATION R_lib) From a3683f0459478e4693458453f9f26411f1d85794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 12:24:34 +0000 Subject: [PATCH 51/52] Add Dockerfile and devcontainer.json for VS Code development environment --- .devcontainer/Dockerfile | 12 ++++++++++++ .devcontainer/devcontainer.json | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 000000000..8016eb576 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,12 @@ +FROM mcr.microsoft.com/vscode/devcontainers/base:debian + +RUN sudo apt-get update && export DEBIAN_FRONTEND=noninteractive \ + && sudo apt-get install -y \ + cmake-curses-gui \ + clangd \ + git \ + libeigen3-dev \ + libopenmpi-dev \ + r-cran-rcpp \ + r-cran-rinside + diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..d32365cee --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,28 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile +{ + "build": { + "dockerfile": "Dockerfile" + }, + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + // Uncomment the next line to run commands after the container is created. + // "postCreateCommand": "cat /etc/os-release", + // Configure tool-specific properties. + "customizations": { + "vscode": { + "extensions": [ + "twxs.cmake", + "llvm-vs-code-extensions.vscode-clangd" + ] + } + }, + // in case you want to push/pull from remote repositories using ssh + "mounts": [ + "source=${localEnv:HOME}/.ssh,target=/home/vscode/.ssh,type=bind,consistency=cached" + ] + // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "devcontainer" +} \ No newline at end of file From 7181465835a7ac44ee334f7e9277723dff94a65a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Wed, 6 Mar 2024 12:25:30 +0000 Subject: [PATCH 52/52] Update doxygen configuration --- docs/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 8b4303dfa..4781c2b87 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -12,7 +12,7 @@ if(DOXYGEN_FOUND) set(DOXYGEN_PROJECT_NUMBER ${POET_VERSION}) doxygen_add_docs(doxygen - ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/README.md ${PROJECT_SOURCE_DIR}/docs/Input_Scripts.md ${PROJECT_SOURCE_DIR}/docs/Output.md