From 0c2597d97f537b078b49d8436d8ba0cfc0f7fb4b Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Tue, 25 Apr 2023 17:38:58 +0200 Subject: [PATCH] feat: introduce LookupKey and rounding schemes feat: implement data clustering using PHT feat: implement interpolation refactor: use named vector for DHT species definition and significant digits data: remove unusable input scripts data: move Phreeqc database to benchmark dir refactor: rename dolomite benchmark directory refactor: remove DHT prop type from input script --- CMake/POET_Scripts.cmake | 2 +- CMakeLists.txt | 1 - README.md | 2 +- R_lib/kin_r_library.R | 53 +-- app/poet.cpp | 90 ++-- bench/CMakeLists.txt | 2 +- bench/barite/CMakeLists.txt | 1 + bench/barite/barite.R | 16 +- bench/barite/barite_interp_eval.R | 151 +++++++ .../{dolo_diffu_inner => dolo}/CMakeLists.txt | 2 + bench/{dolo_diffu_inner => dolo}/Eval.R | 0 {data => bench/dolo}/dol.pqi | 0 .../dolo_diffu_inner.R | 33 +- .../dolo_diffu_inner_large.R | 22 +- .../{dolo_diffu_inner => dolo}/dolo_inner.pqi | 0 bench/dolo/dolo_interp_long.R | 171 ++++++++ {data => bench/dolo}/phreeqc_kin.dat | 6 +- data/CMakeLists.txt | 8 - data/SimDol1D_diffu.R | 180 -------- data/SimDol2D_diffu.R | 213 --------- data/SimDol2D_diffu.yaml | 13 - ext/phreeqcrm | 2 +- include/poet/ChemistryModule.hpp | 126 +++--- include/poet/DHT.h | 51 ++- include/poet/DHT_Types.hpp | 2 +- include/poet/DHT_Wrapper.hpp | 121 +++--- include/poet/DataStructures.hpp | 35 ++ include/poet/Interpolation.hpp | 266 ++++++++++++ include/poet/LookupKey.hpp | 50 +++ include/poet/RInsidePOET.hpp | 40 ++ include/poet/Rounding.hpp | 92 ++++ include/poet/SimParams.hpp | 72 ++-- src/CMakeLists.txt | 9 +- src/ChemistryModule/ChemistryModule.cpp | 406 +++++++++++++----- src/ChemistryModule/DHT_Wrapper.cpp | 226 ---------- src/ChemistryModule/MasterFunctions.cpp | 153 ++++++- .../SurrogateModels/CMakeLists.txt | 21 + .../{ => SurrogateModels}/DHT.c | 233 ++++++++-- .../SurrogateModels/DHT_Wrapper.cpp | 300 +++++++++++++ .../{ => SurrogateModels}/HashFunctions.cpp | 2 +- .../SurrogateModels/InterpolationModule.cpp | 197 +++++++++ .../SurrogateModels/ProximityHashTable.cpp | 275 ++++++++++++ src/ChemistryModule/WorkerFunctions.cpp | 293 ++++++++++--- src/SimParams.cpp | 115 ++--- test/testRounding.cpp | 79 ++++ util/data_evaluation/RFun_Eval.R | 95 +++- util/data_evaluation/interpret_keys.cpp | 13 + 47 files changed, 3003 insertions(+), 1237 deletions(-) create mode 100644 bench/barite/barite_interp_eval.R rename bench/{dolo_diffu_inner => dolo}/CMakeLists.txt (74%) rename bench/{dolo_diffu_inner => dolo}/Eval.R (100%) rename {data => bench/dolo}/dol.pqi (100%) rename bench/{dolo_diffu_inner => dolo}/dolo_diffu_inner.R (84%) rename bench/{dolo_diffu_inner => dolo}/dolo_diffu_inner_large.R (88%) rename bench/{dolo_diffu_inner => dolo}/dolo_inner.pqi (100%) create mode 100644 bench/dolo/dolo_interp_long.R rename {data => bench/dolo}/phreeqc_kin.dat (99%) delete mode 100644 data/CMakeLists.txt delete mode 100644 data/SimDol1D_diffu.R delete mode 100644 data/SimDol2D_diffu.R delete mode 100644 data/SimDol2D_diffu.yaml create mode 100644 include/poet/DataStructures.hpp create mode 100644 include/poet/Interpolation.hpp create mode 100644 include/poet/LookupKey.hpp create mode 100644 include/poet/Rounding.hpp delete mode 100644 src/ChemistryModule/DHT_Wrapper.cpp create mode 100644 src/ChemistryModule/SurrogateModels/CMakeLists.txt rename src/ChemistryModule/{ => SurrogateModels}/DHT.c (68%) create mode 100644 src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp rename src/ChemistryModule/{ => SurrogateModels}/HashFunctions.cpp (97%) create mode 100644 src/ChemistryModule/SurrogateModels/InterpolationModule.cpp create mode 100644 src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp create mode 100644 test/testRounding.cpp diff --git a/CMake/POET_Scripts.cmake b/CMake/POET_Scripts.cmake index 69760cc44..89fc1e84f 100644 --- a/CMake/POET_Scripts.cmake +++ b/CMake/POET_Scripts.cmake @@ -9,7 +9,7 @@ macro(get_POET_version) OUTPUT_VARIABLE POET_GIT_BRANCH OUTPUT_STRIP_TRAILING_WHITESPACE) execute_process( - COMMAND ${GIT_EXECUTABLE} describe --always + COMMAND ${GIT_EXECUTABLE} describe --tags --always WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_VARIABLE POET_GIT_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb87f3a7a..9fd34afeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,6 @@ find_package(RRuntime REQUIRED) add_subdirectory(src) add_subdirectory(R_lib) -add_subdirectory(data) add_subdirectory(app) add_subdirectory(bench) diff --git a/README.md b/README.md index 4e29aea28..7ef495139 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # POET diff --git a/R_lib/kin_r_library.R b/R_lib/kin_r_library.R index e350ac35e..c759bf9e1 100644 --- a/R_lib/kin_r_library.R +++ b/R_lib/kin_r_library.R @@ -64,28 +64,33 @@ master_init <- function(setup) { ## calculated time step if store_result is TRUE and increments the ## iteration counter master_iteration_end <- function(setup) { - iter <- setup$iter - ## MDL Write on disk state_T and state_C after every iteration - ## comprised in setup$out_save - if (setup$store_result) { - if (iter %in% setup$out_save) { - nameout <- paste0(fileout, "/iter_", sprintf("%03d", iter), ".rds") - info <- list( - tr_req_dt = as.integer(setup$req_dt) - # tr_allow_dt = setup$allowed_dt, - # tr_inniter = as.integer(setup$inniter) - ) - saveRDS(list( - T = setup$state_T, C = setup$state_C, - simtime = as.integer(setup$simtime), - tr_info = info - ), file = nameout) - msgm("results stored in <", nameout, ">") + iter <- setup$iter + ## max digits for iterations + dgts <- as.integer(ceiling(log10(setup$iterations + 1))) + ## string format to use in sprintf + fmt <- paste0("%0", dgts, "d") + + ## Write on disk state_T and state_C after every iteration + ## comprised in setup$out_save + if (setup$store_result) { + if (iter %in% setup$out_save) { + nameout <- paste0(fileout, "/iter_", sprintf(fmt=fmt, iter), ".rds") + info <- list( + tr_req_dt = as.integer(setup$req_dt) + ## tr_allow_dt = setup$allowed_dt, + ## tr_inniter = as.integer(setup$inniter) + ) + saveRDS(list( + T = setup$state_T, C = setup$state_C, + simtime = as.integer(setup$simtime), + tr_info = info + ), file = nameout) + msgm("results stored in <", nameout, ">") + } } - } - msgm("done iteration", iter, "/", setup$maxiter) - setup$iter <- setup$iter + 1 - return(setup) + msgm("done iteration", iter, "/", setup$maxiter) + setup$iter <- setup$iter + 1 + return(setup) } ## function for the workers to compute chemistry through PHREEQC @@ -264,9 +269,9 @@ StoreSetup <- function(setup) { if (dht_enabled) { to_store$DHT <- list( enabled = dht_enabled, - log = dht_log, - signif = dht_final_signif, - proptype = dht_final_proptype + log = dht_log + #signif = dht_final_signif, + #proptype = dht_final_proptype ) } else { to_store$DHT <- FALSE diff --git a/app/poet.cpp b/app/poet.cpp index bc47c2c2f..69d1b470f 100644 --- a/app/poet.cpp +++ b/app/poet.cpp @@ -70,7 +70,7 @@ void writeFieldsToR(RInside &R, const Field &trans, const Field &chem) { void set_chem_parameters(poet::ChemistryModule &chem, uint32_t wp_size, const std::string &database_path) { chem.SetErrorHandlerMode(1); - chem.SetComponentH2O(true); + chem.SetComponentH2O(false); chem.SetRebalanceFraction(0.5); chem.SetRebalanceByCell(true); chem.UseSolutionDensityVolume(false); @@ -112,7 +112,6 @@ void set_chem_parameters(poet::ChemistryModule &chem, uint32_t wp_size, } inline double RunMasterLoop(SimParams ¶ms, RInside &R, - ChemistryParams &chem_params, const GridParams &g_params, uint32_t nxyz_master) { DiffusionParams d_params{R}; @@ -123,10 +122,11 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, double sim_time = .0; - ChemistryModule chem(nxyz_master, params.getNumParams().wp_size, - MPI_COMM_WORLD); - set_chem_parameters(chem, nxyz_master, chem_params.database_path); - chem.RunInitFile(chem_params.input_script); + ChemistryModule chem(nxyz_master, params.getNumParams().wp_size, maxiter, + params.getChemParams(), MPI_COMM_WORLD); + + set_chem_parameters(chem, nxyz_master, params.getChemParams().database_path); + chem.RunInitFile(params.getChemParams().input_script); poet::ChemistryModule::SingleCMap init_df = DFToHashMap(d_params.initial_t); chem.initializeField(diffusion.getField()); @@ -135,27 +135,11 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, chem.setProgressBarPrintout(true); } - if (params.getNumParams().dht_enabled) { - chem.SetDHTEnabled(true, params.getNumParams().dht_size_per_process, - chem_params.dht_species); - if (!chem_params.dht_signif.empty()) { - chem.SetDHTSignifVector(chem_params.dht_signif); - } - if (!params.getDHTPropTypeVector().empty()) { - chem.SetDHTPropTypeVector(params.getDHTPropTypeVector()); - } - if (!params.getDHTFile().empty()) { - chem.ReadDHTFile(params.getDHTFile()); - } - if (params.getNumParams().dht_snaps > 0) { - chem.SetDHTSnaps(params.getNumParams().dht_snaps, params.getOutDir()); - } - } - /* SIMULATION LOOP */ - double dStartTime = MPI_Wtime(); + double dSimTime{0}; for (uint32_t iter = 1; iter < maxiter + 1; iter++) { + double start_t = MPI_Wtime(); uint32_t tick = 0; // cout << "CPP: Evaluating next time step" << endl; // R.parseEvalQ("mysetup <- master_iteration_setup(mysetup)"); @@ -203,7 +187,8 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, << endl; // MPI_Barrier(MPI_COMM_WORLD); - + double end_t = MPI_Wtime(); + dSimTime += end_t - start_t; } // END SIMULATION LOOP R.parseEvalQ("profiling <- list()"); @@ -232,23 +217,36 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, // R["phreeqc_count"] = phreeqc_counts; // R.parseEvalQ("profiling$phreeqc_count <- phreeqc_count"); - if (params.getNumParams().dht_enabled) { + if (params.getChemParams().use_dht) { R["dht_hits"] = Rcpp::wrap(chem.GetWorkerDHTHits()); R.parseEvalQ("profiling$dht_hits <- dht_hits"); - R["dht_miss"] = Rcpp::wrap(chem.GetWorkerDHTMiss()); - R.parseEvalQ("profiling$dht_miss <- dht_miss"); R["dht_evictions"] = Rcpp::wrap(chem.GetWorkerDHTEvictions()); R.parseEvalQ("profiling$dht_evictions <- dht_evictions"); R["dht_get_time"] = Rcpp::wrap(chem.GetWorkerDHTGetTimings()); R.parseEvalQ("profiling$dht_get_time <- dht_get_time"); R["dht_fill_time"] = Rcpp::wrap(chem.GetWorkerDHTFillTimings()); R.parseEvalQ("profiling$dht_fill_time <- dht_fill_time"); + if (params.getChemParams().use_interp) { + R["interp_w"] = Rcpp::wrap(chem.GetWorkerInterpolationWriteTimings()); + R.parseEvalQ("profiling$interp_write <- interp_w"); + R["interp_r"] = Rcpp::wrap(chem.GetWorkerInterpolationReadTimings()); + R.parseEvalQ("profiling$interp_read <- interp_r"); + R["interp_g"] = Rcpp::wrap(chem.GetWorkerInterpolationGatherTimings()); + R.parseEvalQ("profiling$interp_gather <- interp_g"); + R["interp_fc"] = + Rcpp::wrap(chem.GetWorkerInterpolationFunctionCallTimings()); + R.parseEvalQ("profiling$interp_function_calls <- interp_fc"); + R["interp_calls"] = Rcpp::wrap(chem.GetWorkerInterpolationCalls()); + R.parseEvalQ("profiling$interp_calls <- interp_calls"); + R["interp_cached"] = Rcpp::wrap(chem.GetWorkerPHTCacheHits()); + R.parseEvalQ("profiling$interp_cached <- interp_cached"); + } } chem.MasterLoopBreak(); diffusion.end(); - return MPI_Wtime() - dStartTime; + return dSimTime; } int main(int argc, char *argv[]) { @@ -268,19 +266,23 @@ int main(int argc, char *argv[]) { if (world_rank > 0) { { - uint32_t c_size; - MPI_Bcast(&c_size, 1, MPI_UINT32_T, 0, MPI_COMM_WORLD); - char *buffer = new char[c_size + 1]; - MPI_Bcast(buffer, c_size, MPI_CHAR, 0, MPI_COMM_WORLD); - buffer[c_size] = '\0'; + SimParams params(world_rank, world_size); + int pret = params.parseFromCmdl(argv, RInsidePOET::getInstance()); + + if (pret == poet::PARSER_ERROR) { + MPI_Finalize(); + return EXIT_FAILURE; + } else if (pret == poet::PARSER_HELP) { + MPI_Finalize(); + return EXIT_SUCCESS; + } // ChemistryModule worker(nxyz, nxyz, MPI_COMM_WORLD); - ChemistryModule worker = - poet::ChemistryModule::createWorker(MPI_COMM_WORLD); - set_chem_parameters(worker, worker.GetWPSize(), std::string(buffer)); - - delete[] buffer; + ChemistryModule worker = poet::ChemistryModule::createWorker( + MPI_COMM_WORLD, params.getChemParams()); + set_chem_parameters(worker, worker.GetWPSize(), + params.getChemParams().database_path); worker.WorkerLoop(); } @@ -334,17 +336,9 @@ int main(int argc, char *argv[]) { // MPI_Barrier(MPI_COMM_WORLD); - poet::ChemistryParams chem_params(R); - - /* THIS IS EXECUTED BY THE MASTER */ - std::string db_path = chem_params.database_path; - uint32_t c_size = db_path.size(); - MPI_Bcast(&c_size, 1, MPI_UINT32_T, 0, MPI_COMM_WORLD); - - MPI_Bcast(db_path.data(), c_size, MPI_CHAR, 0, MPI_COMM_WORLD); uint32_t nxyz_master = (world_size == 1 ? g_params.total_n : 1); - dSimTime = RunMasterLoop(params, R, chem_params, g_params, nxyz_master); + dSimTime = RunMasterLoop(params, R, g_params, nxyz_master); cout << "CPP: finished simulation loop" << endl; diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index a891897c0..6b5e9b9f3 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -1,3 +1,3 @@ -add_subdirectory(dolo_diffu_inner) +add_subdirectory(dolo) add_subdirectory(surfex) add_subdirectory(barite) diff --git a/bench/barite/CMakeLists.txt b/bench/barite/CMakeLists.txt index 29982a1e0..6c8b63aee 100644 --- a/bench/barite/CMakeLists.txt +++ b/bench/barite/CMakeLists.txt @@ -1,5 +1,6 @@ install(FILES barite.R + barite_interp_eval.R barite.pqi db_barite.dat DESTINATION diff --git a/bench/barite/barite.R b/bench/barite/barite.R index dad3b79ea..d6941d3ed 100644 --- a/bench/barite/barite.R +++ b/bench/barite/barite.R @@ -115,16 +115,20 @@ diffusion <- list( ## # Needed when using DHT - -dht_species <- names(init_diffu) -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5) +dht_species <- c( + "H" = 10, + "O" = 10, + "Charge" = 3, + "Ba" = 5, + "Cl" = 5, + "S(6)" = 5, + "Sr" = 5 +) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species, - dht_signif = dht_signif + dht_species = dht_species ) ################################################################# diff --git a/bench/barite/barite_interp_eval.R b/bench/barite/barite_interp_eval.R new file mode 100644 index 000000000..fcbe4884b --- /dev/null +++ b/bench/barite/barite_interp_eval.R @@ -0,0 +1,151 @@ +## Time-stamp: "Last modified 2023-07-21 15:04:49 mluebke" + +database <- normalizePath("../share/poet/bench/barite/db_barite.dat") +input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") + +################################################################# +## Section 1 ## +## Grid initialization ## +################################################################# + +n <- 400 +m <- 200 + +types <- c("scratch", "phreeqc", "rds") + +init_cell <- list( + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.217E-09, + "Ba" = 1.E-10, + "Cl" = 2.E-10, + "S" = 6.205E-4, + "Sr" = 6.205E-4, + "Barite" = 0.001, + "Celestite" = 1 +) + +grid <- list( + n_cells = c(n, m), + s_cells = c(n / 10, m / 10), + type = types[1], + init_cell = as.data.frame(init_cell, check.names = FALSE), + props = names(init_cell), + database = database, + input_script = input_script +) + + +################################################################## +## Section 2 ## +## Diffusion parameters and boundary conditions ## +################################################################## + +## initial conditions + +init_diffu <- list( + # "H" = 110.0124, + "H" = 0.00000028904, + # "O" = 55.5087, + "O" = 0.000000165205, + # "Charge" = -1.217E-09, + "Charge" = -3.337E-08, + "Ba" = 1.E-10, + "Cl" = 1.E-10, + "S(6)" = 6.205E-4, + "Sr" = 6.205E-4 +) + +injection_diff <- list( + list( + # "H" = 111.0124, + "H" = 0.0000002890408, + # "O" = 55.50622, + "O" = 0.00002014464, + # "Charge" = -3.337E-08, + "Charge" = -3.337000004885E-08, + "Ba" = 0.1, + "Cl" = 0.2, + "S(6)" = 0, + "Sr" = 0 + ) +) + +## diffusion coefficients +alpha_diffu <- c( + "H" = 1E-06, + "O" = 1E-06, + "Charge" = 1E-06, + "Ba" = 1E-06, + "Cl" = 1E-06, + "S(6)" = 1E-06, + "Sr" = 1E-06 +) + +vecinj_inner <- list( + l1 = c(1, floor(n / 2), floor(m / 2)) + ## l2 = c(2,80,80), + ## l3 = c(2,60,80) +) + +boundary <- list( + # "N" = rep(1, n), + "N" = rep(0, n), + "E" = rep(0, n), + "S" = rep(0, n), + "W" = rep(0, n) +) + +diffu_list <- names(alpha_diffu) + +vecinj <- do.call(rbind.data.frame, injection_diff) +names(vecinj) <- names(init_diffu) + +diffusion <- list( + init = as.data.frame(init_diffu, check.names = FALSE), + vecinj = vecinj, + vecinj_inner = vecinj_inner, + vecinj_index = boundary, + alpha = alpha_diffu +) + +################################################################# +## Section 3 ## +## Chemistry module (Phreeqc) ## +################################################################# + +## # Needed when using DHT +dht_species <- c( + "H" = 10, + "O" = 10, + "Charge" = 3, + "Ba" = 5, + "Cl" = 5, + "S(6)" = 5, + "Sr" = 5 +) + +chemistry <- list( + database = database, + input_script = input_script, + dht_species = dht_species +) + +################################################################# +## Section 4 ## +## Putting all those things together ## +################################################################# + + +iterations <- 200 +dt <- 250 + +setup <- list( + grid = grid, + diffusion = diffusion, + chemistry = chemistry, + iterations = iterations, + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = seq(1, iterations) +) diff --git a/bench/dolo_diffu_inner/CMakeLists.txt b/bench/dolo/CMakeLists.txt similarity index 74% rename from bench/dolo_diffu_inner/CMakeLists.txt rename to bench/dolo/CMakeLists.txt index cde190af0..d2c0acd1e 100644 --- a/bench/dolo_diffu_inner/CMakeLists.txt +++ b/bench/dolo/CMakeLists.txt @@ -2,6 +2,8 @@ install(FILES dolo_diffu_inner.R dolo_diffu_inner_large.R dolo_inner.pqi + dolo_interp_long.R + phreeqc_kin.dat DESTINATION share/poet/bench/dolo ) diff --git a/bench/dolo_diffu_inner/Eval.R b/bench/dolo/Eval.R similarity index 100% rename from bench/dolo_diffu_inner/Eval.R rename to bench/dolo/Eval.R diff --git a/data/dol.pqi b/bench/dolo/dol.pqi similarity index 100% rename from data/dol.pqi rename to bench/dolo/dol.pqi diff --git a/bench/dolo_diffu_inner/dolo_diffu_inner.R b/bench/dolo/dolo_diffu_inner.R similarity index 84% rename from bench/dolo_diffu_inner/dolo_diffu_inner.R rename to bench/dolo/dolo_diffu_inner.R index f243889d4..f981ed5c4 100644 --- a/bench/dolo_diffu_inner/dolo_diffu_inner.R +++ b/bench/dolo/dolo_diffu_inner.R @@ -1,6 +1,6 @@ -## Time-stamp: "Last modified 2023-04-24 15:12:02 mluebke" +## Time-stamp: "Last modified 2023-07-28 16:57:40 mluebke" -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") ################################################################# @@ -44,8 +44,8 @@ grid <- list( ## initial conditions init_diffu <- list( - "H" = 0.000211313883539788, - "O" = 0.00398302904424952, + "H" = 110.683, + "O" = 55.3413, "Charge" = -5.0822e-19, "C(4)" = 1.2279E-4, "Ca" = 1.2279E-4, @@ -67,8 +67,8 @@ alpha_diffu <- c( ## list of boundary conditions/inner nodes vecinj_diffu <- list( list( - "H" = 0.0001540445, - "O" = 0.002148006, + "H" = 110.683, + "O" = 55.3413, "Charge" = 1.90431e-16, "C(4)" = 0, "Ca" = 0, @@ -76,8 +76,8 @@ vecinj_diffu <- list( "Mg" = 0.001 ), list( - "H" = 0.0001610193, - "O" = 0.002386934, + "H" = 110.683, + "O" = 55.3413, "Charge" = 1.90431e-16, "C(4)" = 0, "Ca" = 0.0, @@ -120,16 +120,21 @@ diffusion <- list( ## # Needed when using DHT -dht_species <- c("H", "O", "Charge", "C(4)", "Ca", "Cl", "Mg", "Calcite", - "Dolomite") -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5, 5, 5) +dht_species <- c("H" = 10, + "O" = 10, + "Charge" = 3, + "C(4)" = 5, + "Ca" = 5, + "Cl" = 5, + "Mg" = 5, + "Calcite" = 5, + "Dolomite" =5 + ) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species, - dht_signif = dht_signif + dht_species = dht_species ) ################################################################# diff --git a/bench/dolo_diffu_inner/dolo_diffu_inner_large.R b/bench/dolo/dolo_diffu_inner_large.R similarity index 88% rename from bench/dolo_diffu_inner/dolo_diffu_inner_large.R rename to bench/dolo/dolo_diffu_inner_large.R index 69b528ce3..5dfb95677 100644 --- a/bench/dolo_diffu_inner/dolo_diffu_inner_large.R +++ b/bench/dolo/dolo_diffu_inner_large.R @@ -1,6 +1,6 @@ -## Time-stamp: "Last modified 2023-04-24 15:43:26 mluebke" +## Time-stamp: "Last modified 2023-07-28 16:57:40 mluebke" -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") ################################################################# @@ -118,18 +118,22 @@ diffusion <- list( ## Chemistry module (Phreeqc) ## ################################################################# - ## # Needed when using DHT -dht_species <- c("H", "O", "Charge", "C(4)", "Ca", "Cl", "Mg", "Calcite", - "Dolomite") -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5, 5, 5) +dht_species <- c("H" = 10, + "O" = 10, + "Charge" = 3, + "C(4)" = 5, + "Ca" = 5, + "Cl" = 5, + "Mg" = 5, + "Calcite" = 5, + "Dolomite" =5 + ) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species, - dht_signif = dht_signif + dht_species = dht_species ) ################################################################# diff --git a/bench/dolo_diffu_inner/dolo_inner.pqi b/bench/dolo/dolo_inner.pqi similarity index 100% rename from bench/dolo_diffu_inner/dolo_inner.pqi rename to bench/dolo/dolo_inner.pqi diff --git a/bench/dolo/dolo_interp_long.R b/bench/dolo/dolo_interp_long.R new file mode 100644 index 000000000..b333891ea --- /dev/null +++ b/bench/dolo/dolo_interp_long.R @@ -0,0 +1,171 @@ +## Time-stamp: "Last modified 2023-08-01 18:34:47 mluebke" + +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") +input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") + +################################################################# +## Section 1 ## +## Grid initialization ## +################################################################# + +n <- 400 +m <- 200 + +types <- c("scratch", "phreeqc", "rds") + +init_cell <- list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = -5.0822e-19, + "C" = 1.2279E-4, + "Ca" = 1.2279E-4, + "Cl" = 0, + "Mg" = 0, + "O2g" = 0.499957, + "Calcite" = 2.07e-4, + "Dolomite" = 0 +) + +grid <- list( + n_cells = c(n, m), + s_cells = c(5, 2.5), + type = types[1], + init_cell = as.data.frame(init_cell, check.names = FALSE), + props = names(init_cell), + database = database, + input_script = input_script +) + + +################################################################## +## Section 2 ## +## Diffusion parameters and boundary conditions ## +################################################################## + +## initial conditions +init_diffu <- list( + "H" = 1.110124E+02, + "O" = 5.550833E+01, + "Charge" = -1.216307659761E-09, + "C(4)" = 1.230067028174E-04, + "Ca" = 1.230067028174E-04, + "Cl" = 0, + "Mg" = 0 +) + +## diffusion coefficients +alpha_diffu <- c( + "H" = 1E-6, + "O" = 1E-6, + "Charge" = 1E-6, + "C(4)" = 1E-6, + "Ca" = 1E-6, + "Cl" = 1E-6, + "Mg" = 1E-6 +) + +## list of boundary conditions/inner nodes +vecinj_diffu <- list( + list( + "H" = 1.110124E+02, + "O" = 5.550796E+01, + "Charge" = -3.230390327801E-08, + "C(4)" = 0, + "Ca" = 0, + "Cl" = 0.002, + "Mg" = 0.001 + ), + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0.0, + "Cl" = 0.004, + "Mg" = 0.002 + ), + init_diffu +) + +vecinj_inner <- list( + l1 = c(1, floor(n / 2), floor(m / 2)) + # l2 = c(2,1400,800), + # l3 = c(2,1600,800) +) + +boundary <- list( + # "N" = c(1, rep(0, n-1)), + "N" = rep(3, n), + "E" = rep(3, m), + "S" = rep(3, n), + "W" = rep(3, m) +) + +diffu_list <- names(alpha_diffu) + +vecinj <- do.call(rbind.data.frame, vecinj_diffu) +names(vecinj) <- names(init_diffu) + +diffusion <- list( + init = as.data.frame(init_diffu, check.names = FALSE), + vecinj = vecinj, + vecinj_inner = vecinj_inner, + vecinj_index = boundary, + alpha = alpha_diffu +) + +################################################################# +## Section 3 ## +## Chemistry module (Phreeqc) ## +################################################################# + + +## # optional when using DHT +dht_species <- c( + "H" = 3, + "O" = 3, + "Charge" = 3, + "C(4)" = 6, + "Ca" = 6, + "Cl" = 3, + "Mg" = 5, + "Calcite" = 4, + "Dolomite" = 4 +) + +## # Optional when using Interpolation (example with less key species and custom +## # significant digits) + +#pht_species <- c( +# "C(4)" = 3, +# "Ca" = 3, +# "Mg" = 2, +# "Calcite" = 2, +# "Dolomite" = 2 +#) + +chemistry <- list( + database = database, + input_script = input_script, + dht_species = dht_species +# pht_species = pht_species +) + +################################################################# +## Section 4 ## +## Putting all those things together ## +################################################################# + + +iterations <- 20000 +dt <- 200 + +setup <- list( + grid = grid, + diffusion = diffusion, + chemistry = chemistry, + iterations = iterations, + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = c(1, seq(50, iterations, by = 50)) +) diff --git a/data/phreeqc_kin.dat b/bench/dolo/phreeqc_kin.dat similarity index 99% rename from data/phreeqc_kin.dat rename to bench/dolo/phreeqc_kin.dat index 8c1b671aa..01d5bf516 100644 --- a/data/phreeqc_kin.dat +++ b/bench/dolo/phreeqc_kin.dat @@ -2,7 +2,7 @@ ### SURFACE and with the RATES for Calcite and Dolomite to use with ### RedModRphree -### Time-stamp: "Last modified 2018-05-06 14:36:23 delucia" +### Time-stamp: "Last modified 2023-05-23 10:35:56 mluebke" # PHREEQC.DAT for calculating pressure dependence of reactions, with # molal volumina of aqueous species and of minerals, and @@ -1276,9 +1276,9 @@ Calcite 110 logK25=-5.81 120 mech_c=(10^logK25)*(e^(-Ea/R*deltaT)) 130 rate=mech_a+mech_c - ## 140 IF SI("Calcite")<0 then moles=parm(1)*rate*(1-SR("Calcite")) # dissolution + 140 IF (SI("Calcite")<0 AND M>0) then moles=parm(1)*rate*(1-SR("Calcite")) # dissolution ## 145 IF SI("Calcite")>0 then moles=parm(1)*M*rate*(-1+SR("Calcite")) # precipitation - 150 moles=parm(1)*rate*(1-SR("Calcite")) # precipitation + ## 150 moles=parm(1)*rate*(1-SR("Calcite")) # precipitation 200 save moles*time -end diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt deleted file mode 100644 index 664e4ac45..000000000 --- a/data/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -install( -FILES - SimDol2D_diffu.R - SimDol1D_diffu.R - phreeqc_kin.dat - dol.pqi -DESTINATION - share/poet/examples) diff --git a/data/SimDol1D_diffu.R b/data/SimDol1D_diffu.R deleted file mode 100644 index 6e74e2ccf..000000000 --- a/data/SimDol1D_diffu.R +++ /dev/null @@ -1,180 +0,0 @@ -################################################################# -## Section 1 ## -## Grid initialization ## -################################################################# - -n <- 5 -m <- 5 - -types <- c("scratch", "phreeqc", "rds") - -# initsim <- c("SELECTED_OUTPUT", "-high_precision true", -# "-reset false", -# "USER_PUNCH", -# "-head C Ca Cl Mg pH pe O2g Calcite Dolomite", -# "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")", -# "SOLUTION 1", -# "units mol/kgw", -# "temp 25.0", "water 1", -# "pH 9.91 charge", -# "pe 4.0", -# "C 1.2279E-04", -# "Ca 1.2279E-04", -# "Cl 1E-12", -# "Mg 1E-12", -# "PURE 1", -# "O2g -0.6788 10.0", -# "Calcite 0.0 2.07E-3", -# "Dolomite 0.0 0.0", -# "END") - -# needed if init type is set to "scratch" -# prop <- c("C", "Ca", "Cl", "Mg", "pH", "pe", "O2g", "Calcite", "Dolomite") - -init_cell <- list( - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "pH" = 9.91, - "pe" = 4, - "O2g" = 10, - "Calcite" = 2.07e-4, - "Dolomite" = 0 -) - -grid <- list( - n_cells = n, - s_cells = n, - type = types[1], - init_cell = as.data.frame(init_cell), - props = names(init_cell), - init_script = NULL -) - - -################################################################## -## Section 2 ## -## Diffusion parameters and boundary conditions ## -################################################################## - -init_diffu <- c( - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "pe" = 4, - "pH" = 7 -) - -alpha_diffu <- c( - "C" = 1E-4, - "Ca" = 1E-4, - "Cl" = 1E-4, - "Mg" = 1E-4, - "pe" = 1E-4, - "pH" = 1E-4 -) - -vecinj_diffu <- list( - list( - "C" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001, - "pe" = 4, - "pH" = 9.91 - ) -) - -boundary <- list( - "E" = 0, - "W" = 1 -) - -diffu_list <- names(alpha_diffu) - -diffusion <- list( - init = init_diffu, - vecinj = do.call(rbind.data.frame, vecinj_diffu), - vecinj_index = boundary, - alpha = alpha_diffu -) - -################################################################## -## Section 3 ## -## Phreeqc simulation ## -################################################################## - -db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", - package = "RedModRphree" -), is.db = TRUE) - -phreeqc::phrLoadDatabaseString(db) - -# NOTE: This won't be needed in the future either. Could also be done in a. pqi -# file -base <- c( - "SOLUTION 1", - "units mol/kgw", - "temp 25.0", - "water 1", - "pH 9.91 charge", - "pe 4.0", - "C 1.2279E-04", - "Ca 1.2279E-04", - "Mg 0.001", - "Cl 0.002", - "PURE 1", - "O2g -0.1675 10", - "KINETICS 1", - "-steps 100", - "-step_divide 100", - "-bad_step_max 2000", - "Calcite", "-m 0.000207", - "-parms 0.0032", - "Dolomite", - "-m 0.0", - "-parms 0.00032", - "END" -) - -selout <- c( - "SELECTED_OUTPUT", "-high_precision true", "-reset false", - "-time", "-soln", "-temperature true", "-water true", - "-pH", "-pe", "-totals C Ca Cl Mg", - "-kinetic_reactants Calcite Dolomite", "-equilibrium O2g" -) - -# Needed when using DHT -signif_vector <- c(7, 7, 7, 7, 7, 7, 7, 5, 5) -prop_type <- c("act", "act", "act", "act", "logact", "logact", "ignore", "act", "act") -prop <- names(init_cell) - -iterations <- 500 - -setup <- list( - # bound = myboundmat, - base = base, - first = selout, - # initsim = initsim, - # Cf = 1, - grid = grid, - diffusion = diffusion, - prop = prop, - immobile = c(7, 8, 9), - kin = c(8, 9), - ann = list(O2g = -0.1675), - # phase = "FLUX1", - # density = "DEN1", - reduce = FALSE, - # snapshots = demodir, ## directory where we will read MUFITS SUM files - # gridfile = paste0(demodir, "/d2ascii.run.GRID.SUM") - # init = init, - # vecinj = vecinj, - # cinj = c(0,1), - # boundary = boundary, - # injections = FALSE, - iterations = iterations, - timesteps = rep(10, iterations) -) diff --git a/data/SimDol2D_diffu.R b/data/SimDol2D_diffu.R deleted file mode 100644 index df965bcc6..000000000 --- a/data/SimDol2D_diffu.R +++ /dev/null @@ -1,213 +0,0 @@ -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") -input_script <- normalizePath("../share/poet/examples/dol.pqi") - -################################################################# -## Section 1 ## -## Grid initialization ## -################################################################# - -n <- 100 -m <- 100 - -types <- c("scratch", "phreeqc", "rds") - -# initsim <- c("SELECTED_OUTPUT", "-high_precision true", -# "-reset false", -# "USER_PUNCH", -# "-head C Ca Cl Mg pH pe O2g Calcite Dolomite", -# "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")", -# "SOLUTION 1", -# "units mol/kgw", -# "temp 25.0", "water 1", -# "pH 9.91 charge", -# "pe 4.0", -# "C 1.2279E-04", -# "Ca 1.2279E-04", -# "Cl 1E-12", -# "Mg 1E-12", -# "PURE 1", -# "O2g -0.6788 10.0", -# "Calcite 0.0 2.07E-3", -# "Dolomite 0.0 0.0", -# "END") - -# needed if init type is set to "scratch" -# prop <- c("C", "Ca", "Cl", "Mg", "pH", "pe", "O2g", "Calcite", "Dolomite") - -init_cell <- list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = -5.0822e-19, - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "O2g" = 0.499957, - "Calcite" = 2.07e-4, - "Dolomite" = 0 -) - -grid <- list( - n_cells = c(n, m), - s_cells = c(n, m), - type = types[1], - init_cell = as.data.frame(init_cell), - props = names(init_cell), - database = database, - input_script = input_script -) - - -################################################################## -## Section 2 ## -## Diffusion parameters and boundary conditions ## -################################################################## - -init_diffu <- c( - "H" = 110.683, - "O" = 55.3413, - "Charge" = -5.0822e-19, - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0 -) - -alpha_diffu <- c( - "H" = 1E-4, - "O" = 1E-4, - "Charge" = 1E-4, - "C" = 1E-4, - "Ca" = 1E-4, - "Cl" = 1E-4, - "Mg" = 1E-4 -) - -vecinj_diffu <- list( - list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = 1.90431e-16, - "C" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001 - ) -) - -#inner_index <- c(5, 15, 25) -#inner_vecinj_index <- rep(1, 3) -# -#vecinj_inner <- cbind(inner_index, inner_vecinj_index) -vecinj_inner <- list( - l1 = c(1,2,2) -) - - -boundary <- list( - "N" = rep(0, n), - "E" = rep(0, n), - "S" = rep(0, n), - "W" = rep(0, n) -) - -diffu_list <- names(alpha_diffu) - -diffusion <- list( - init = init_diffu, - vecinj = do.call(rbind.data.frame, vecinj_diffu), - vecinj_inner = vecinj_inner, - vecinj_index = boundary, - alpha = alpha_diffu -) - -################################################################# -## Section 3 ## -## Chemitry module (Phreeqc) ## -################################################################# - -# db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", -# package = "RedModRphree" -# ), is.db = TRUE) -# -# phreeqc::phrLoadDatabaseString(db) - -# NOTE: This won't be needed in the future either. Could also be done in a. pqi -# file -base <- c( - "SOLUTION 1", - "units mol/kgw", - "temp 25.0", - "water 1", - "pH 9.91 charge", - "pe 4.0", - "C 1.2279E-04", - "Ca 1.2279E-04", - "Mg 0.001", - "Cl 0.002", - "PURE 1", - "O2g -0.1675 10", - "KINETICS 1", - "-steps 100", - "-step_divide 100", - "-bad_step_max 2000", - "Calcite", "-m 0.000207", - "-parms 0.0032", - "Dolomite", - "-m 0.0", - "-parms 0.00032", - "END" -) - -selout <- c( - "SELECTED_OUTPUT", "-high_precision true", "-reset false", - "-time", "-soln", "-temperature true", "-water true", - "-pH", "-pe", "-totals C Ca Cl Mg", - "-kinetic_reactants Calcite Dolomite", "-equilibrium O2g" -) - - -# Needed when using DHT -signif_vector <- c(10, 10, 10, 7, 7, 7, 7, 0, 5, 5) -prop_type <- c("", "", "", "act", "act", "act", "act", "ignore", "", "") -prop <- names(init_cell) - -chemistry <- list( - database = database, - input_script = input_script -) - -################################################################# -## Section 4 ## -## Putting all those things together ## -################################################################# - - -iterations <- 10 - -setup <- list( - # bound = myboundmat, - base = base, - first = selout, - # initsim = initsim, - # Cf = 1, - grid = grid, - diffusion = diffusion, - chemistry = chemistry, - prop = prop, - immobile = c(7, 8, 9), - kin = c(8, 9), - ann = list(O2g = -0.1675), - # phase = "FLUX1", - # density = "DEN1", - reduce = FALSE, - # snapshots = demodir, ## directory where we will read MUFITS SUM files - # gridfile = paste0(demodir, "/d2ascii.run.GRID.SUM") - # init = init, - # vecinj = vecinj, - # cinj = c(0,1), - # boundary = boundary, - # injections = FALSE, - iterations = iterations, - timesteps = rep(10, iterations) -) diff --git a/data/SimDol2D_diffu.yaml b/data/SimDol2D_diffu.yaml deleted file mode 100644 index 84c04dd29..000000000 --- a/data/SimDol2D_diffu.yaml +++ /dev/null @@ -1,13 +0,0 @@ -phreeqc: - RebalanceByCell: True - RebalanceFraction: 0.5 - SolutionDensityVolume: False - PartitionUZSolids: False - Units: - Solution: "mg/L" - PPassamblage: "mol/L" - Exchange: "mol/L" - Surface: "mol/L" - GasPhase: "mol/L" - SSassemblage: "mol/L" - Kinetics: "mol/L" diff --git a/ext/phreeqcrm b/ext/phreeqcrm index ba5dc40af..89f713b27 160000 --- a/ext/phreeqcrm +++ b/ext/phreeqcrm @@ -1 +1 @@ -Subproject commit ba5dc40af119da015d36d2554ecd558ef9da1eb6 +Subproject commit 89f713b273cd5340b2e8169523da04c2d7ad89c9 diff --git a/include/poet/ChemistryModule.hpp b/include/poet/ChemistryModule.hpp index 2db3cd17e..16dc1f5f9 100644 --- a/include/poet/ChemistryModule.hpp +++ b/include/poet/ChemistryModule.hpp @@ -1,20 +1,23 @@ -// Time-stamp: "Last modified 2023-07-21 12:35:23 mluebke" +// Time-stamp: "Last modified 2023-08-01 13:13:18 mluebke" #ifndef CHEMISTRYMODULE_H_ #define CHEMISTRYMODULE_H_ +#include "DHT_Wrapper.hpp" #include "Field.hpp" +#include "Interpolation.hpp" #include "IrmResult.h" #include "PhreeqcRM.h" -#include "poet/DHT_Wrapper.hpp" +#include "SimParams.hpp" +#include "DataStructures.hpp" #include #include #include #include +#include #include #include -#include #include namespace poet { @@ -39,7 +42,8 @@ public: * \param wp_size Count of grid cells to fill each work package at maximum. * \param communicator MPI communicator to distribute work in. */ - ChemistryModule(uint32_t nxyz, uint32_t wp_size, MPI_Comm communicator); + ChemistryModule(uint32_t nxyz, uint32_t wp_size, std::uint32_t maxiter, + const ChemistryParams &chem_param, MPI_Comm communicator); /** * Deconstructor, which frees DHT data structure if used. @@ -86,7 +90,8 @@ public: * * \returns A worker instance with fixed work package size. */ - static ChemistryModule createWorker(MPI_Comm communicator); + static ChemistryModule createWorker(MPI_Comm communicator, + const ChemistryParams &chem_param); /** * Default work package size. @@ -109,9 +114,9 @@ public: * Enumerating DHT file options */ enum { - DHT_FILES_DISABLED = 0, //!< disabled file output - DHT_FILES_SIMEND, //!< only output of snapshot after simulation - DHT_FILES_ITEREND //!< output snapshots after each iteration + DHT_SNAPS_DISABLED = 0, //!< disabled file output + DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation + DHT_SNAPS_ITEREND //!< output snapshots after each iteration }; /** @@ -135,47 +140,6 @@ public: */ void MasterLoopBreak(); - /** - * **Master only** Enables DHT usage for the workers. - * - * If called multiple times enabling the DHT, the current state of DHT will be - * overwritten with a new instance of the DHT. - * - * \param enable Enables or disables the usage of the DHT. - * \param size_mb Size in megabyte to allocate for the DHT if enabled. - * \param key_species Name of species to be used for key creation. - */ - void SetDHTEnabled(bool enable, uint32_t size_mb, - const std::vector &key_species); - /** - * **Master only** Set DHT snapshots to specific mode. - * - * \param type DHT snapshot mode. - * \param out_dir Path to output DHT snapshots. - */ - void SetDHTSnaps(int type, const std::string &out_dir); - /** - * **Master only** Set the vector with significant digits to round before - * inserting into DHT. - * - * \param signif_vec Vector defining significant digit for each species. Order - * is defined by prop_type vector (ChemistryModule::GetPropNames). - */ - void SetDHTSignifVector(std::vector &signif_vec); - /** - * **Master only** Set the DHT rounding type of each species. See - * DHT_PROP_TYPES enumemartion for explanation. - * - * \param proptype_vec Vector defining DHT prop type for each species. - */ - void SetDHTPropTypeVector(std::vector proptype_vec); - /** - * **Master only** Load the state of the DHT from given file. - * - * \param input_file File to load data from. - */ - void ReadDHTFile(const std::string &input_file); - /** * **Master only** Return count of grid cells. */ @@ -237,13 +201,6 @@ public: */ std::vector GetWorkerDHTHits() const; - /** - * **Master only** Collect and return DHT misses of all workers. - * - * \return Vector of all count of DHT misses. - */ - std::vector GetWorkerDHTMiss() const; - /** * **Master only** Collect and return DHT evictions of all workers. * @@ -263,9 +220,29 @@ public: * * \param enabled True if print progressbar, false if not. */ - void setProgressBarPrintout(bool enabled); + void setProgressBarPrintout(bool enabled) { + this->print_progessbar = enabled; + }; + + std::vector GetWorkerInterpolationCalls() const; + + std::vector GetWorkerInterpolationWriteTimings() const; + std::vector GetWorkerInterpolationReadTimings() const; + std::vector GetWorkerInterpolationGatherTimings() const; + std::vector GetWorkerInterpolationFunctionCallTimings() const; + + std::vector GetWorkerPHTCacheHits() const; protected: + void initializeDHT(uint32_t size_mb, + const NamedVector &key_species); + void setDHTSnapshots(int type, const std::string &out_dir); + void setDHTReadFile(const std::string &input_file); + + void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, + std::uint32_t min_entries, + const NamedVector &key_species); + enum { CHEM_INIT, CHEM_WP_SIZE, @@ -275,9 +252,11 @@ protected: CHEM_DHT_PROP_TYPE_VEC, CHEM_DHT_SNAPS, CHEM_DHT_READ_FILE, + CHEM_IP_ENABLE, + CHEM_IP_MIN_ENTRIES, + CHEM_IP_SIGNIF_VEC, CHEM_WORK_LOOP, CHEM_PERF, - CHEM_PROGRESSBAR, CHEM_BREAK_MAIN_LOOP }; @@ -288,11 +267,20 @@ protected: WORKER_DHT_GET, WORKER_DHT_FILL, WORKER_IDLE, + WORKER_IP_WRITE, + WORKER_IP_READ, + WORKER_IP_GATHER, + WORKER_IP_FC, WORKER_DHT_HITS, - WORKER_DHT_MISS, - WORKER_DHT_EVICTIONS + WORKER_DHT_EVICTIONS, + WORKER_PHT_CACHE_HITS, + WORKER_IP_CALLS }; + std::vector interp_calls; + std::vector dht_hits; + std::vector dht_evictions; + struct worker_s { double phreeqc_t = 0.; double dht_get = 0.; @@ -347,8 +335,11 @@ protected: void unshuffleField(const std::vector &in_buffer, uint32_t size_per_prop, uint32_t prop_count, uint32_t wp_count, std::vector &out_field); - std::vector - parseDHTSpeciesVec(const std::vector &species_vec) const; + std::vector + parseDHTSpeciesVec(const NamedVector &key_species, + const std::vector &to_compare) const; + + void BCastStringVec(std::vector &io); int comm_size, comm_rank; MPI_Comm group_comm; @@ -358,11 +349,14 @@ protected: uint32_t wp_size{CHEM_DEFAULT_WP_SIZE}; bool dht_enabled{false}; - int dht_snaps_type{DHT_FILES_DISABLED}; + int dht_snaps_type{DHT_SNAPS_DISABLED}; std::string dht_file_out_dir; poet::DHT_Wrapper *dht = nullptr; + bool interp_enabled{false}; + std::unique_ptr interp; + static constexpr uint32_t BUFFER_OFFSET = 5; inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const { @@ -381,7 +375,9 @@ protected: bool print_progessbar{false}; - double chem_t = 0.; + std::uint32_t file_pad; + + double chem_t{0.}; uint32_t n_cells = 0; uint32_t prop_count = 0; @@ -391,6 +387,8 @@ protected: static constexpr int MODULE_COUNT = 5; + const ChemistryParams ¶ms; + std::array speciesPerModule{}; }; } // namespace poet diff --git a/include/poet/DHT.h b/include/poet/DHT.h index e8f8399f8..ba0022bff 100644 --- a/include/poet/DHT.h +++ b/include/poet/DHT.h @@ -67,7 +67,7 @@ */ typedef struct { /** Count of writes to specific process this process did. */ - int* writes_local; + int *writes_local; /** Writes after last call of DHT_print_statistics. */ int old_writes; /** How many read misses occur? */ @@ -100,27 +100,36 @@ typedef struct { /** Size of the MPI communicator respectively all participating processes. */ int comm_size; /** Pointer to a hashfunction. */ - uint64_t (*hash_func)(int, const void*); + uint64_t (*hash_func)(int, const void *); /** Pre-allocated memory where a bucket can be received. */ - void* recv_entry; + void *recv_entry; /** Pre-allocated memory where a bucket to send can be stored. */ - void* send_entry; + void *send_entry; /** Allocated memory on which the MPI window was created. */ - void* mem_alloc; + void *mem_alloc; /** Count of read misses over all time. */ int read_misses; /** Count of evictions over all time. */ int evictions; /** Array of indeces where a bucket can be stored. */ - uint64_t* index; + uint64_t *index; /** Count of possible indeces. */ unsigned int index_count; + + int (*accumulate_callback)(int, void *, int, void *); #ifdef DHT_STATISTICS /** Detailed statistics of the usage of the DHT. */ - DHT_stats* stats; + DHT_stats *stats; #endif } DHT; +extern void DHT_set_accumulate_callback(DHT *table, + int (*callback_func)(int, void *, int, + void *)); + +extern int DHT_write_accumulate(DHT *table, const void *key, int send_size, + void *data, uint32_t *proc, uint32_t *index, int *callback_ret); + /** * @brief Create a DHT. * @@ -141,9 +150,9 @@ typedef struct { * @return DHT* The returned value is the \a DHT-object which serves as a handle * for all DHT operations. If an error occured NULL is returned. */ -extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process, +extern DHT *DHT_create(MPI_Comm comm, uint64_t size_per_process, unsigned int data_size, unsigned int key_size, - uint64_t (*hash_func)(int, const void*)); + uint64_t (*hash_func)(int, const void *)); /** * @brief Write data into DHT. @@ -161,10 +170,14 @@ extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process, * @param table Pointer to the \a DHT-object. * @param key Pointer to the key. * @param data Pointer to the data. + * @param proc If not NULL, returns the process number written to. + * @param index If not NULL, returns the index of the bucket where the data was + * written to. * @return int Returns either DHT_SUCCESS on success or correspondending error * value on eviction or error. */ -extern int DHT_write(DHT* table, void* key, void* data); +extern int DHT_write(DHT *table, void *key, void *data, uint32_t *proc, + uint32_t *index); /** * @brief Read data from DHT. @@ -187,8 +200,10 @@ extern int DHT_write(DHT* table, void* key, void* data); * @return int Returns either DHT_SUCCESS on success or correspondending error * value on read miss or error. */ -extern int DHT_read(DHT* table, void* key, void* destination); +extern int DHT_read(DHT *table, const void *key, void *destination); +extern int DHT_read_location(DHT *table, uint32_t proc, uint32_t index, + void *destination); /** * @brief Write current state of DHT to file. * @@ -203,7 +218,7 @@ extern int DHT_read(DHT* table, void* key, void* destination); * @return int Returns DHT_SUCCESS on succes, DHT_FILE_IO_ERROR if file can't be * opened/closed or DHT_WRITE_ERROR if file is not writable. */ -extern int DHT_to_file(DHT* table, const char* filename); +extern int DHT_to_file(DHT *table, const char *filename); /** * @brief Read state of DHT from file. @@ -223,7 +238,7 @@ extern int DHT_to_file(DHT* table, const char* filename); * file doesn't match expectation. This is possible if the data size or key size * is different. */ -extern int DHT_from_file(DHT* table, const char* filename); +extern int DHT_from_file(DHT *table, const char *filename); /** * @brief Free ressources of DHT. @@ -241,7 +256,7 @@ extern int DHT_from_file(DHT* table, const char* filename); * @return int Returns either DHT_SUCCESS on success or DHT_MPI_ERROR on * internal MPI error. */ -extern int DHT_free(DHT* table, int* eviction_counter, int* readerror_counter); +extern int DHT_free(DHT *table, int *eviction_counter, int *readerror_counter); /** * @brief Prints a table with statistics about current use of DHT. @@ -267,7 +282,7 @@ extern int DHT_free(DHT* table, int* eviction_counter, int* readerror_counter); * @return int Returns DHT_SUCCESS on success or DHT_MPI_ERROR on internal MPI * error. */ -extern int DHT_print_statistics(DHT* table); +extern int DHT_print_statistics(DHT *table); /** * @brief Determine destination rank and index. @@ -286,8 +301,8 @@ extern int DHT_print_statistics(DHT* table); * @param index_count Count of possible indeces. */ static void determine_dest(uint64_t hash, int comm_size, - unsigned int table_size, unsigned int* dest_rank, - uint64_t* index, unsigned int index_count); + unsigned int table_size, unsigned int *dest_rank, + uint64_t *index, unsigned int index_count); /** * @brief Set the occupied flag. @@ -296,7 +311,7 @@ static void determine_dest(uint64_t hash, int comm_size, * * @param flag_byte First byte of a bucket. */ -static void set_flag(char* flag_byte); +static void set_flag(char *flag_byte); /** * @brief Get the occupied flag. diff --git a/include/poet/DHT_Types.hpp b/include/poet/DHT_Types.hpp index be0ed295b..15eb9913e 100644 --- a/include/poet/DHT_Types.hpp +++ b/include/poet/DHT_Types.hpp @@ -2,7 +2,7 @@ #define DHT_TYPES_H_ namespace poet { -enum DHT_PROP_TYPES { DHT_TYPE_DEFAULT, DHT_TYPE_CHARGE, DHT_TYPE_IGNORE, DHT_TYPE_TOTAL }; +enum DHT_PROP_TYPES { DHT_TYPE_DEFAULT, DHT_TYPE_CHARGE, DHT_TYPE_TOTAL }; } #endif // DHT_TYPES_H_ diff --git a/include/poet/DHT_Wrapper.hpp b/include/poet/DHT_Wrapper.hpp index c1ebffcb1..f2ed226ab 100644 --- a/include/poet/DHT_Wrapper.hpp +++ b/include/poet/DHT_Wrapper.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-04-24 16:23:42 mluebke" +// Time-stamp: "Last modified 2023-08-01 13:48:34 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -23,10 +23,18 @@ #ifndef DHT_WRAPPER_H #define DHT_WRAPPER_H +#include "DataStructures.hpp" +#include "LookupKey.hpp" #include "poet/DHT_Types.hpp" +#include "poet/HashFunctions.hpp" +#include "poet/LookupKey.hpp" +#include "poet/Rounding.hpp" #include #include +#include #include +#include +#include #include extern "C" { @@ -37,35 +45,7 @@ extern "C" { namespace poet { -struct DHT_SCNotation { - std::int8_t exp : 8; - std::int64_t significant : 56; -}; - -union DHT_Keyelement { - double fp_elemet; - DHT_SCNotation sc_notation; -}; - -using DHT_ResultObject = struct DHTResobj { - uint32_t length; - std::vector> keys; - std::vector> results; - std::vector needPhreeqc; -}; - -/** - * @brief Return user-defined md5sum - * - * This function will calculate a hashsum with the help of md5sum. Therefore the - * md5sum for a given key is calculated and divided into two 64-bit parts. These - * will be XORed and returned as the hash. - * - * @param key_size Size of key in bytes - * @param key Pointer to key - * @return uint64_t Hashsum as an unsigned 64-bit integer - */ -static uint64_t get_md5(int key_size, void *key); +using DHT_Location = std::pair; /** * @brief C++-Wrapper around DHT implementation @@ -76,6 +56,20 @@ static uint64_t get_md5(int key_size, void *key); */ class DHT_Wrapper { public: + using DHT_ResultObject = struct DHTResobj { + uint32_t length; + std::vector keys; + std::vector> results; + std::vector old_values; + std::vector needPhreeqc; + std::vector locations; + }; + + static constexpr std::int32_t DHT_KEY_INPUT_CUSTOM = + std::numeric_limits::min(); + + static constexpr int DHT_KEY_SIGNIF_DEFAULT = 5; + /** * @brief Construct a new dht wrapper object * @@ -91,8 +85,9 @@ public: * for key creation. * @param data_count Count of data entries */ - DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size, - const std::vector &key_indices, + DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, + const NamedVector &key_species, + const std::vector &key_indices, uint32_t data_count); /** * @brief Destroy the dht wrapper object @@ -105,6 +100,9 @@ public: */ ~DHT_Wrapper(); + DHT_Wrapper &operator=(const DHT_Wrapper &) = delete; + DHT_Wrapper(const DHT_Wrapper &) = delete; + /** * @brief Check if values of workpackage are stored in DHT * @@ -123,7 +121,7 @@ public: */ auto checkDHT(int length, double dt, const std::vector &work_package, std::vector &curr_mapping) - -> const poet::DHT_ResultObject &; + -> const DHT_ResultObject &; /** * @brief Write simulated values into DHT @@ -183,13 +181,6 @@ public: */ auto getHits() { return this->dht_hits; }; - /** - * @brief Get the Misses object - * - * @return uint64_t Count of read misses - */ - auto getMisses() { return this->dht_miss; }; - /** * @brief Get the Evictions object * @@ -197,32 +188,56 @@ public: */ auto getEvictions() { return this->dht_evictions; }; - void SetSignifVector(std::vector signif_vec); - void SetPropTypeVector(std::vector prop_type_vec); - - void setBaseTotals(const std::array &bt) { - this->base_totals = bt; + void resetCounter() { + this->dht_hits = 0; + this->dht_evictions = 0; } + void SetSignifVector(std::vector signif_vec); + + auto getDataCount() { return this->data_count; } + auto getCommunicator() { return this->communicator; } + DHT *getDHT() { return this->dht_object; }; + + DHT_ResultObject &getDHTResults() { return this->dht_results; } + + const auto &getKeyElements() const { return this->input_key_elements; } + const auto &getKeySpecies() const { return this->key_species; } + + void setBaseTotals(double tot_h, double tot_o) { + this->base_totals = {tot_h, tot_o}; + } + + std::uint32_t getInputCount() const { + return this->input_key_elements.size(); + } + + std::uint32_t getOutputCount() const { return this->data_count; } + private: uint32_t key_count; uint32_t data_count; DHT *dht_object; + MPI_Comm communicator; - std::vector fuzzForDHT(int var_count, void *key, double dt); + LookupKey fuzzForDHT(int var_count, void *key, double dt); + + std::vector + outputToInputAndRates(const std::vector &old_results, + const std::vector &new_results); + + std::vector + inputAndRatesToOutput(const std::vector &dht_data); uint32_t dht_hits = 0; - uint32_t dht_miss = 0; uint32_t dht_evictions = 0; - std::vector dht_signif_vector; - std::vector dht_prop_type_vector; - std::vector input_key_elements; + NamedVector key_species; - static constexpr int DHT_KEY_SIGNIF_DEFAULT = 5; - static constexpr int DHT_KEY_SIGNIF_TOTALS = 10; - static constexpr int DHT_KEY_SIGNIF_CHARGE = 3; + std::vector dht_signif_vector; + std::vector dht_prop_type_vector; + std::vector input_key_elements; DHT_ResultObject dht_results; diff --git a/include/poet/DataStructures.hpp b/include/poet/DataStructures.hpp new file mode 100644 index 000000000..ec323d6fe --- /dev/null +++ b/include/poet/DataStructures.hpp @@ -0,0 +1,35 @@ +#ifndef DATASTRUCTURES_H_ +#define DATASTRUCTURES_H_ + +#include +#include +#include +#include + +namespace poet { + +template class NamedVector { +public: + void insert(std::pair to_insert) { + this->names.push_back(to_insert.first); + this->values.push_back(to_insert.second); + container_size++; + } + + bool empty() const { return this->container_size == 0; } + + std::size_t size() const { return this->container_size; } + + value_type &operator[](std::size_t i) { return values[i]; } + + const std::vector &getNames() const { return this->names; } + const std::vector &getValues() const { return this->values; } + +private: + std::size_t container_size{0}; + + std::vector names{}; + std::vector values{}; +}; +} // namespace poet +#endif // DATASTRUCTURES_H_ diff --git a/include/poet/Interpolation.hpp b/include/poet/Interpolation.hpp new file mode 100644 index 000000000..c80cdc97b --- /dev/null +++ b/include/poet/Interpolation.hpp @@ -0,0 +1,266 @@ +// Time-stamp: "Last modified 2023-08-01 18:10:48 mluebke" + +#ifndef INTERPOLATION_H_ +#define INTERPOLATION_H_ + +#include "DHT.h" +#include "DHT_Wrapper.hpp" +#include "DataStructures.hpp" +#include "LookupKey.hpp" +#include "poet/DHT_Wrapper.hpp" +#include "poet/Rounding.hpp" +#include +#include +#include +#include +#include +#include +#include +extern "C" { +#include "poet/DHT.h" +} + +#include "poet/LookupKey.hpp" +#include +#include +#include +#include + +namespace poet { + +class ProximityHashTable { +public: + using bucket_indicator = std::uint64_t; + + ProximityHashTable(uint32_t key_size, uint32_t data_size, + uint32_t entry_count, uint32_t size_per_process, + MPI_Comm communicator); + ~ProximityHashTable(); + + // delete assign and copy operator + ProximityHashTable &operator=(const ProximityHashTable *) = delete; + ProximityHashTable(const ProximityHashTable &) = delete; + + struct PHT_Result { + std::uint32_t size; + std::vector> in_values; + std::vector> out_values; + + std::uint32_t getSize() const { + std::uint32_t sum{0}; + for (const auto &results : out_values) { + sum += results.size() * sizeof(double); + } + return sum; + } + }; + + void setSourceDHT(DHT *src) { + this->source_dht = src; + this->dht_key_count = src->key_size / sizeof(Lookup_Keyelement); + this->dht_data_count = src->data_size / sizeof(double); + this->dht_buffer.resize(src->data_size + src->key_size); + } + + void writeLocationToPHT(LookupKey key, DHT_Location location); + + const PHT_Result &query(const LookupKey &key, + const std::vector &signif, + std::uint32_t min_entries_needed, + std::uint32_t input_count, + std::uint32_t output_count); + + std::uint64_t getLocations(const LookupKey &key); + + void getEntriesFromLocation(const PHT_Result &locations, + const std::vector &signif); + + void writeStats() { DHT_print_statistics(this->prox_ht); } + + DHT *getDHTObject() { return this->prox_ht; } + + auto getPHTWriteTime() const -> double { return this->pht_write_t; }; + auto getPHTReadTime() const -> double { return this->pht_read_t; }; + auto getDHTGatherTime() const -> double { return this->pht_gather_dht_t; }; + + auto getLocalCacheHits() const -> std::vector { + return this->all_cache_hits; + } + + void storeAndResetCounter() { + all_cache_hits.push_back(cache_hits); + cache_hits = 0; + } + +#if POET_PHT_ADD + void incrementReadCounter(const LookupKey &key); +#endif + +private: + enum { INTERP_CB_OK, INTERP_CB_FULL, INTERP_CB_ALREADY_IN }; + + static int PHT_callback_function(int in_data_size, void *in_data, + int out_data_size, void *out_data); + + static std::vector convertKeysFromDHT(Lookup_Keyelement *keys_in, + std::uint32_t key_size); + + static bool similarityCheck(const LookupKey &fine, const LookupKey &coarse, + const std::vector &signif); + + char *bucket_store; + + class Cache + : private std::unordered_map { + public: + void operator()(const LookupKey &key, const PHT_Result val); + + std::pair operator[](const LookupKey &key); + void flush() { this->clear(); } + + protected: + private: + static constexpr std::int64_t MAX_CACHE_SIZE = 100E6; + + std::int64_t free_mem{MAX_CACHE_SIZE}; + + std::list lru_queue; + using lru_iterator = typename std::list::iterator; + + std::unordered_map keyfinder; + }; + + Cache localCache; + DHT *prox_ht; + + std::uint32_t dht_evictions = 0; + + DHT *source_dht = nullptr; + + PHT_Result lookup_results; + std::vector dht_buffer; + + std::uint32_t dht_key_count; + std::uint32_t dht_data_count; + + MPI_Comm communicator; + + double pht_write_t = 0.; + double pht_read_t = 0.; + double pht_gather_dht_t = 0.; + + std::uint32_t cache_hits{0}; + std::vector all_cache_hits{}; +}; + +class InterpolationModule { +public: + using InterpFunction = std::vector (*)( + const std::vector &, const std::vector &, + const std::vector> &, + const std::vector> &); + + InterpolationModule(std::uint32_t entries_per_bucket, + std::uint64_t size_per_process, + std::uint32_t min_entries_needed, DHT_Wrapper &dht, + const NamedVector &interp_key_signifs, + const std::vector &dht_key_indices); + + enum result_status { RES_OK, INSUFFICIENT_DATA, NOT_NEEDED }; + + struct InterpolationResult { + std::vector> results; + std::vector status; + + void ResultsToWP(std::vector &currWP); + }; + + void setInterpolationFunction(InterpFunction func) { + this->f_interpolate = func; + } + + void setMinEntriesNeeded(std::uint32_t entries) { + this->min_entries_needed = entries; + } + + auto getMinEntriesNeeded() { return this->min_entries_needed; } + + void writePairs(const DHT_Wrapper::DHT_ResultObject &in); + + void tryInterpolation(DHT_Wrapper::DHT_ResultObject &dht_results, + std::vector &curr_mapping); + + void resultsToWP(std::vector &work_package) const; + + auto getPHTWriteTime() const { return pht->getPHTWriteTime(); }; + auto getPHTReadTime() const { return pht->getPHTReadTime(); }; + auto getDHTGatherTime() const { return pht->getDHTGatherTime(); }; + auto getInterpolationTime() const { return this->interp_t; }; + + auto getInterpolationCount() const -> std::uint32_t { + return this->interpolations; + } + + auto getPHTLocalCacheHits() const -> std::vector { + return this->pht->getLocalCacheHits(); + } + + void resetCounter() { + this->interpolations = 0; + this->pht->storeAndResetCounter(); + } + + void writePHTStats() { this->pht->writeStats(); } + void dumpPHTState(const std::string &filename) { + DHT_to_file(this->pht->getDHTObject(), filename.c_str()); + } + + static constexpr std::uint32_t COARSE_DIFF = 2; + static constexpr std::uint32_t COARSE_SIGNIF_DEFAULT = + DHT_Wrapper::DHT_KEY_SIGNIF_DEFAULT - COARSE_DIFF; + +private: + void initPHT(std::uint32_t key_count, std::uint32_t entries_per_bucket, + std::uint32_t size_per_process, MPI_Comm communicator); + + static std::vector dummy(const std::vector &, + const std::vector &, + const std::vector> &, + const std::vector> &) { + return {}; + } + + double interp_t = 0.; + + std::uint32_t interpolations{0}; + + InterpFunction f_interpolate = dummy; + + std::uint32_t min_entries_needed = 5; + + std::unique_ptr pht; + + DHT_Wrapper &dht_instance; + + NamedVector key_signifs; + std::vector key_indices; + + InterpolationResult interp_result; + PHT_Rounder rounder; + + LookupKey roundKey(const LookupKey &in_key) { + LookupKey out_key; + + for (std::uint32_t i = 0; i < key_indices.size(); i++) { + out_key.push_back(rounder.round(in_key[key_indices[i]], key_signifs[i])); + } + + // timestep + out_key.push_back(in_key.back()); + + return out_key; + } +}; +} // namespace poet + +#endif // INTERPOLATION_H_ diff --git a/include/poet/LookupKey.hpp b/include/poet/LookupKey.hpp new file mode 100644 index 000000000..c3366735f --- /dev/null +++ b/include/poet/LookupKey.hpp @@ -0,0 +1,50 @@ +// Time-stamp: "Last modified 2023-07-26 10:20:10 mluebke" + +#ifndef LOOKUPKEY_H_ +#define LOOKUPKEY_H_ + +#include "poet/HashFunctions.hpp" +#include +#include +#include + +namespace poet { + +struct Lookup_SC_notation { + std::int8_t exp : 8; + std::int64_t significant : 56; +}; + +union Lookup_Keyelement { + double fp_element; + Lookup_SC_notation sc_notation; + + bool operator==(const Lookup_Keyelement &other) const { + return std::memcmp(this, &other, sizeof(Lookup_Keyelement)) == 0 ? true + : false; + } +}; + +class LookupKey : public std::vector { +public: + explicit LookupKey(size_type count); + + using std::vector::vector; + + std::vector to_double() const; + static Lookup_SC_notation round_from_double(const double in, + std::uint32_t signif); + static double to_double(const Lookup_SC_notation in); +}; + +struct LookupKeyHasher { + std::uint64_t operator()(const LookupKey &k) const { + const uint32_t key_size = k.size() * sizeof(Lookup_Keyelement); + + return poet::Murmur2_64A(key_size, k.data()); + } +}; + +} // namespace poet + +#endif // LOOKUPKEY_H_ diff --git a/include/poet/RInsidePOET.hpp b/include/poet/RInsidePOET.hpp index a1d2b8306..fb2102685 100644 --- a/include/poet/RInsidePOET.hpp +++ b/include/poet/RInsidePOET.hpp @@ -1,7 +1,15 @@ #ifndef RPOET_H_ #define RPOET_H_ +#include "DataStructures.hpp" + #include +#include +#include +#include +#include +#include +#include class RInsidePOET : public RInside { public: @@ -14,6 +22,38 @@ public: RInsidePOET(RInsidePOET const &) = delete; void operator=(RInsidePOET const &) = delete; + inline bool checkIfExists(const std::string &R_name, + const std::string &where) { + return Rcpp::as( + this->parseEval("'" + R_name + "' %in% names(" + where + ")")); + } + + template inline T getSTL(const std::string &R_name) { + return Rcpp::as(RInside::operator[](R_name)); + } + + template + poet::NamedVector wrapNamedVector(const std::string &R_name) { + std::size_t name_size = this->parseEval("length(names(" + R_name + "))"); + if (name_size == 0) { + throw std::runtime_error("wrapNamedVector: " + R_name + + " is not a named vector!"); + } + + auto names = Rcpp::as>( + this->parseEval("names(" + R_name + ")")); + auto values = Rcpp::as(this->parseEval(R_name)); + + poet::NamedVector ret_map; + + for (std::size_t i = 0; i < names.size(); i++) { + ret_map.insert( + std::make_pair(names[i], static_cast(values[i]))); + } + + return ret_map; + } + private: RInsidePOET() : RInside(){}; }; diff --git a/include/poet/Rounding.hpp b/include/poet/Rounding.hpp new file mode 100644 index 000000000..3f659290e --- /dev/null +++ b/include/poet/Rounding.hpp @@ -0,0 +1,92 @@ +#ifndef ROUNDING_H_ +#define ROUNDING_H_ + +#include "LookupKey.hpp" +#include +#include + +namespace poet { + +constexpr std::int8_t AQUEOUS_EXP = -13; + +template +class IRounding { +public: + virtual Output round(const Input &, std::uint32_t signif) = 0; + virtual ConvertTo convert(const Output &) = 0; +}; + +class DHT_Rounder { +public: + Lookup_Keyelement round(const double &value, std::uint32_t signif, + bool is_ho) { + std::int8_t exp = + static_cast(std::floor(std::log10(std::fabs(value)))); + + if (!is_ho) { + if (exp < AQUEOUS_EXP) { + return {.sc_notation = {0, 0}}; + } + + std::int8_t diff = exp - signif + 1; + if (diff < AQUEOUS_EXP) { + signif -= AQUEOUS_EXP - diff; + } + } + + Lookup_Keyelement elem; + elem.sc_notation.exp = exp; + elem.sc_notation.significant = + static_cast(value * std::pow(10, signif - exp - 1)); + + return elem; + } + + double convert(const Lookup_Keyelement &key_elem) { + std::int32_t normalized_exp = static_cast( + -std::log10(std::fabs(key_elem.sc_notation.significant))); + + // add stored exponent to normalized exponent + normalized_exp += key_elem.sc_notation.exp; + + // return significant times 10 to the power of exponent + return key_elem.sc_notation.significant * std::pow(10., normalized_exp); + } +}; + +class PHT_Rounder : public IRounding { +public: + Lookup_Keyelement round(const Lookup_Keyelement &value, + std::uint32_t signif) { + Lookup_Keyelement new_val = value; + + std::uint32_t diff_signif = + static_cast( + std::ceil(std::log10(std::abs(value.sc_notation.significant)))) - + signif; + + new_val.sc_notation.significant = static_cast( + value.sc_notation.significant / std::pow(10., diff_signif)); + + if (new_val.sc_notation.significant == 0) { + new_val.sc_notation.exp = 0; + } + + return new_val; + } + + double convert(const Lookup_Keyelement &key_elem) { + std::int32_t normalized_exp = static_cast( + -std::log10(std::fabs(key_elem.sc_notation.significant))); + + // add stored exponent to normalized exponent + normalized_exp += key_elem.sc_notation.exp; + + // return significant times 10 to the power of exponent + return key_elem.sc_notation.significant * std::pow(10., normalized_exp); + } +}; + +} // namespace poet + +#endif // ROUNDING_H_ diff --git a/include/poet/SimParams.hpp b/include/poet/SimParams.hpp index 3e7483e31..8a28624df 100644 --- a/include/poet/SimParams.hpp +++ b/include/poet/SimParams.hpp @@ -24,17 +24,20 @@ #include #include #include +#include #include +#include "DataStructures.hpp" +#include "RInsidePOET.hpp" #include "argh.hpp" // Argument handler https://github.com/adishavit/argh #include #include // BSD-licenced /** Standard DHT Size. Defaults to 1 GB (1000 MB) */ -constexpr uint32_t DHT_SIZE_PER_PROCESS_MB = 1E3; +constexpr uint32_t DHT_SIZE_PER_PROCESS_MB = 1.5E3; /** Standard work package size */ -#define WORK_PACKAGE_SIZE_DEFAULT 5 +#define WORK_PACKAGE_SIZE_DEFAULT 32 namespace poet { @@ -70,6 +73,8 @@ typedef struct { /** indicating whether the progress bar during chemistry simulation should be * printed or not */ bool print_progressbar; + + bool interp_enabled; } t_simparams; using GridParams = struct s_GridParams { @@ -101,13 +106,24 @@ using DiffusionParams = struct s_DiffusionParams { s_DiffusionParams(RInside &R); }; -using ChemistryParams = struct s_ChemistryParams { +struct ChemistryParams { std::string database_path; std::string input_script; - std::vector dht_species; - std::vector dht_signif; - s_ChemistryParams(RInside &R); + bool use_dht; + std::uint64_t dht_size; + int dht_snaps; + std::string dht_file; + std::string dht_outdir; + NamedVector dht_signifs; + + bool use_interp; + std::uint64_t pht_size; + std::uint32_t pht_max_entries; + std::uint32_t interp_min_entries; + NamedVector pht_signifs; + + void initFromR(RInsidePOET &R); }; /** @@ -159,7 +175,7 @@ public: * @return int Returns with 0 if no error occured, otherwise value less than 0 * is returned. */ - int parseFromCmdl(char *argv[], RInside &R); + int parseFromCmdl(char *argv[], RInsidePOET &R); /** * @brief Init std::vector values @@ -193,25 +209,10 @@ public: */ auto getDHTSignifVector() const { return this->dht_signif_vector; }; - /** - * @brief Get the DHT_Prop_Type_Vector - * - * Returns a vector indicating of which type a variable of a grid cell is. - * - * @return std::vector Vector if strings defining a type of a - * variable - */ - auto getDHTPropTypeVector() const { return this->dht_prop_type_vector; }; + auto getPHTSignifVector() const { return this->pht_signif_vector; }; - /** - * @brief Return name of DHT snapshot. - * - * Name of the DHT file which is used to initialize the DHT with a previously - * written snapshot. - * - * @return std::string Absolute paht to the DHT snapshot - */ - auto getDHTFile() const { return this->dht_file; }; + auto getPHTBucketSize() const { return this->pht_bucket_size; }; + auto getPHTMinEntriesNeeded() const { return this->pht_min_entries_needed; }; /** * @brief Get the filesim name @@ -233,23 +234,30 @@ public: */ auto getOutDir() const { return this->out_dir; }; + const auto &getChemParams() const { return chem_params; } + private: std::list validateOptions(argh::parser cmdl); - const std::set flaglist{"ignore-result", "dht", "dht-nolog", "P", - "progress"}; - const std::set paramlist{"work-package-size", "dht-signif", - "dht-strategy", "dht-size", - "dht-snaps", "dht-file"}; + const std::set flaglist{ + "ignore-result", "dht", "dht-nolog", "P", "progress", "interp"}; + const std::set paramlist{ + "work-package-size", "dht-signif", "dht-strategy", + "dht-size", "dht-snaps", "dht-file", + "interp-size", "interp-min", "interp-bucket-entries"}; t_simparams simparams; std::vector dht_signif_vector; - std::vector dht_prop_type_vector; + std::vector pht_signif_vector; + + uint32_t pht_bucket_size; + uint32_t pht_min_entries_needed; - std::string dht_file; std::string filesim; std::string out_dir; + + ChemistryParams chem_params; }; } // namespace poet #endif // PARSER_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e273c28f8..74a61c8c7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,9 +2,6 @@ file(GLOB_RECURSE poet_lib_SRC CONFIGURE_DEPENDS "*.cpp" "*.c") -find_library(MATH_LIBRARY m) -find_library(CRYPTO_LIBRARY crypto) - add_library(poet_lib ${poet_lib_SRC}) target_include_directories(poet_lib PUBLIC ${PROJECT_SOURCE_DIR}/include) target_link_libraries(poet_lib PUBLIC @@ -20,3 +17,9 @@ option(POET_DHT_DEBUG "Build with DHT debug info" OFF) if(POET_DHT_DEBUG) target_compile_definitions(poet_lib PRIVATE DHT_STATISTICS) endif() + +option(POET_PHT_ADDITIONAL_INFO "Enables additional information in the PHT" OFF) + +if (POET_PHT_ADDITIONAL_INFO) + target_compile_definitions(poet_lib PRIVATE POET_PHT_ADD) +endif() diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp index a42d8151c..52a315734 100644 --- a/src/ChemistryModule/ChemistryModule.cpp +++ b/src/ChemistryModule/ChemistryModule.cpp @@ -1,20 +1,167 @@ #include "poet/ChemistryModule.hpp" #include "PhreeqcRM.h" #include "poet/DHT_Wrapper.hpp" +#include "poet/Interpolation.hpp" #include #include +#include #include #include +#include #include #include #include #include #include +constexpr uint32_t MB_FACTOR = 1E6; + +std::vector +inverseDistanceWeighting(const std::vector &to_calc, + const std::vector &from, + const std::vector> &input, + const std::vector> &output) { + std::vector results = from; + + const std::uint32_t buffer_size = input.size() + 1; + double buffer[buffer_size]; + double from_rescaled; + + const std::uint32_t data_set_n = input.size(); + double rescaled[to_calc.size()][data_set_n + 1]; + double weights[data_set_n]; + + // rescaling over all key elements + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + const auto output_comp_i = to_calc[key_comp_i]; + + // rescale input between 0 and 1 + for (int point_i = 0; point_i < data_set_n; point_i++) { + rescaled[key_comp_i][point_i] = input[point_i][key_comp_i]; + } + + rescaled[key_comp_i][data_set_n] = from[output_comp_i]; + + const double min = *std::min_element(rescaled[key_comp_i], + rescaled[key_comp_i] + data_set_n + 1); + const double max = *std::max_element(rescaled[key_comp_i], + rescaled[key_comp_i] + data_set_n + 1); + + for (int point_i = 0; point_i < data_set_n; point_i++) { + rescaled[key_comp_i][point_i] = + ((max - min) != 0 + ? (rescaled[key_comp_i][point_i] - min) / (max - min) + : 0); + } + rescaled[key_comp_i][data_set_n] = + ((max - min) != 0 ? (from[output_comp_i] - min) / (max - min) : 0); + } + + // calculate distances for each data set + double inv_sum = 0; + for (int point_i = 0; point_i < data_set_n; point_i++) { + double distance = 0; + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + distance += std::pow( + rescaled[key_comp_i][point_i] - rescaled[key_comp_i][data_set_n], 2); + } + weights[point_i] = 1 / std::sqrt(distance); + assert(!std::isnan(weights[point_i])); + inv_sum += weights[point_i]; + } + + assert(!std::isnan(inv_sum)); + + // actual interpolation + // bool has_h = false; + // bool has_o = false; + + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + const auto output_comp_i = to_calc[key_comp_i]; + double key_delta = 0; + + // if (interp_i == 0) { + // has_h = true; + // } + + // if (interp_i == 1) { + // has_o = true; + // } + + for (int j = 0; j < data_set_n; j++) { + key_delta += weights[j] * input[j][key_comp_i]; + } + + key_delta /= inv_sum; + + results[output_comp_i] = from[output_comp_i] + key_delta; + } + + // if (!has_h) { + // double new_val = 0; + // for (int j = 0; j < data_set_n; j++) { + // new_val += weights[j] * output[j][0]; + // } + // results[0] = new_val / inv_sum; + // } + + // if (!has_h) { + // double new_val = 0; + // for (int j = 0; j < data_set_n; j++) { + // new_val += weights[j] * output[j][1]; + // } + // results[1] = new_val / inv_sum; + // } + + // for (std::uint32_t i = 0; i < to_calc.size(); i++) { + // const std::uint32_t interp_i = to_calc[i]; + + // // rescale input between 0 and 1 + // for (int j = 0; j < input.size(); j++) { + // buffer[j] = input[j].at(i); + // } + + // buffer[buffer_size - 1] = from[interp_i]; + + // const double min = *std::min_element(buffer, buffer + buffer_size); + // const double max = *std::max_element(buffer, buffer + buffer_size); + + // for (int j = 0; j < input.size(); j++) { + // buffer[j] = ((max - min) != 0 ? (buffer[j] - min) / (max - min) : 1); + // } + // from_rescaled = + // ((max - min) != 0 ? (from[interp_i] - min) / (max - min) : 0); + + // double inv_sum = 0; + + // // calculate distances for each point + // for (int i = 0; i < input.size(); i++) { + // const double distance = std::pow(buffer[i] - from_rescaled, 2); + + // buffer[i] = distance > 0 ? (1 / std::sqrt(distance)) : 0; + // inv_sum += buffer[i]; + // } + // // calculate new values + // double new_val = 0; + // for (int i = 0; i < output.size(); i++) { + // new_val += buffer[i] * output[i][interp_i]; + // } + // results[interp_i] = new_val / inv_sum; + // if (std::isnan(results[interp_i])) { + // std::cout << "nan with new_val = " << output[0][i] << std::endl; + // } + // } + + return results; +} + poet::ChemistryModule::ChemistryModule(uint32_t nxyz, uint32_t wp_size, + std::uint32_t maxiter, + const ChemistryParams &chem_param, MPI_Comm communicator) - : PhreeqcRM(nxyz, 1), group_comm(communicator), wp_size(wp_size) { + : PhreeqcRM(nxyz, 1), group_comm(communicator), wp_size(wp_size), + params(chem_param) { MPI_Comm_size(communicator, &this->comm_size); MPI_Comm_rank(communicator, &this->comm_rank); @@ -24,7 +171,10 @@ poet::ChemistryModule::ChemistryModule(uint32_t nxyz, uint32_t wp_size, if (!is_sequential && is_master) { MPI_Bcast(&wp_size, 1, MPI_UINT32_T, 0, this->group_comm); + MPI_Bcast(&maxiter, 1, MPI_UINT32_T, 0, this->group_comm); } + + this->file_pad = std::ceil(std::log10(maxiter + 1)); } poet::ChemistryModule::~ChemistryModule() { @@ -34,11 +184,15 @@ poet::ChemistryModule::~ChemistryModule() { } poet::ChemistryModule -poet::ChemistryModule::createWorker(MPI_Comm communicator) { +poet::ChemistryModule::createWorker(MPI_Comm communicator, + const ChemistryParams &chem_param) { uint32_t wp_size; MPI_Bcast(&wp_size, 1, MPI_UINT32_T, 0, communicator); - return ChemistryModule(wp_size, wp_size, communicator); + std::uint32_t maxiter; + MPI_Bcast(&maxiter, 1, MPI_UINT32_T, 0, communicator); + + return ChemistryModule(wp_size, wp_size, maxiter, chem_param, communicator); } void poet::ChemistryModule::RunInitFile(const std::string &input_script_path) { @@ -122,7 +276,9 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { } // now sort the new values - std::sort(new_solution_names.begin() + 4, new_solution_names.end()); + std::sort(new_solution_names.begin() + 3, new_solution_names.end()); + this->SetPOETSolutionNames(new_solution_names); + this->speciesPerModule[0] = new_solution_names.size(); // and append other processes than solutions std::vector new_prop_names = new_solution_names; @@ -133,8 +289,6 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { this->prop_names = std::move(new_prop_names); this->prop_count = prop_names.size(); - this->SetPOETSolutionNames(new_solution_names); - if (is_master) { this->n_cells = trans_field.GetRequestedVecSize(); chem_field = Field(n_cells); @@ -151,162 +305,186 @@ void poet::ChemistryModule::initializeField(const Field &trans_field) { std::vector> initial_values; - for (int i = 0; i < phreeqc_init.size(); i++) { + for (const auto &vec : trans_field.As2DVector()) { + initial_values.push_back(vec); + } + + this->base_totals = {initial_values.at(0).at(0), + initial_values.at(1).at(0)}; + ChemBCast(base_totals.data(), 2, MPI_DOUBLE); + + for (int i = speciesPerModule[0]; i < phreeqc_init.size(); i++) { std::vector init(n_cells, phreeqc_init[i]); initial_values.push_back(std::move(init)); } chem_field.InitFromVec(initial_values, prop_names); + } else { + ChemBCast(base_totals.data(), 2, MPI_DOUBLE); + } + + if (this->params.use_dht || this->params.use_interp) { + initializeDHT(this->params.dht_size, this->params.dht_signifs); + setDHTSnapshots(this->params.dht_snaps, this->params.dht_outdir); + setDHTReadFile(this->params.dht_file); + + this->dht_enabled = this->params.use_dht; + + if (this->params.use_interp) { + initializeInterp(this->params.pht_max_entries, this->params.pht_size, + this->params.interp_min_entries, + this->params.pht_signifs); + this->interp_enabled = this->params.use_interp; + } } } -void poet::ChemistryModule::SetDHTEnabled( - bool enable, uint32_t size_mb, - const std::vector &key_species) { - +void poet::ChemistryModule::initializeDHT( + uint32_t size_mb, const NamedVector &key_species) { constexpr uint32_t MB_FACTOR = 1E6; - std::vector key_inidices; + + this->dht_enabled = true; + + MPI_Comm dht_comm; if (this->is_master) { - int ftype = CHEM_DHT_ENABLE; - PropagateFunctionType(ftype); - ChemBCast(&enable, 1, MPI_CXX_BOOL); - ChemBCast(&size_mb, 1, MPI_UINT32_T); - - key_inidices = parseDHTSpeciesVec(key_species); - int vec_size = key_inidices.size(); - ChemBCast(&vec_size, 1, MPI_INT); - ChemBCast(key_inidices.data(), key_inidices.size(), MPI_UINT32_T); - } else { - int vec_size; - ChemBCast(&vec_size, 1, MPI_INT); - key_inidices.resize(vec_size); - ChemBCast(key_inidices.data(), vec_size, MPI_UINT32_T); + MPI_Comm_split(this->group_comm, MPI_UNDEFINED, this->comm_rank, &dht_comm); + return; } - this->dht_enabled = enable; - - if (enable) { - MPI_Comm dht_comm; - - if (this->is_master) { - MPI_Comm_split(this->group_comm, MPI_UNDEFINED, this->comm_rank, - &dht_comm); - return; - } + if (!this->is_master) { MPI_Comm_split(this->group_comm, 1, this->comm_rank, &dht_comm); + auto map_copy = key_species; + + if (key_species.empty()) { + const auto &all_species = this->prop_names; + for (std::size_t i = 0; i < all_species.size(); i++) { + map_copy.insert(std::make_pair(all_species[i], + DHT_Wrapper::DHT_KEY_SIGNIF_DEFAULT)); + } + } + + auto key_indices = parseDHTSpeciesVec(key_species, this->prop_names); + if (this->dht) { delete this->dht; } - const uint32_t dht_size = size_mb * MB_FACTOR; + const std::uint64_t dht_size = size_mb * MB_FACTOR; - this->dht = - new DHT_Wrapper(dht_comm, dht_size, key_inidices, this->prop_count); - // this->dht->setBaseTotals(this->base_totals); + this->dht = new DHT_Wrapper(dht_comm, dht_size, map_copy, key_indices, + this->prop_count); + this->dht->setBaseTotals(base_totals.at(0), base_totals.at(1)); } } -inline std::vector poet::ChemistryModule::parseDHTSpeciesVec( - const std::vector &species_vec) const { - std::vector species_indices; +inline std::vector poet::ChemistryModule::parseDHTSpeciesVec( + const NamedVector &key_species, + const std::vector &to_compare) const { + std::vector species_indices; + species_indices.reserve(key_species.size()); - if (species_vec.empty()) { - species_indices.resize(this->prop_count); - - int i = 0; - std::generate(species_indices.begin(), species_indices.end(), - [&] { return i++; }); - return species_indices; - } - - species_indices.reserve(species_vec.size()); - - for (const auto &name : species_vec) { - auto it = std::find(this->prop_names.begin(), this->prop_names.end(), name); - if (it == prop_names.end()) { - throw std::runtime_error( - "DHT species name was not found in prop name vector!"); + for (const auto &species : key_species.getNames()) { + auto it = std::find(to_compare.begin(), to_compare.end(), species); + if (it == to_compare.end()) { + species_indices.push_back(DHT_Wrapper::DHT_KEY_INPUT_CUSTOM); + continue; } - const std::uint32_t index = it - prop_names.begin(); + const std::uint32_t index = it - to_compare.begin(); species_indices.push_back(index); } return species_indices; } -void poet::ChemistryModule::SetDHTSnaps(int type, const std::string &out_dir) { +void poet::ChemistryModule::BCastStringVec(std::vector &io) { + if (this->is_master) { - int ftype = CHEM_DHT_SNAPS; - PropagateFunctionType(ftype); + int vec_size = io.size(); + ChemBCast(&vec_size, 1, MPI_INT); - int str_size = out_dir.size(); - - ChemBCast(&type, 1, MPI_INT); - ChemBCast(&str_size, 1, MPI_INT); - ChemBCast(const_cast(out_dir.data()), str_size, MPI_CHAR); - } - - this->dht_snaps_type = type; - this->dht_file_out_dir = out_dir; -} - -void poet::ChemistryModule::SetDHTSignifVector( - std::vector &signif_vec) { - if (this->is_master) { - int ftype = CHEM_DHT_SIGNIF_VEC; - PropagateFunctionType(ftype); - - int data_count = signif_vec.size(); - ChemBCast(&data_count, 1, MPI_INT); - ChemBCast(signif_vec.data(), signif_vec.size(), MPI_UINT32_T); - - return; - } - - int data_count; - ChemBCast(&data_count, 1, MPI_INT); - signif_vec.resize(data_count); - ChemBCast(signif_vec.data(), data_count, MPI_UINT32_T); - - this->dht->SetSignifVector(signif_vec); -} - -void poet::ChemistryModule::SetDHTPropTypeVector( - std::vector proptype_vec) { - if (this->is_master) { - if (proptype_vec.size() != this->prop_count) { - throw std::runtime_error("Prop type vector sizes mismatches prop count."); + for (const auto &value : io) { + int buf_size = value.size() + 1; + ChemBCast(&buf_size, 1, MPI_INT); + ChemBCast(const_cast(value.c_str()), buf_size, MPI_CHAR); } + } else { + int vec_size; + ChemBCast(&vec_size, 1, MPI_INT); - int ftype = CHEM_DHT_PROP_TYPE_VEC; - PropagateFunctionType(ftype); - ChemBCast(proptype_vec.data(), proptype_vec.size(), MPI_UINT32_T); + io.resize(vec_size); + for (int i = 0; i < vec_size; i++) { + int buf_size; + ChemBCast(&buf_size, 1, MPI_INT); + char buf[buf_size]; + ChemBCast(buf, buf_size, MPI_CHAR); + io[i] = std::string{buf}; + } + } +} + +void poet::ChemistryModule::setDHTSnapshots(int type, + const std::string &out_dir) { + if (this->is_master) { return; } - this->dht->SetPropTypeVector(proptype_vec); + this->dht_file_out_dir = out_dir; + this->dht_snaps_type = type; } -void poet::ChemistryModule::ReadDHTFile(const std::string &input_file) { +void poet::ChemistryModule::setDHTReadFile(const std::string &input_file) { if (this->is_master) { - int ftype = CHEM_DHT_READ_FILE; - PropagateFunctionType(ftype); - int str_size = input_file.size(); - - ChemBCast(&str_size, 1, MPI_INT); - ChemBCast(const_cast(input_file.data()), str_size, MPI_CHAR); + return; } - if (!this->dht_enabled) { - throw std::runtime_error("DHT file cannot be read. DHT is not enabled."); + if (!input_file.empty()) { + WorkerReadDHTDump(input_file); } +} + +void poet::ChemistryModule::initializeInterp( + std::uint32_t bucket_size, std::uint32_t size_mb, std::uint32_t min_entries, + const NamedVector &key_species) { if (!this->is_master) { - WorkerReadDHTDump(input_file); + + constexpr uint32_t MB_FACTOR = 1E6; + + assert(this->dht); + + this->interp_enabled = true; + + auto map_copy = key_species; + + if (key_species.empty()) { + map_copy = this->dht->getKeySpecies(); + for (std::size_t i = 0; i < map_copy.size(); i++) { + const std::uint32_t signif = + map_copy[i] - (map_copy[i] > InterpolationModule::COARSE_DIFF + ? InterpolationModule::COARSE_DIFF + : 0); + map_copy[i] = signif; + } + } + + auto key_indices = + parseDHTSpeciesVec(key_species, dht->getKeySpecies().getNames()); + + if (this->interp) { + this->interp.reset(); + } + + const uint64_t pht_size = size_mb * MB_FACTOR; + + interp = std::make_unique( + bucket_size, pht_size, min_entries, *(this->dht), map_copy, + key_indices); + + interp->setInterpolationFunction(inverseDistanceWeighting); } } diff --git a/src/ChemistryModule/DHT_Wrapper.cpp b/src/ChemistryModule/DHT_Wrapper.cpp deleted file mode 100644 index 129b45340..000000000 --- a/src/ChemistryModule/DHT_Wrapper.cpp +++ /dev/null @@ -1,226 +0,0 @@ -/* -** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of -** Potsdam) -** -** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam) -** -** POET is free software; you can redistribute it and/or modify it under the -** terms of the GNU General Public License as published by the Free Software -** Foundation; either version 2 of the License, or (at your option) any later -** version. -** -** POET is distributed in the hope that it will be useful, but WITHOUT ANY -** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -** A PARTICULAR PURPOSE. See the GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License along with -** this program; if not, write to the Free Software Foundation, Inc., 51 -** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -*/ - -#include "poet/DHT_Wrapper.hpp" -#include "poet/DHT_Types.hpp" -#include "poet/HashFunctions.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace poet; -using namespace std; - -inline DHT_SCNotation round_key_element(double value, std::uint32_t signif) { - std::int8_t exp = - static_cast(std::floor(std::log10(std::fabs(value)))); - std::int64_t significant = - static_cast(value * std::pow(10, signif - exp - 1)); - - DHT_SCNotation elem; - elem.exp = exp; - elem.significant = significant; - - return elem; -} - -DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size, - const std::vector &key_indices, - uint32_t data_count) - : key_count(key_indices.size()), data_count(data_count), - input_key_elements(key_indices) { - // initialize DHT object - uint32_t key_size = (key_count + 1) * sizeof(DHT_Keyelement); - uint32_t data_size = data_count * sizeof(double); - uint32_t buckets_per_process = dht_size / (1 + data_size + key_size); - dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, - &poet::Murmur2_64A); - - this->dht_signif_vector.resize(key_size, DHT_KEY_SIGNIF_DEFAULT); - this->dht_signif_vector[0] = DHT_KEY_SIGNIF_TOTALS; - this->dht_signif_vector[1] = DHT_KEY_SIGNIF_TOTALS; - this->dht_signif_vector[2] = DHT_KEY_SIGNIF_CHARGE; - - this->dht_prop_type_vector.resize(key_count, DHT_TYPE_DEFAULT); - this->dht_prop_type_vector[0] = DHT_TYPE_TOTAL; - this->dht_prop_type_vector[1] = DHT_TYPE_TOTAL; - this->dht_prop_type_vector[2] = DHT_TYPE_CHARGE; -} - -DHT_Wrapper::~DHT_Wrapper() { - // free DHT - DHT_free(dht_object, NULL, NULL); -} -auto DHT_Wrapper::checkDHT(int length, double dt, - const std::vector &work_package, - std::vector &curr_mapping) - -> const poet::DHT_ResultObject & { - - dht_results.length = length; - dht_results.keys.resize(length); - dht_results.results.resize(length); - dht_results.needPhreeqc.resize(length); - - std::vector new_mapping; - - // loop over every grid cell contained in work package - for (int i = 0; i < length; i++) { - // point to current grid cell - void *key = (void *)&(work_package[i * this->data_count]); - auto &data = dht_results.results[i]; - auto &key_vector = dht_results.keys[i]; - - data.resize(this->data_count); - key_vector = fuzzForDHT(this->key_count, key, dt); - - // overwrite input with data from DHT, IF value is found in DHT - int res = DHT_read(this->dht_object, key_vector.data(), data.data()); - - switch (res) { - case DHT_SUCCESS: - dht_results.needPhreeqc[i] = false; - this->dht_hits++; - break; - case DHT_READ_MISS: - dht_results.needPhreeqc[i] = true; - new_mapping.push_back(curr_mapping[i]); - this->dht_miss++; - break; - } - } - - curr_mapping = std::move(new_mapping); - - return dht_results; -} - -void DHT_Wrapper::fillDHT(int length, const std::vector &work_package) { - // loop over every grid cell contained in work package - for (int i = 0; i < length; i++) { - // If true grid cell was simulated, needs to be inserted into dht - if (dht_results.needPhreeqc[i]) { - const auto &key = dht_results.keys[i]; - void *data = (void *)&(work_package[i * this->data_count]); - // fuzz data (round, logarithm etc.) - - // insert simulated data with fuzzed key into DHT - int res = DHT_write(this->dht_object, (void *)key.data(), data); - - // if data was successfully written ... - if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) { - dht_evictions++; - } - } - } -} - -void DHT_Wrapper::resultsToWP(std::vector &work_package) { - for (int i = 0; i < dht_results.length; i++) { - if (!dht_results.needPhreeqc[i]) { - std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), - work_package.begin() + (data_count * i)); - } - } -} - -int DHT_Wrapper::tableToFile(const char *filename) { - int res = DHT_to_file(dht_object, filename); - return res; -} - -int DHT_Wrapper::fileToTable(const char *filename) { - int res = DHT_from_file(dht_object, filename); - if (res != DHT_SUCCESS) - return res; - -#ifdef DHT_STATISTICS - DHT_print_statistics(dht_object); -#endif - - return DHT_SUCCESS; -} - -void DHT_Wrapper::printStatistics() { - int res; - - res = DHT_print_statistics(dht_object); - - if (res != DHT_SUCCESS) { - // MPI ERROR ... WHAT TO DO NOW? - // RUNNING CIRCLES WHILE SCREAMING - } -} - -std::vector DHT_Wrapper::fuzzForDHT(int var_count, void *key, - double dt) { - constexpr double zero_val = 10E-14; - - std::vector vecFuzz(var_count + 1); - std::memset(&vecFuzz[0], 0, sizeof(DHT_Keyelement) * var_count); - - int totals_i = 0; - // introduce fuzzing to allow more hits in DHT - // loop over every variable of grid cell - for (std::uint32_t i = 0; i < input_key_elements.size(); i++) { - double curr_key = ((double *)key)[input_key_elements[i]]; - if (curr_key != 0) { - if (curr_key < zero_val && - this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) { - continue; - } - if (this->dht_prop_type_vector[i] == DHT_TYPE_IGNORE) { - continue; - } - if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { - curr_key -= base_totals[totals_i++]; - } - vecFuzz[i].sc_notation = round_key_element(curr_key, dht_signif_vector[i]); - } - } - // if timestep differs over iterations set current current time step at the - // end of fuzzing buffer - vecFuzz[var_count].fp_elemet = dt; - - return vecFuzz; -} - -void poet::DHT_Wrapper::SetSignifVector(std::vector signif_vec) { - if (signif_vec.size() != this->key_count) { - throw std::runtime_error( - "Significant vector size mismatches count of key elements."); - } - - this->dht_signif_vector = signif_vec; -} -void poet::DHT_Wrapper::SetPropTypeVector(std::vector prop_type_vec) { - if (prop_type_vec.size() != this->key_count) { - throw std::runtime_error( - "Prop type vector size mismatches count of key elements."); - } - - this->dht_prop_type_vector = prop_type_vec; -} diff --git a/src/ChemistryModule/MasterFunctions.cpp b/src/ChemistryModule/MasterFunctions.cpp index 316b64cab..b40341dbd 100644 --- a/src/ChemistryModule/MasterFunctions.cpp +++ b/src/ChemistryModule/MasterFunctions.cpp @@ -2,6 +2,7 @@ #include "poet/ChemistryModule.hpp" #include +#include #include #include #include @@ -62,19 +63,136 @@ std::vector poet::ChemistryModule::GetWorkerIdleTimings() const { std::vector poet::ChemistryModule::GetWorkerDHTHits() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_HITS); -} - -std::vector poet::ChemistryModule::GetWorkerDHTMiss() const { - int type = CHEM_PERF; + type = WORKER_DHT_HITS; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_MISS); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_DHT_HITS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_DHT_HITS, + this->group_comm, NULL); + + return ret; } std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_EVICTIONS); + type = WORKER_DHT_EVICTIONS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_DHT_EVICTIONS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_DHT_EVICTIONS, + this->group_comm, NULL); + + return ret; +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationWriteTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_WRITE); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationReadTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_READ); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationGatherTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_GATHER); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationFunctionCallTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_FC); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationCalls() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + type = WORKER_IP_CALLS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_IP_CALLS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_IP_CALLS, + this->group_comm, NULL); + + return ret; +} + +std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + type = WORKER_PHT_CACHE_HITS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, type, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, type, + this->group_comm, NULL); + + return ret; +} + +inline std::vector shuffleField(const std::vector &in_field, + uint32_t size_per_prop, + uint32_t prop_count, + uint32_t wp_count) { + std::vector out_buffer(in_field.size()); + uint32_t write_i = 0; + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < prop_count; k++) { + out_buffer[(write_i * prop_count) + k] = + in_field[(k * size_per_prop) + j]; + } + write_i++; + } + } + return out_buffer; +} + +inline void unshuffleField(const std::vector &in_buffer, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count, std::vector &out_field) { + uint32_t read_i = 0; + + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < prop_count; k++) { + out_field[(k * size_per_prop) + j] = + in_buffer[(read_i * prop_count) + k]; + } + read_i++; + } + } } inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { @@ -190,12 +308,15 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, } void poet::ChemistryModule::RunCells() { + double start_t{MPI_Wtime()}; if (this->is_sequential) { MasterRunSequential(); return; } MasterRunParallel(); + double end_t{MPI_Wtime()}; + this->chem_t += end_t - start_t; } void poet::ChemistryModule::MasterRunSequential() { @@ -211,7 +332,6 @@ void poet::ChemistryModule::MasterRunSequential() { void poet::ChemistryModule::MasterRunParallel() { /* declare most of the needed variables here */ - double chem_a, chem_b; double seq_a, seq_b, seq_c, seq_d; double worker_chemistry_a, worker_chemistry_b; double sim_e_chemistry, sim_f_chemistry; @@ -227,9 +347,6 @@ void poet::ChemistryModule::MasterRunParallel() { double dt = this->PhreeqcRM::GetTimeStep(); static uint32_t iteration = 0; - /* start time measurement of whole chemistry simulation */ - chem_a = MPI_Wtime(); - /* start time measurement of sequential part */ seq_a = MPI_Wtime(); @@ -308,8 +425,6 @@ void poet::ChemistryModule::MasterRunParallel() { for (int i = 1; i < this->comm_size; i++) { MPI_Send(NULL, 0, MPI_DOUBLE, i, LOOP_END, this->group_comm); } - chem_b = MPI_Wtime(); - this->chem_t += chem_b - chem_a; this->simtime += dt; iteration++; @@ -324,7 +439,8 @@ std::vector poet::ChemistryModule::CalculateWPSizesVector(uint32_t n_cells, uint32_t wp_size) const { bool mod_pkgs = (n_cells % wp_size) != 0; - uint32_t n_packages = (uint32_t)(n_cells / wp_size) + mod_pkgs; + uint32_t n_packages = + (uint32_t)(n_cells / wp_size) + static_cast(mod_pkgs); std::vector wp_sizes_vector(n_packages, 0); @@ -334,12 +450,3 @@ poet::ChemistryModule::CalculateWPSizesVector(uint32_t n_cells, return wp_sizes_vector; } - -void poet::ChemistryModule::setProgressBarPrintout(bool enabled) { - if (is_master) { - int type = CHEM_PROGRESSBAR; - ChemBCast(&type, 1, MPI_INT); - ChemBCast(&enabled, 1, MPI_CXX_BOOL); - } - this->print_progessbar = enabled; -} diff --git a/src/ChemistryModule/SurrogateModels/CMakeLists.txt b/src/ChemistryModule/SurrogateModels/CMakeLists.txt new file mode 100644 index 000000000..334ec98b5 --- /dev/null +++ b/src/ChemistryModule/SurrogateModels/CMakeLists.txt @@ -0,0 +1,21 @@ +file(GLOB surrogate_models_SRC + CONFIGURE_DEPENDS + "*.cpp" "*.c") + +find_library(MATH_LIBRARY m) + +add_library(surrogate_models ${surrogate_models_SRC}) +target_include_directories(surrogate_models PUBLIC ${PROJECT_SOURCE_DIR}/include) +target_link_libraries(surrogate_models PUBLIC + MPI::MPI_CXX ${MATH_LIBRARY}) +target_compile_definitions(surrogate_models PUBLIC OMPI_SKIP_MPICXX) + +option(POET_PHT_ADDITIONAL_INFO "Enables additional information in the PHT" OFF) + +if (POET_PHT_ADDITIONAL_INFO) + target_compile_definitions(surrogate_models PRIVATE POET_PHT_ADD) +endif() + +if(POET_DHT_DEBUG) + target_compile_definitions(surrogate_models PRIVATE DHT_STATISTICS) +endif() diff --git a/src/ChemistryModule/DHT.c b/src/ChemistryModule/SurrogateModels/DHT.c similarity index 68% rename from src/ChemistryModule/DHT.c rename to src/ChemistryModule/SurrogateModels/DHT.c index 485f50e18..5275cb02c 100644 --- a/src/ChemistryModule/DHT.c +++ b/src/ChemistryModule/SurrogateModels/DHT.c @@ -1,3 +1,4 @@ +/// Time-stamp: "Last modified 2023-06-28 15:58:19 mluebke" /* ** Copyright (C) 2017-2021 Max Luebke (University of Potsdam) ** @@ -15,10 +16,12 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include #include #include +#include #include #include #include @@ -52,7 +55,8 @@ static int read_flag(char flag_byte) { } DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, - unsigned int key_size, uint64_t (*hash_func)(int, const void *)) { + unsigned int key_size, + uint64_t (*hash_func)(int, const void *)) { DHT *object; MPI_Win window; void *mem_alloc; @@ -61,17 +65,20 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, // calculate how much bytes for the index are needed to address count of // buckets per process index_bytes = (int)ceil(log2(size)); - if (index_bytes % 8 != 0) index_bytes = index_bytes + (8 - (index_bytes % 8)); + if (index_bytes % 8 != 0) + index_bytes = index_bytes + (8 - (index_bytes % 8)); // allocate memory for dht-object object = (DHT *)malloc(sizeof(DHT)); - if (object == NULL) return NULL; + if (object == NULL) + return NULL; // every memory allocation has 1 additional byte for flags etc. if (MPI_Alloc_mem(size * (1 + data_size + key_size), MPI_INFO_NULL, &mem_alloc) != 0) return NULL; - if (MPI_Comm_size(comm, &comm_size) != 0) return NULL; + if (MPI_Comm_size(comm, &comm_size) != 0) + return NULL; // since MPI_Alloc_mem doesn't provide memory allocation with the memory set // to zero, we're doing this here @@ -104,7 +111,8 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, DHT_stats *stats; stats = (DHT_stats *)malloc(sizeof(DHT_stats)); - if (stats == NULL) return NULL; + if (stats == NULL) + return NULL; object->stats = stats; object->stats->writes_local = (int *)calloc(comm_size, sizeof(int)); @@ -118,7 +126,106 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, return object; } -int DHT_write(DHT *table, void *send_key, void *send_data) { +void DHT_set_accumulate_callback(DHT *table, + int (*callback_func)(int, void *, int, + void *)) { + table->accumulate_callback = callback_func; +} + +int DHT_write_accumulate(DHT *table, const void *send_key, int data_size, + void *send_data, uint32_t *proc, uint32_t *index, + int *callback_ret) { + unsigned int dest_rank, i; + int result = DHT_SUCCESS; + +#ifdef DHT_STATISTICS + table->stats->w_access++; +#endif + + // determine destination rank and index by hash of key + determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, + table->table_size, &dest_rank, table->index, + table->index_count); + + // concatenating key with data to write entry to DHT + set_flag((char *)table->send_entry); + memcpy((char *)table->send_entry + 1, (char *)send_key, table->key_size); + /* memcpy((char *)table->send_entry + table->key_size + 1, (char *)send_data, + */ + /* table->data_size); */ + + // locking window of target rank with exclusive lock + if (MPI_Win_lock(MPI_LOCK_EXCLUSIVE, dest_rank, 0, table->window) != 0) + return DHT_MPI_ERROR; + for (i = 0; i < table->index_count; i++) { + if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, + MPI_BYTE, dest_rank, table->index[i], + 1 + table->data_size + table->key_size, MPI_BYTE, + table->window) != 0) + return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + // increment eviction counter if receiving key doesn't match sending key + // entry has write flag and last index is reached. + if (read_flag(*(char *)table->recv_entry)) { + if (memcmp(send_key, (char *)table->recv_entry + 1, table->key_size) != + 0) { + if (i == (table->index_count) - 1) { + table->evictions += 1; +#ifdef DHT_STATISTICS + table->stats->evictions += 1; +#endif + result = DHT_WRITE_SUCCESS_WITH_EVICTION; + break; + } + } else + break; + } else { +#ifdef DHT_STATISTICS + table->stats->writes_local[dest_rank]++; +#endif + break; + } + } + + if (result == DHT_WRITE_SUCCESS_WITH_EVICTION) { + memset((char *)table->send_entry + 1 + table->key_size, '\0', + table->data_size); + } else { + memcpy((char *)table->send_entry + 1 + table->key_size, + (char *)table->recv_entry + 1 + table->key_size, table->data_size); + } + + *callback_ret = table->accumulate_callback( + data_size, (char *)send_data, table->data_size, + (char *)table->send_entry + 1 + table->key_size); + + // put data to DHT (with last selected index by value i) + if (*callback_ret == 0) { + if (MPI_Put(table->send_entry, 1 + table->data_size + table->key_size, + MPI_BYTE, dest_rank, table->index[i], + 1 + table->data_size + table->key_size, MPI_BYTE, + table->window) != 0) + return DHT_MPI_ERROR; + } + // unlock window of target rank + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + if (proc) { + *proc = dest_rank; + } + + if (index) { + *index = table->index[i]; + } + + return result; +} + +int DHT_write(DHT *table, void *send_key, void *send_data, uint32_t *proc, + uint32_t *index) { unsigned int dest_rank, i; int result = DHT_SUCCESS; @@ -146,7 +253,8 @@ int DHT_write(DHT *table, void *send_key, void *send_data) { 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR; - if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // increment eviction counter if receiving key doesn't match sending key // entry has write flag and last index is reached. @@ -178,12 +286,21 @@ int DHT_write(DHT *table, void *send_key, void *send_data) { table->window) != 0) return DHT_MPI_ERROR; // unlock window of target rank - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + if (proc) { + *proc = dest_rank; + } + + if (index) { + *index = table->index[i]; + } return result; } -int DHT_read(DHT *table, void *send_key, void *destination) { +int DHT_read(DHT *table, const void *send_key, void *destination) { unsigned int dest_rank, i; #ifdef DHT_STATISTICS @@ -205,7 +322,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR; - if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // increment read error counter if write flag isn't set ... if ((read_flag(*(char *)table->recv_entry)) == 0) { @@ -214,7 +332,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { table->stats->read_misses += 1; #endif // unlock window and return - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; return DHT_READ_MISS; } @@ -227,7 +346,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { table->stats->read_misses += 1; #endif // unlock window an return - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; return DHT_READ_MISS; } } else @@ -235,7 +355,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { } // unlock window of target rank - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // if matching key was found copy data into memory of passed pointer memcpy((char *)destination, (char *)table->recv_entry + table->key_size + 1, @@ -244,6 +365,34 @@ int DHT_read(DHT *table, void *send_key, void *destination) { return DHT_SUCCESS; } +int DHT_read_location(DHT *table, uint32_t proc, uint32_t index, + void *destination) { + const uint32_t bucket_size = table->data_size + table->key_size + 1; + +#ifdef DHT_STATISTICS + table->stats->r_access++; +#endif + + // locking window of target rank with shared lock + if (MPI_Win_lock(MPI_LOCK_SHARED, proc, 0, table->window) != 0) + return DHT_MPI_ERROR; + // receive data + if (MPI_Get(table->recv_entry, bucket_size, MPI_BYTE, proc, index, + bucket_size, MPI_BYTE, table->window) != 0) { + return DHT_MPI_ERROR; + } + + // unlock window of target rank + if (MPI_Win_unlock(proc, table->window) != 0) + return DHT_MPI_ERROR; + + // if matching key was found copy data into memory of passed pointer + memcpy((char *)destination, (char *)table->recv_entry + 1 + table->key_size, + table->data_size); + + return DHT_SUCCESS; +} + int DHT_to_file(DHT *table, const char *filename) { // open file MPI_File file; @@ -257,17 +406,15 @@ int DHT_to_file(DHT *table, const char *filename) { // write header (key_size and data_size) if (rank == 0) { - if (MPI_File_write(file, &table->key_size, 1, MPI_INT, MPI_STATUS_IGNORE) != - 0) + if (MPI_File_write_shared(file, &table->key_size, 1, MPI_INT, + MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR; - if (MPI_File_write(file, &table->data_size, 1, MPI_INT, - MPI_STATUS_IGNORE) != 0) + if (MPI_File_write_shared(file, &table->data_size, 1, MPI_INT, + MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR; } - // seek file pointer behind header for all processes - if (MPI_File_seek_shared(file, DHT_FILEHEADER_SIZE, MPI_SEEK_SET) != 0) - return DHT_FILE_IO_ERROR; + MPI_Barrier(table->communicator); char *ptr; int bucket_size = table->key_size + table->data_size + 1; @@ -283,8 +430,12 @@ int DHT_to_file(DHT *table, const char *filename) { return DHT_FILE_WRITE_ERROR; } } + + MPI_Barrier(table->communicator); + // close file - if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_close(&file) != 0) + return DHT_FILE_IO_ERROR; return DHT_SUCCESS; } @@ -303,7 +454,8 @@ int DHT_from_file(DHT *table, const char *filename) { return DHT_FILE_IO_ERROR; // get file size - if (MPI_File_get_size(file, &f_size) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_get_size(file, &f_size) != 0) + return DHT_FILE_IO_ERROR; MPI_Comm_rank(table->communicator, &rank); @@ -322,8 +474,10 @@ int DHT_from_file(DHT *table, const char *filename) { return DHT_FILE_READ_ERROR; // compare if written header data and key size matches current sizes - if (*(int *)buffer != table->key_size) return DHT_WRONG_FILE; - if (*(int *)(buffer + 4) != table->data_size) return DHT_WRONG_FILE; + if (*(int *)buffer != table->key_size) + return DHT_WRONG_FILE; + if (*(int *)(buffer + 4) != table->data_size) + return DHT_WRONG_FILE; // set offset for each process offset = bucket_size * table->comm_size; @@ -348,14 +502,16 @@ int DHT_from_file(DHT *table, const char *filename) { // extract key and data and write to DHT key = buffer; data = (buffer + table->key_size); - if (DHT_write(table, key, data) == DHT_MPI_ERROR) return DHT_MPI_ERROR; + if (DHT_write(table, key, data, NULL, NULL) == DHT_MPI_ERROR) + return DHT_MPI_ERROR; // increment current position cur_pos += offset; } free(buffer); - if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_close(&file) != 0) + return DHT_FILE_IO_ERROR; return DHT_SUCCESS; } @@ -377,8 +533,10 @@ int DHT_free(DHT *table, int *eviction_counter, int *readerror_counter) { return DHT_MPI_ERROR; *readerror_counter = buf; } - if (MPI_Win_free(&(table->window)) != 0) return DHT_MPI_ERROR; - if (MPI_Free_mem(table->mem_alloc) != 0) return DHT_MPI_ERROR; + if (MPI_Win_free(&(table->window)) != 0) + return DHT_MPI_ERROR; + if (MPI_Free_mem(table->mem_alloc) != 0) + return DHT_MPI_ERROR; free(table->recv_entry); free(table->send_entry); free(table->index); @@ -407,7 +565,8 @@ int DHT_print_statistics(DHT *table) { #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" // obtaining all values from all processes in the communicator - if (rank == 0) read_misses = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + read_misses = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->read_misses, 1, MPI_INT, read_misses, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -416,7 +575,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->read_misses = 0; - if (rank == 0) evictions = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + evictions = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->evictions, 1, MPI_INT, evictions, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -425,7 +585,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->evictions = 0; - if (rank == 0) w_access = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + w_access = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->w_access, 1, MPI_INT, w_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -434,7 +595,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->w_access = 0; - if (rank == 0) r_access = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + r_access = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->r_access, 1, MPI_INT, r_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -443,13 +605,14 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->r_access = 0; - if (rank == 0) written_buckets = (int *)calloc(table->comm_size, sizeof(int)); + if (rank == 0) + written_buckets = (int *)calloc(table->comm_size, sizeof(int)); if (MPI_Reduce(table->stats->writes_local, written_buckets, table->comm_size, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR; - if (rank == 0) { // only process with rank 0 will print out results as a - // table + if (rank == 0) { // only process with rank 0 will print out results as a + // table int sum_written_buckets = 0; for (int i = 0; i < table->comm_size; i++) { diff --git a/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp new file mode 100644 index 000000000..86df22d49 --- /dev/null +++ b/src/ChemistryModule/SurrogateModels/DHT_Wrapper.cpp @@ -0,0 +1,300 @@ +// Time-stamp: "Last modified 2023-08-01 13:41:57 mluebke" + +/* +** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of +** Potsdam) +** +** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam) +** +** POET is free software; you can redistribute it and/or modify it under the +** terms of the GNU General Public License as published by the Free Software +** Foundation; either version 2 of the License, or (at your option) any later +** version. +** +** POET is distributed in the hope that it will be useful, but WITHOUT ANY +** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +** A PARTICULAR PURPOSE. See the GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along with +** this program; if not, write to the Free Software Foundation, Inc., 51 +** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + +#include "poet/DHT_Wrapper.hpp" +#include "poet/DHT_Types.hpp" +#include "poet/HashFunctions.hpp" +#include "poet/Interpolation.hpp" +#include "poet/LookupKey.hpp" +#include "poet/Rounding.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace poet { + +DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, + const NamedVector &key_species, + const std::vector &key_indices, + uint32_t data_count) + : key_count(key_indices.size()), data_count(data_count), + input_key_elements(key_indices), communicator(dht_comm), + key_species(key_species) { + // initialize DHT object + // key size = count of key elements + timestep + uint32_t key_size = (key_count + 1) * sizeof(Lookup_Keyelement); + uint32_t data_size = + (data_count + input_key_elements.size()) * sizeof(double); + uint32_t buckets_per_process = + static_cast(dht_size / (data_size + key_size)); + dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, + &poet::Murmur2_64A); + + dht_signif_vector = key_species.getValues(); + + // this->dht_signif_vector.resize(key_size, DHT_KEY_SIGNIF_DEFAULT); + + this->dht_prop_type_vector.resize(key_count, DHT_TYPE_DEFAULT); + + const auto key_names = key_species.getNames(); + + auto tot_h = std::find(key_names.begin(), key_names.end(), "H"); + if (tot_h != key_names.end()) { + this->dht_prop_type_vector[tot_h - key_names.begin()] = DHT_TYPE_TOTAL; + } + auto tot_o = std::find(key_names.begin(), key_names.end(), "O"); + if (tot_o != key_names.end()) { + this->dht_prop_type_vector[tot_o - key_names.begin()] = DHT_TYPE_TOTAL; + } + auto charge = std::find(key_names.begin(), key_names.end(), "Charge"); + if (charge != key_names.end()) { + this->dht_prop_type_vector[charge - key_names.begin()] = DHT_TYPE_CHARGE; + } +} + +DHT_Wrapper::~DHT_Wrapper() { + // free DHT + DHT_free(dht_object, NULL, NULL); +} +auto DHT_Wrapper::checkDHT(int length, double dt, + const std::vector &work_package, + std::vector &curr_mapping) + -> const DHT_ResultObject & { + + dht_results.length = length; + dht_results.keys.resize(length); + dht_results.results.resize(length); + dht_results.needPhreeqc.resize(length); + + std::vector bucket_writer(this->data_count + + input_key_elements.size()); + std::vector new_mapping; + + // loop over every grid cell contained in work package + for (int i = 0; i < length; i++) { + // point to current grid cell + void *key = (void *)&(work_package[i * this->data_count]); + auto &data = dht_results.results[i]; + auto &key_vector = dht_results.keys[i]; + + // data.resize(this->data_count); + key_vector = fuzzForDHT(this->key_count, key, dt); + + // overwrite input with data from DHT, IF value is found in DHT + int res = + DHT_read(this->dht_object, key_vector.data(), bucket_writer.data()); + + switch (res) { + case DHT_SUCCESS: + dht_results.results[i] = inputAndRatesToOutput(bucket_writer); + dht_results.needPhreeqc[i] = false; + this->dht_hits++; + break; + case DHT_READ_MISS: + dht_results.needPhreeqc[i] = true; + new_mapping.push_back(curr_mapping[i]); + dht_results.results[i] = std::vector{ + &work_package[i * this->data_count], + &work_package[i * this->data_count] + this->data_count}; + + // HACK: apply normalization to total H and O in results field of DHT + // dht_results.results[i][0] -= base_totals[0]; + // dht_results.results[i][1] -= base_totals[1]; + break; + } + } + + curr_mapping = std::move(new_mapping); + dht_results.old_values = work_package; + + return dht_results; +} + +void DHT_Wrapper::fillDHT(int length, const std::vector &work_package) { + + // loop over every grid cell contained in work package + dht_results.locations.resize(length); + for (int i = 0; i < length; i++) { + // If true grid cell was simulated, needs to be inserted into dht + if (dht_results.needPhreeqc[i]) { + + // check if calcite or dolomite is absent and present, resp.n and vice + // versa in input/output. If this is the case -> Do not write to DHT! + // HACK: hardcoded, should be fixed! + if ((dht_results.old_values[i * this->data_count + 7] == 0) != + (work_package[i * this->data_count + 7] == 0)) { + dht_results.needPhreeqc[i] = false; + continue; + } + + if ((dht_results.old_values[i * this->data_count + 9] == 0) != + (work_package[i * this->data_count + 9] == 0)) { + dht_results.needPhreeqc[i] = false; + continue; + } + + uint32_t proc, index; + const auto &key = dht_results.keys[i]; + const auto curr_old_data = std::vector( + dht_results.old_values.begin() + (i * this->data_count), + dht_results.old_values.begin() + ((i + 1) * this->data_count)); + const auto curr_new_data = std::vector( + work_package.begin() + (i * this->data_count), + work_package.begin() + ((i + 1) * this->data_count)); + const auto data = outputToInputAndRates(curr_old_data, curr_new_data); + // void *data = (void *)&(work_package[i * this->data_count]); + // fuzz data (round, logarithm etc.) + + // insert simulated data with fuzzed key into DHT + int res = DHT_write(this->dht_object, (void *)(key.data()), + const_cast(data.data()), &proc, &index); + + dht_results.locations[i] = {proc, index}; + + // if data was successfully written ... + if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) { + dht_evictions++; + } + } + } +} + +std::vector +DHT_Wrapper::outputToInputAndRates(const std::vector &old_results, + const std::vector &new_results) { + const int prefix_size = this->input_key_elements.size(); + + std::vector output(prefix_size + this->data_count); + std::copy(new_results.begin(), new_results.end(), + output.begin() + prefix_size); + + for (int i = 0; i < prefix_size; i++) { + const int data_elem_i = input_key_elements[i]; + output[i] = old_results[data_elem_i]; + output[prefix_size + data_elem_i] -= old_results[data_elem_i]; + } + + return output; +} + +std::vector +DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data) { + const int prefix_size = this->input_key_elements.size(); + + std::vector output{dht_data.begin() + prefix_size, dht_data.end()}; + + for (int i = 0; i < prefix_size; i++) { + const int data_elem_i = input_key_elements[i]; + output[data_elem_i] += dht_data[i]; + } + + return output; +} + +void DHT_Wrapper::resultsToWP(std::vector &work_package) { + for (int i = 0; i < dht_results.length; i++) { + if (!dht_results.needPhreeqc[i]) { + std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), + work_package.begin() + (data_count * i)); + } + } +} + +int DHT_Wrapper::tableToFile(const char *filename) { + int res = DHT_to_file(dht_object, filename); + return res; +} + +int DHT_Wrapper::fileToTable(const char *filename) { + int res = DHT_from_file(dht_object, filename); + if (res != DHT_SUCCESS) + return res; + +#ifdef DHT_STATISTICS + DHT_print_statistics(dht_object); +#endif + + return DHT_SUCCESS; +} + +void DHT_Wrapper::printStatistics() { + int res; + + res = DHT_print_statistics(dht_object); + + if (res != DHT_SUCCESS) { + // MPI ERROR ... WHAT TO DO NOW? + // RUNNING CIRCLES WHILE SCREAMING + } +} + +LookupKey DHT_Wrapper::fuzzForDHT(int var_count, void *key, double dt) { + const auto c_zero_val = std::pow(10, AQUEOUS_EXP); + + const Lookup_Keyelement dummy = {.0}; + LookupKey vecFuzz(var_count + 1, dummy); + DHT_Rounder rounder; + + int totals_i = 0; + // introduce fuzzing to allow more hits in DHT + // loop over every variable of grid cell + for (std::uint32_t i = 0; i < input_key_elements.size(); i++) { + if (input_key_elements[i] == DHT_KEY_INPUT_CUSTOM) { + continue; + } + double curr_key = ((double *)key)[input_key_elements[i]]; + if (curr_key != 0) { + if (curr_key < c_zero_val && + this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) { + continue; + } + if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { + curr_key -= base_totals[totals_i++]; + } + vecFuzz[i] = + rounder.round(curr_key, dht_signif_vector[i], + this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); + } + } + // add timestep to the end of the key as double value + vecFuzz[var_count].fp_element = dt; + + return vecFuzz; +} + +void poet::DHT_Wrapper::SetSignifVector(std::vector signif_vec) { + if (signif_vec.size() != this->key_count) { + throw std::runtime_error( + "Significant vector size mismatches count of key elements."); + } + + this->dht_signif_vector = signif_vec; +} +} // namespace poet diff --git a/src/ChemistryModule/HashFunctions.cpp b/src/ChemistryModule/SurrogateModels/HashFunctions.cpp similarity index 97% rename from src/ChemistryModule/HashFunctions.cpp rename to src/ChemistryModule/SurrogateModels/HashFunctions.cpp index b2739aea6..431cd286f 100644 --- a/src/ChemistryModule/HashFunctions.cpp +++ b/src/ChemistryModule/SurrogateModels/HashFunctions.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-04-24 16:56:18 mluebke" +// Time-stamp: "Last modified 2023-04-24 23:20:55 mluebke" /* **----------------------------------------------------------------------------- ** MurmurHash2 was written by Austin Appleby, and is placed in the public diff --git a/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp new file mode 100644 index 000000000..0d139ac3e --- /dev/null +++ b/src/ChemistryModule/SurrogateModels/InterpolationModule.cpp @@ -0,0 +1,197 @@ +// Time-stamp: "Last modified 2023-08-01 17:54:53 mluebke" + +#include "poet/DHT_Wrapper.hpp" +#include "poet/HashFunctions.hpp" +#include "poet/Interpolation.hpp" +#include "poet/LookupKey.hpp" +#include "poet/Rounding.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +extern "C" { +#include "poet/DHT.h" +} + +namespace poet { + +InterpolationModule::InterpolationModule( + std::uint32_t entries_per_bucket, std::uint64_t size_per_process, + std::uint32_t min_entries_needed, DHT_Wrapper &dht, + const NamedVector &interp_key_signifs, + const std::vector &dht_key_indices) + : dht_instance(dht), key_signifs(interp_key_signifs), + key_indices(dht_key_indices), min_entries_needed(min_entries_needed) { + + initPHT(this->key_signifs.size(), entries_per_bucket, size_per_process, + dht.getCommunicator()); + + pht->setSourceDHT(dht.getDHT()); +} + +void InterpolationModule::initPHT(std::uint32_t key_count, + std::uint32_t entries_per_bucket, + std::uint32_t size_per_process, + MPI_Comm communicator) { + uint32_t key_size = key_count * sizeof(Lookup_Keyelement) + sizeof(double); + uint32_t data_size = sizeof(DHT_Location); + + pht = std::make_unique( + key_size, data_size, entries_per_bucket, size_per_process, communicator); +} + +void InterpolationModule::writePairs(const DHT_Wrapper::DHT_ResultObject &in) { + for (int i = 0; i < in.length; i++) { + if (in.needPhreeqc[i]) { + const auto coarse_key = roundKey(in.keys[i]); + pht->writeLocationToPHT(coarse_key, in.locations[i]); + } + } +} + +void InterpolationModule::tryInterpolation( + DHT_Wrapper::DHT_ResultObject &dht_results, + std::vector &curr_mapping) { + interp_result.status.resize(dht_results.length, NOT_NEEDED); + interp_result.results.resize(dht_results.length, {}); + + for (int i = 0; i < dht_results.length; i++) { + if (!dht_results.needPhreeqc[i]) { + interp_result.status[i] = NOT_NEEDED; + continue; + } + + auto pht_result = + pht->query(roundKey(dht_results.keys[i]), this->key_signifs.getValues(), + this->min_entries_needed, dht_instance.getInputCount(), + dht_instance.getOutputCount()); + + int pht_i = 0; + + while (pht_i < pht_result.size) { + if (pht_result.size < this->min_entries_needed) { + break; + } + + auto in_it = pht_result.in_values.begin() + pht_i; + auto out_it = pht_result.out_values.begin() + pht_i; + + bool same_sig_calcite = (pht_result.in_values[pht_i][7] == 0) == + (dht_results.results[i][7] == 0); + bool same_sig_dolomite = (pht_result.in_values[pht_i][8] == 0) == + (dht_results.results[i][9] == 0); + if (!same_sig_calcite || !same_sig_dolomite) { + pht_result.size -= 1; + pht_result.in_values.erase(in_it); + pht_result.out_values.erase(out_it); + continue; + } + + pht_i += 1; + } + + if (pht_result.size < this->min_entries_needed) { + interp_result.status[i] = INSUFFICIENT_DATA; + continue; + } + +#ifdef POET_PHT_ADD + this->pht->incrementReadCounter(roundKey(dht_results.keys[i])); +#endif + + double start_fc = MPI_Wtime(); + // mean water + // double mean_water = 0; + // for (int out_i = 0; out_i < pht_result.size; out_i++) { + // mean_water += pht_result.out_values[out_i][0]; + // } + // mean_water /= pht_result.size; + + auto calcMassBalance = [](const std::vector &input) { + double C, Ca, Mg; + C = input[3] + input[7] + 2 * input[9]; + Ca = input[4] + input[7] + input[9]; + Mg = input[6] + input[9]; + + return std::array{C, Ca, Mg}; + }; + + auto mass_in = calcMassBalance(dht_results.results[i]); + + const auto DHT_inputElements = dht_instance.getKeyElements(); + + // HACK: transform input elements to uint, as there shall no any user + // defined key species present yet + + // std::vector interp_inputElements; + // std::transform(DHT_inputElements.begin(), DHT_inputElements.end(), + // interp_inputElements.begin(), [](std::int32_t x) { + // if (x < 0) { + // x = 0; + // } + // return x; + // }); + + interp_result.results[i] = + f_interpolate(dht_instance.getKeyElements(), dht_results.results[i], + pht_result.in_values, pht_result.out_values); + + auto mass_out = calcMassBalance(interp_result.results[i]); + + double diff[3]; + + std::transform(mass_in.begin(), mass_in.end(), mass_out.begin(), diff, + std::minus<>()); + + bool exceeding = false; + for (const auto comp : diff) { + if (std::abs(comp) > 1e-10) { + exceeding = true; + break; + } + } + + if (exceeding) { + interp_result.status[i] = INSUFFICIENT_DATA; + continue; + } + + if (interp_result.results[i][7] < 0 || interp_result.results[i][9] < 0) { + interp_result.status[i] = INSUFFICIENT_DATA; + continue; + } + + // interp_result.results[i][0] = mean_water; + this->interp_t += MPI_Wtime() - start_fc; + + this->interpolations++; + + curr_mapping.erase(std::remove(curr_mapping.begin(), curr_mapping.end(), i), + curr_mapping.end()); + + dht_results.needPhreeqc[i] = false; + interp_result.status[i] = RES_OK; + } +} + +void InterpolationModule::resultsToWP(std::vector &work_package) const { + for (uint32_t i = 0; i < interp_result.status.size(); i++) { + if (interp_result.status[i] == RES_OK) { + const std::size_t length = + interp_result.results[i].end() - interp_result.results[i].begin(); + std::copy(interp_result.results[i].begin(), + interp_result.results[i].end(), + work_package.begin() + (length * i)); + } + } +} + +} // namespace poet diff --git a/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp b/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp new file mode 100644 index 000000000..e0f014755 --- /dev/null +++ b/src/ChemistryModule/SurrogateModels/ProximityHashTable.cpp @@ -0,0 +1,275 @@ +// Time-stamp: "Last modified 2023-08-01 17:11:42 mluebke" + +#include "poet/DHT_Wrapper.hpp" +#include "poet/HashFunctions.hpp" +#include "poet/Interpolation.hpp" +#include "poet/LookupKey.hpp" +#include "poet/Rounding.hpp" +#include +#include +#include +#include +#include +#include +#include +extern "C" { +#include "poet/DHT.h" +} + +namespace poet { + +ProximityHashTable::ProximityHashTable(uint32_t key_size, uint32_t data_size, + uint32_t entry_count, + uint32_t size_per_process, + MPI_Comm communicator_) + : communicator(communicator_) { + + data_size *= entry_count; + data_size += sizeof(bucket_indicator); + +#ifdef POET_PHT_ADD + data_size += sizeof(std::uint64_t); +#endif + + bucket_store = new char[data_size]; + + uint32_t buckets_per_process = + static_cast(size_per_process / (data_size + key_size)); + + this->prox_ht = DHT_create(communicator, buckets_per_process, data_size, + key_size, &poet::Murmur2_64A); + + DHT_set_accumulate_callback(this->prox_ht, PHT_callback_function); +} + +ProximityHashTable::~ProximityHashTable() { + delete[] bucket_store; + if (prox_ht) { + DHT_free(prox_ht, NULL, NULL); + } +} + +int ProximityHashTable::PHT_callback_function(int in_data_size, void *in_data, + int out_data_size, + void *out_data) { + const int max_elements_per_bucket = + static_cast((out_data_size - sizeof(bucket_indicator) +#ifdef POET_PHT_ADD + - sizeof(std::uint64_t) +#endif + ) / + in_data_size); + DHT_Location *input = reinterpret_cast(in_data); + + bucket_indicator *occupied_buckets = + reinterpret_cast(out_data); + DHT_Location *pairs = reinterpret_cast(occupied_buckets + 1); + + if (*occupied_buckets == max_elements_per_bucket) { + return INTERP_CB_FULL; + } + + for (bucket_indicator i = 0; i < *occupied_buckets; i++) { + if (pairs[i] == *input) { + return INTERP_CB_ALREADY_IN; + } + } + + pairs[(*occupied_buckets)++] = *input; + + return INTERP_CB_OK; +} + +void ProximityHashTable::writeLocationToPHT(LookupKey key, + DHT_Location location) { + + double start = MPI_Wtime(); + + // if (localCache[key].first) { + // return; + // } + + int ret_val; + + int status = DHT_write_accumulate(prox_ht, key.data(), sizeof(location), + &location, NULL, NULL, &ret_val); + + if (status == DHT_WRITE_SUCCESS_WITH_EVICTION) { + this->dht_evictions++; + } + + // if (ret_val == INTERP_CB_FULL) { + // localCache(key, {}); + // } + + this->pht_write_t += MPI_Wtime() - start; +} + +const ProximityHashTable::PHT_Result &ProximityHashTable::query( + const LookupKey &key, const std::vector &signif, + std::uint32_t min_entries_needed, std::uint32_t input_count, + std::uint32_t output_count) { + + double start_r = MPI_Wtime(); + const auto cache_ret = localCache[key]; + if (cache_ret.first) { + cache_hits++; + return (lookup_results = cache_ret.second); + } + + int res = DHT_read(prox_ht, key.data(), bucket_store); + this->pht_read_t += MPI_Wtime() - start_r; + + if (res != DHT_SUCCESS) { + this->lookup_results.size = 0; + return lookup_results; + } + + auto *bucket_element_count = + reinterpret_cast(bucket_store); + auto *bucket_elements = + reinterpret_cast(bucket_element_count + 1); + + if (*bucket_element_count < min_entries_needed) { + this->lookup_results.size = 0; + return lookup_results; + } + + lookup_results.size = *bucket_element_count; + auto locations = std::vector( + bucket_elements, bucket_elements + *(bucket_element_count)); + + lookup_results.in_values.clear(); + lookup_results.in_values.reserve(*bucket_element_count); + + lookup_results.out_values.clear(); + lookup_results.out_values.reserve(*bucket_element_count); + + for (const auto &loc : locations) { + double start_g = MPI_Wtime(); + DHT_read_location(source_dht, loc.first, loc.second, dht_buffer.data()); + this->pht_gather_dht_t += MPI_Wtime() - start_g; + + auto *buffer = reinterpret_cast(dht_buffer.data()); + + lookup_results.in_values.push_back( + std::vector(buffer, buffer + input_count)); + + buffer += input_count; + lookup_results.out_values.push_back( + std::vector(buffer, buffer + output_count)); + + // if (!similarityCheck(check_key, key, signif)) { + // // TODO: original stored location in PHT was overwritten in DHT. + // Need to + // // handle this! + // lookup_results.size--; + // if (lookup_results.size < min_entries_needed) { + // lookup_results.size = 0; + // break; + // } + // continue; + // } + + // auto input = convertKeysFromDHT(buffer_start, dht_key_count); + // // remove timestep from the key + // input.pop_back(); + // lookup_results.in_keys.push_back(input); + + // auto *data = reinterpret_cast(buffer + dht_key_count); + // lookup_results.out_values.push_back( + // std::vector(data, data + dht_data_count)); + } + + if (lookup_results.size != 0) { + localCache(key, lookup_results); + } + + return lookup_results; +} + +inline bool +ProximityHashTable::similarityCheck(const LookupKey &fine, + const LookupKey &coarse, + const std::vector &signif) { + + PHT_Rounder rounder; + + for (int i = 0; i < signif.size(); i++) { + if (!(rounder.round(fine[i], signif[i]) == coarse[i])) { + return false; + } + } + + return true; +} + +inline std::vector +ProximityHashTable::convertKeysFromDHT(Lookup_Keyelement *keys_in, + std::uint32_t key_size) { + std::vector output(key_size); + DHT_Rounder rounder; + for (int i = 0; i < key_size; i++) { + output[i] = rounder.convert(keys_in[i]); + } + + return output; +} + +void ProximityHashTable::Cache::operator()(const LookupKey &key, + const PHT_Result val) { + const auto elemIt = this->find(key); + + if (elemIt == this->end()) { + + if (this->free_mem < 0) { + const LookupKey &to_del = this->lru_queue.back(); + const auto elem_d = this->find(to_del); + this->free_mem += elem_d->second.getSize(); + this->erase(to_del); + this->keyfinder.erase(to_del); + this->lru_queue.pop_back(); + } + + this->insert({key, val}); + this->lru_queue.emplace_front(key); + this->keyfinder[key] = lru_queue.begin(); + this->free_mem -= val.getSize(); + return; + } + + elemIt->second = val; +} + +std::pair +ProximityHashTable::Cache::operator[](const LookupKey &key) { + const auto elemIt = this->find(key); + + if (elemIt == this->end()) { + return {false, {}}; + } + + this->lru_queue.splice(lru_queue.begin(), lru_queue, this->keyfinder[key]); + return {true, elemIt->second}; +} + +#ifdef POET_PHT_ADD +static int PHT_increment_counter(int in_data_size, void *in_data, + int out_data_size, void *out_data) { + char *start = reinterpret_cast(out_data); + std::uint64_t *counter = reinterpret_cast( + start + out_data_size - sizeof(std::uint64_t)); + *counter += 1; + + return 0; +} + +void ProximityHashTable::incrementReadCounter(const LookupKey &key) { + auto *old_func_ptr = this->prox_ht->accumulate_callback; + DHT_set_accumulate_callback(prox_ht, PHT_increment_counter); + int ret, dummy; + DHT_write_accumulate(prox_ht, key.data(), 0, NULL, NULL, NULL, &ret); + DHT_set_accumulate_callback(prox_ht, old_func_ptr); +} +#endif +} // namespace poet diff --git a/src/ChemistryModule/WorkerFunctions.cpp b/src/ChemistryModule/WorkerFunctions.cpp index 4f5a59fa0..a87b07a86 100644 --- a/src/ChemistryModule/WorkerFunctions.cpp +++ b/src/ChemistryModule/WorkerFunctions.cpp @@ -1,10 +1,13 @@ -// Time-stamp: "Last modified 2023-07-12 12:56:17 mluebke" +// Time-stamp: "Last modified 2023-08-01 17:22:20 mluebke" #include "IrmResult.h" #include "poet/ChemistryModule.hpp" +#include "poet/DHT_Wrapper.hpp" +#include "poet/Interpolation.hpp" #include -#include +#include +#include #include #include #include @@ -15,6 +18,147 @@ #include #include +namespace poet { + +//std::vector +//inverseDistanceWeighting(const std::vector &to_calc, +// const std::vector &from, +// const std::vector> &input, +// const std::vector> &output) { +// std::vector results = from; +// +// const std::uint32_t buffer_size = input.size() + 1; +// double buffer[buffer_size]; +// double from_rescaled; +// +// const std::uint32_t data_set_n = input.size(); +// double rescaled[to_calc.size()][data_set_n + 1]; +// double weights[data_set_n]; +// +// // rescaling over all key elements +// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { +// const auto output_comp_i = to_calc[key_comp_i]; +// +// // rescale input between 0 and 1 +// for (int point_i = 0; point_i < data_set_n; point_i++) { +// rescaled[key_comp_i][point_i] = input[point_i][key_comp_i]; +// } +// +// rescaled[key_comp_i][data_set_n] = from[output_comp_i]; +// +// const double min = *std::min_element(rescaled[key_comp_i], +// rescaled[key_comp_i] + data_set_n + 1); +// const double max = *std::max_element(rescaled[key_comp_i], +// rescaled[key_comp_i] + data_set_n + 1); +// +// for (int point_i = 0; point_i < data_set_n; point_i++) { +// rescaled[key_comp_i][point_i] = +// ((max - min) != 0 +// ? (rescaled[key_comp_i][point_i] - min) / (max - min) +// : 0); +// } +// rescaled[key_comp_i][data_set_n] = +// ((max - min) != 0 ? (from[output_comp_i] - min) / (max - min) : 0); +// } +// +// // calculate distances for each data set +// double inv_sum = 0; +// for (int point_i = 0; point_i < data_set_n; point_i++) { +// double distance = 0; +// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { +// distance += std::pow( +// rescaled[key_comp_i][point_i] - rescaled[key_comp_i][data_set_n], 2); +// } +// weights[point_i] = 1 / std::sqrt(distance); +// assert(!std::isnan(weights[point_i])); +// inv_sum += weights[point_i]; +// } +// +// assert(!std::isnan(inv_sum)); +// +// // actual interpolation +// // bool has_h = false; +// // bool has_o = false; +// +// for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { +// const auto output_comp_i = to_calc[key_comp_i]; +// double key_delta = 0; +// +// // if (interp_i == 0) { +// // has_h = true; +// // } +// +// // if (interp_i == 1) { +// // has_o = true; +// // } +// +// for (int j = 0; j < data_set_n; j++) { +// key_delta += weights[j] * input[j][key_comp_i]; +// } +// +// key_delta /= inv_sum; +// +// results[output_comp_i] = from[output_comp_i] + key_delta; +// } +// +// // if (!has_h) { +// // double new_val = 0; +// // for (int j = 0; j < data_set_n; j++) { +// // new_val += weights[j] * output[j][0]; +// // } +// // results[0] = new_val / inv_sum; +// // } +// +// // if (!has_h) { +// // double new_val = 0; +// // for (int j = 0; j < data_set_n; j++) { +// // new_val += weights[j] * output[j][1]; +// // } +// // results[1] = new_val / inv_sum; +// // } +// +// // for (std::uint32_t i = 0; i < to_calc.size(); i++) { +// // const std::uint32_t interp_i = to_calc[i]; +// +// // // rescale input between 0 and 1 +// // for (int j = 0; j < input.size(); j++) { +// // buffer[j] = input[j].at(i); +// // } +// +// // buffer[buffer_size - 1] = from[interp_i]; +// +// // const double min = *std::min_element(buffer, buffer + buffer_size); +// // const double max = *std::max_element(buffer, buffer + buffer_size); +// +// // for (int j = 0; j < input.size(); j++) { +// // buffer[j] = ((max - min) != 0 ? (buffer[j] - min) / (max - min) : 1); +// // } +// // from_rescaled = +// // ((max - min) != 0 ? (from[interp_i] - min) / (max - min) : 0); +// +// // double inv_sum = 0; +// +// // // calculate distances for each point +// // for (int i = 0; i < input.size(); i++) { +// // const double distance = std::pow(buffer[i] - from_rescaled, 2); +// +// // buffer[i] = distance > 0 ? (1 / std::sqrt(distance)) : 0; +// // inv_sum += buffer[i]; +// // } +// // // calculate new values +// // double new_val = 0; +// // for (int i = 0; i < output.size(); i++) { +// // new_val += buffer[i] * output[i][interp_i]; +// // } +// // results[interp_i] = new_val / inv_sum; +// // if (std::isnan(results[interp_i])) { +// // std::cout << "nan with new_val = " << output[0][i] << std::endl; +// // } +// // } +// +// return results; +//} + inline std::string get_string(int root, MPI_Comm communicator) { int count; MPI_Bcast(&count, 1, MPI_INT, root, communicator); @@ -52,43 +196,6 @@ void poet::ChemistryModule::WorkerLoop() { initializeField(dummy); break; } - case CHEM_DHT_ENABLE: { - bool enable; - ChemBCast(&enable, 1, MPI_CXX_BOOL); - - uint32_t size_mb; - ChemBCast(&size_mb, 1, MPI_UINT32_T); - - std::vector name_dummy; - - SetDHTEnabled(enable, size_mb, name_dummy); - break; - } - case CHEM_DHT_SIGNIF_VEC: { - std::vector input_vec; - - SetDHTSignifVector(input_vec); - break; - } - case CHEM_DHT_PROP_TYPE_VEC: { - std::vector input_vec(this->prop_count); - ChemBCast(input_vec.data(), this->prop_count, MPI_UINT32_T); - - SetDHTPropTypeVector(input_vec); - break; - } - case CHEM_DHT_SNAPS: { - int type; - ChemBCast(&type, 1, MPI_INT); - - SetDHTSnaps(type, get_string(0, this->group_comm)); - - break; - } - case CHEM_DHT_READ_FILE: { - ReadDHTFile(get_string(0, this->group_comm)); - break; - } case CHEM_WORK_LOOP: { WorkerProcessPkgs(timings, iteration); break; @@ -103,12 +210,6 @@ void poet::ChemistryModule::WorkerLoop() { WorkerMetricsToMaster(type); break; } - case CHEM_PROGRESSBAR: { - bool enable; - ChemBCast(&enable, 1, MPI_CXX_BOOL); - setProgressBarPrintout(enable); - break; - } case CHEM_BREAK_MAIN_LOOP: { WorkerPostSim(iteration); loop = false; @@ -211,7 +312,9 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, dht->checkDHT(local_work_package_size, dt, vecCurrWP, vecMappingWP); dht_get_end = MPI_Wtime(); - // DHT_Results.ResultsToMapping(vecMappingWP); + if (interp_enabled) { + interp->tryInterpolation(dht->getDHTResults(), vecMappingWP); + } } phreeqc_time_start = MPI_Wtime(); @@ -225,6 +328,9 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, if (dht_enabled) { dht->resultsToWP(vecCurrWP); + if (interp_enabled) { + interp->resultsToWP(vecCurrWP); + } } /* send results to master */ @@ -238,6 +344,10 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, dht->fillDHT(local_work_package_size, vecCurrWP); dht_fill_end = MPI_Wtime(); + if (interp_enabled) { + interp->writePairs(dht->getDHTResults()); + } + timings.dht_get += dht_get_end - dht_get_start; timings.dht_fill += dht_fill_end - dht_fill_start; } @@ -252,23 +362,44 @@ void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm, MPI_STATUS_IGNORE); if (this->dht_enabled) { - this->dht->printStatistics(); + dht_hits.push_back(dht->getHits()); + dht_evictions.push_back(dht->getEvictions()); + dht->resetCounter(); - if (this->dht_snaps_type == DHT_FILES_ITEREND) { + if (this->interp_enabled) { + interp_calls.push_back(interp->getInterpolationCount()); + interp->resetCounter(); + interp->writePHTStats(); + } + + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { WorkerWriteDHTDump(iteration); + + if (this->interp_enabled) { + std::stringstream out; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } } } } void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { - if (this->dht_enabled && this->dht_snaps_type == DHT_FILES_SIMEND) { + if (this->dht_enabled && this->dht_snaps_type == DHT_SNAPS_SIMEND) { WorkerWriteDHTDump(iteration); + if (this->interp_enabled) { + std::stringstream out; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } } } void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) { std::stringstream out; - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') << std::setw(3) - << iteration << ".dht"; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".dht"; int res = dht->tableToFile(out.str().c_str()); if (res != DHT_SUCCESS && this->comm_rank == 2) std::cerr @@ -355,6 +486,26 @@ void poet::ChemistryModule::WorkerPerfToMaster(int type, this->group_comm); break; } + case WORKER_IP_WRITE: { + double val = interp->getPHTWriteTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_READ: { + double val = interp->getPHTReadTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_GATHER: { + double val = interp->getDHTGatherTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_FC: { + double val = interp->getInterpolationTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } default: { throw std::runtime_error("Unknown perf type in master's message."); } @@ -362,24 +513,46 @@ void poet::ChemistryModule::WorkerPerfToMaster(int type, } void poet::ChemistryModule::WorkerMetricsToMaster(int type) { - uint32_t value; + MPI_Comm worker_comm = dht->getCommunicator(); + int worker_rank; + MPI_Comm_rank(worker_comm, &worker_rank); + + MPI_Comm &group_comm = this->group_comm; + + auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm]( + std::vector &send_buffer, int tag) { + std::vector to_master(send_buffer.size()); + MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(), + MPI_UINT32_T, MPI_SUM, 0, worker_comm); + + if (worker_rank == 0) { + MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag, + group_comm); + } + }; + switch (type) { case WORKER_DHT_HITS: { - value = dht->getHits(); - break; - } - case WORKER_DHT_MISS: { - value = dht->getMisses(); + reduce_and_send(dht_hits, WORKER_DHT_HITS); break; } case WORKER_DHT_EVICTIONS: { - value = dht->getEvictions(); + reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS); break; } + case WORKER_IP_CALLS: { + reduce_and_send(interp_calls, WORKER_IP_CALLS); + return; + } + case WORKER_PHT_CACHE_HITS: { + std::vector input = this->interp->getPHTLocalCacheHits(); + reduce_and_send(input, WORKER_PHT_CACHE_HITS); + return; + } default: { throw std::runtime_error("Unknown perf type in master's message."); } } - MPI_Gather(&value, 1, MPI_UINT32_T, NULL, 1, MPI_UINT32_T, 0, - this->group_comm); } + +} // namespace poet diff --git a/src/SimParams.cpp b/src/SimParams.cpp index 8d484a54b..80c0015b5 100644 --- a/src/SimParams.cpp +++ b/src/SimParams.cpp @@ -82,20 +82,20 @@ poet::DiffusionParams::s_DiffusionParams(RInside &R) { Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_index")); } -poet::ChemistryParams::s_ChemistryParams(RInside &R) { +void poet::ChemistryParams::initFromR(RInsidePOET &R) { this->database_path = Rcpp::as(R.parseEval("mysetup$chemistry$database")); this->input_script = Rcpp::as(R.parseEval("mysetup$chemistry$input_script")); - if (Rcpp::as( - R.parseEval("'dht_species' %in% names(mysetup$chemistry)"))) { - this->dht_species = Rcpp::as>( - R.parseEval("mysetup$chemistry$dht_species")); + + if (R.checkIfExists("dht_species", "mysetup$chemistry")) { + this->dht_signifs = + R.wrapNamedVector("mysetup$chemistry$dht_species"); } - if (Rcpp::as( - R.parseEval("'dht_signif' %in% names(mysetup$chemistry)"))) { - this->dht_signif = Rcpp::as>( - R.parseEval("mysetup$chemistry$dht_signif")); + + if (R.checkIfExists("pht_species", "mysetup$chemistry")) { + this->pht_signifs = + R.wrapNamedVector("mysetup$chemistry$pht_species"); } } @@ -104,7 +104,7 @@ SimParams::SimParams(int world_rank_, int world_size_) { this->simparams.world_size = world_size_; } -int SimParams::parseFromCmdl(char *argv[], RInside &R) { +int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { // initialize argh object argh::parser cmdl(argv); @@ -144,32 +144,26 @@ int SimParams::parseFromCmdl(char *argv[], RInside &R) { simparams.print_progressbar = cmdl[{"P", "progress"}]; /*Parse DHT arguments*/ - simparams.dht_enabled = cmdl["dht"]; + chem_params.use_dht = cmdl["dht"]; // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n'; - if (simparams.dht_enabled) { - cmdl("dht-strategy", 0) >> simparams.dht_strategy; - // cout << "CPP: DHT strategy is " << dht_strategy << endl; - - cmdl("dht-signif", 5) >> simparams.dht_significant_digits; - // cout << "CPP: DHT significant digits = " << dht_significant_digits << - // endl; - - simparams.dht_log = !(cmdl["dht-nolog"]); - // cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" - // : "OFF" ) << endl; - - cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> simparams.dht_size_per_process; + if (chem_params.use_dht) { + cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> chem_params.dht_size; // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << // endl; - cmdl("dht-snaps", 0) >> simparams.dht_snaps; + cmdl("dht-snaps", 0) >> chem_params.dht_snaps; - cmdl("dht-file") >> dht_file; + cmdl("dht-file") >> chem_params.dht_file; } /*Parse work package size*/ cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; + chem_params.use_interp = cmdl["interp"]; + cmdl("interp-size", 100) >> chem_params.pht_size; + cmdl("interp-min", 5) >> chem_params.interp_min_entries; + cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; + /*Parse output options*/ simparams.store_result = !cmdl["ignore-result"]; @@ -177,25 +171,27 @@ int SimParams::parseFromCmdl(char *argv[], RInside &R) { cout << "CPP: Complete results storage is " << (simparams.store_result ? "ON" : "OFF") << endl; cout << "CPP: Work Package Size: " << simparams.wp_size << endl; - cout << "CPP: DHT is " << (simparams.dht_enabled ? "ON" : "OFF") << '\n'; + cout << "CPP: DHT is " << (chem_params.use_dht ? "ON" : "OFF") << '\n'; - if (simparams.dht_enabled) { + if (chem_params.use_dht) { cout << "CPP: DHT strategy is " << simparams.dht_strategy << endl; cout << "CPP: DHT key default digits (ignored if 'signif_vector' is " "defined) = " << simparams.dht_significant_digits << endl; cout << "CPP: DHT logarithm before rounding: " << (simparams.dht_log ? "ON" : "OFF") << endl; - cout << "CPP: DHT size per process (Byte) = " - << simparams.dht_size_per_process << endl; - cout << "CPP: DHT save snapshots is " << simparams.dht_snaps << endl; - cout << "CPP: DHT load file is " << dht_file << endl; + cout << "CPP: DHT size per process (Byte) = " << chem_params.dht_size + << endl; + cout << "CPP: DHT save snapshots is " << chem_params.dht_snaps << endl; + cout << "CPP: DHT load file is " << chem_params.dht_file << endl; } } cmdl(1) >> filesim; cmdl(2) >> out_dir; + chem_params.dht_outdir = out_dir; + /* distribute information to R runtime */ // if local_rank == 0 then master else worker R["local_rank"] = simparams.world_rank; @@ -210,65 +206,20 @@ int SimParams::parseFromCmdl(char *argv[], RInside &R) { // work package size R["work_package_size"] = simparams.wp_size; // dht enabled? - R["dht_enabled"] = simparams.dht_enabled; + R["dht_enabled"] = chem_params.use_dht; // log before rounding? R["dht_log"] = simparams.dht_log; // eval the init string, ignoring any returns R.parseEvalQ("source(filesim)"); + R.parseEvalQ("mysetup <- setup"); + + this->chem_params.initFromR(R); return poet::PARSER_OK; } -void SimParams::initVectorParams(RInside &R) { - if (simparams.dht_enabled) { - /*Load significance vector from R setup file (or set default)*/ - bool signif_vector_exists = R.parseEval("exists('signif_vector')"); - if (signif_vector_exists) { - dht_signif_vector = as>(R["signif_vector"]); - } - - /*Load property type vector from R setup file (or set default)*/ - bool prop_type_vector_exists = R.parseEval("exists('prop_type')"); - if (prop_type_vector_exists) { - std::vector prop_type_R = - as>(R["prop_type"]); - this->dht_prop_type_vector.clear(); - this->dht_prop_type_vector.reserve(prop_type_R.size()); - - for (const auto &type : prop_type_R) { - if (type == "act") { - this->dht_prop_type_vector.push_back(DHT_TYPE_CHARGE); - continue; - } - - if (type == "ignore") { - this->dht_prop_type_vector.push_back(DHT_TYPE_IGNORE); - continue; - } - - this->dht_prop_type_vector.push_back(DHT_TYPE_DEFAULT); - } - } - - if (simparams.world_rank == 0) { - // MDL: new output on signif_vector and prop_type - if (signif_vector_exists) { - cout << "CPP: using problem-specific rounding digits: " << endl; - R.parseEval("print(data.frame(prop=prop, type=prop_type, " - "digits=signif_vector))"); - } else { - cout << "CPP: using DHT default rounding digits = " - << simparams.dht_significant_digits << endl; - } - - // MDL: pass to R the DHT stuff. These variables exist - // only if dht_enabled is true - R["dht_final_signif"] = dht_signif_vector; - R["dht_final_proptype"] = dht_prop_type_vector; - } - } -} +void SimParams::initVectorParams(RInside &R) {} std::list SimParams::validateOptions(argh::parser cmdl) { /* store all unknown parameters here */ diff --git a/test/testRounding.cpp b/test/testRounding.cpp new file mode 100644 index 000000000..06deef193 --- /dev/null +++ b/test/testRounding.cpp @@ -0,0 +1,79 @@ +#include + +#include "poet/Rounding.hpp" + +TEST_CASE("Rounding") { + constexpr int signif = 3; + + poet::DHT_Rounder rounder; + + SUBCASE("+exp - +sig") { + double input = 1.2345; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("+exp - -sig") { + double input = -1.2345; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - +sig") { + double input = 1.23456789E-5; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, -5); + } + + SUBCASE("-exp - -sig") { + double input = -1.23456789E-5; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, -5); + } + + SUBCASE("-exp - +sig (exceeding aqueous minimum)") { + double input = 1.23456789E-14; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 0); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - -sig (exceeding aqueous minimum)") { + double input = -1.23456789E-14; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 0); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - +sig (diff exceeds aqueous minimum)") { + double input = 1.23456789E-13; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 1); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - -sig (diff exceeds aqueous minimum)") { + double input = -1.23456789E-13; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -1); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - +sig (diff exceeds aqueous minimum - but H or O)") { + double input = 1.23456789E-13; + auto rounded = rounder.round(input, signif, true); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - -sig (diff exceeds aqueous minimum - but H or O)") { + double input = -1.23456789E-13; + auto rounded = rounder.round(input, signif, true); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, -13); + } +} diff --git a/util/data_evaluation/RFun_Eval.R b/util/data_evaluation/RFun_Eval.R index ed0068689..8243b8c05 100644 --- a/util/data_evaluation/RFun_Eval.R +++ b/util/data_evaluation/RFun_Eval.R @@ -1,10 +1,11 @@ ## Simple library of functions to assess and visualize the results of the coupled simulations -## Time-stamp: "Last modified 2023-04-24 16:09:55 mluebke" +## Time-stamp: "Last modified 2023-05-29 13:51:21 mluebke" require(RedModRphree) require(Rmufits) ## essentially for PlotCartCellData require(Rcpp) +require(stringr) curdir <- dirname(sys.frame(1)$ofile) ##path.expand(".") @@ -16,13 +17,18 @@ ConvertDHTKey <- function(value) { rcpp_key_convert(value) } +ConvertToUInt64 <- function(double_data) { + rcpp_uint64_convert(double_data) +} + ## function which reads all simulation results in a given directory ReadRTSims <- function(dir) { files_full <- list.files(dir, pattern="iter.*rds", full.names=TRUE) files_name <- list.files(dir, pattern="iter.*rds", full.names=FALSE) res <- lapply(files_full, readRDS) names(res) <- gsub(".rds","",files_name, fixed=TRUE) - return(res) + + return(res[str_sort(names(res), numeric = TRUE)]) } ## function which reads all successive DHT stored in a given directory @@ -68,6 +74,61 @@ ReadDHT <- function(file, new_scheme = TRUE) { return(res) } +## function which reads all successive DHT stored in a given directory +ReadAllPHT <- function(dir, with_info = FALSE) { + files_full <- list.files(dir, pattern="iter.*pht", full.names=TRUE) + files_name <- list.files(dir, pattern="iter.*pht", full.names=FALSE) + res <- lapply(files_full, ReadPHT, with_info = with_info) + names(res) <- gsub(".pht","",files_name, fixed=TRUE) + return(res) +} + +## function which reads one .dht file and gives a matrix +ReadPHT <- function(file, with_info = FALSE) { + conn <- file(file, "rb") ## open for reading in binary mode + if (!isSeekable(conn)) + stop("Connection not seekable") + + ## we first reposition ourselves to the end of the file... + tmp <- seek(conn, where=0, origin = "end") + ## ... and then back to the origin so to store the length in bytes + flen <- seek(conn, where=0, origin = "start") + + ## we read the first 2 integers (4 bytes each) containing dimensions in bytes + dims <- readBin(conn, what="integer", n=2) + + ## compute dimensions of the data + tots <- sum(dims) + ncol <- tots/8 + nrow <- (flen - 8)/tots ## 8 here is 2*sizeof("int") + buff <- readBin(conn, what="double", n=ncol*nrow) + ## close connection + close(conn) + res <- matrix(buff, nrow=nrow, ncol=ncol, byrow=TRUE) + + nkeys <- dims[1] / 8 + keys <- res[, 1:nkeys - 1] + + timesteps <- res[, nkeys] + conv <- apply(keys, 2, ConvertDHTKey) + #res[, 1:nkeys - 1] <- conv + + ndata <- dims[2] / 8 + fill_rate <- ConvertToUInt64(res[, nkeys + 1]) + + buff <- c(conv, timesteps, fill_rate) + + if (with_info) { + ndata <- dims[2]/8 + visit_count <- ConvertToUInt64(res[, nkeys + ndata]) + buff <- c(buff, visit_count) + } + + res <- matrix(buff, nrow = nrow, byrow = FALSE) + + return(res) +} + ## Scatter plots of each variable in the iteration PlotScatter <- function(sam1, sam2, which=NULL, labs=c("NO DHT", "DHT"), pch=".", cols=3, ...) { if ((!is.data.frame(sam1)) & ("T" %in% names(sam1))) @@ -228,3 +289,33 @@ Plot2DCellData <- function (data, grid, nx, ny, contour = TRUE, } invisible(pp) } + +PlotAsMP4 <- function(data, nx, ny, to_plot, out_dir, name, + contour = FALSE, scale = FALSE, framerate = 30) { + sort_data <- data[str_sort(names(data), numeric = TRUE)] + plot_data <- lapply(sort_data, function(x) x$C[[to_plot]]) + pad_size <- ceiling(log10(length(plot_data))) + + dir.create(out_dir, showWarnings = FALSE) + output_files <- paste0(out_dir, "/", name, "_%0", pad_size, "d.png") + output_mp4 <- paste0(out_dir, "/", name, ".mp4") + + png(output_files, + width = 297, height = 210, units = "mm", + res = 100 + ) + + for (i in 1:length(plot_data)) { + Rmufits::PlotCartCellData(plot_data[[i]], nx = nx, ny = ny, contour = contour, scale = scale) + } + dev.off() + + ffmpeg_command <- paste( + "ffmpeg -framerate", framerate, "-i", output_files, + "-c:v libx264 -crf 22", output_mp4 + ) + + unlink(output_mp4) + system(ffmpeg_command) + +} diff --git a/util/data_evaluation/interpret_keys.cpp b/util/data_evaluation/interpret_keys.cpp index 777f95d1d..26419076e 100644 --- a/util/data_evaluation/interpret_keys.cpp +++ b/util/data_evaluation/interpret_keys.cpp @@ -35,3 +35,16 @@ std::vector rcpp_key_convert(std::vector input) { return output; } + +// [[Rcpp::export]] +std::vector rcpp_uint64_convert(std::vector input) { + std::vector output; + output.reserve(input.size()); + + for (double &value : input) { + std::uint64_t *as_int = reinterpret_cast(&value); + output.push_back(*as_int); + } + + return output; +}