From 18725bf87db2ad24e4133a6e53f6d8ed5f62a837 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Thu, 12 Dec 2024 15:24:57 +0100 Subject: [PATCH 1/5] Added qs2 as new default format --- R_lib/kin_r_library.R | 137 ++++++++++++++++++++++-------------------- bench/CMakeLists.txt | 2 +- src/initializer.cpp | 16 ++++- src/poet.cpp | 13 ++-- src/poet.hpp.in | 4 +- 5 files changed, 97 insertions(+), 75 deletions(-) diff --git a/R_lib/kin_r_library.R b/R_lib/kin_r_library.R index cb5552a9b..50497b958 100644 --- a/R_lib/kin_r_library.R +++ b/R_lib/kin_r_library.R @@ -109,69 +109,6 @@ msgm <- function(...) { } -## Function called by master R process to store on disk all relevant -## parameters for the simulation -StoreSetup <- function(setup, filesim, out_dir) { - to_store <- vector(mode = "list", length = 4) - ## names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT") - names(to_store) <- c("Sim", "Transport", "DHT", "Cmdline") - - ## read the setup R file, which is sourced in kin.cpp - tmpbuff <- file(filesim, "r") - setupfile <- readLines(tmpbuff) - close.connection(tmpbuff) - - to_store$Sim <- setupfile - - ## to_store$Flow <- list( - ## snapshots = setup$snapshots, - ## gridfile = setup$gridfile, - ## phase = setup$phase, - ## density = setup$density, - ## dt_differ = setup$dt_differ, - ## prolong = setup$prolong, - ## maxiter = setup$maxiter, - ## saved_iter = setup$iter_output, - ## out_save = setup$out_save ) - - to_store$Transport <- setup$diffusion - - ## to_store$Chemistry <- list( - ## nprocs = n_procs, - ## wp_size = work_package_size, - ## base = setup$base, - ## first = setup$first, - ## init = setup$initsim, - ## db = db, - ## kin = setup$kin, - ## ann = setup$ann) - - if (dht_enabled) { - to_store$DHT <- list( - enabled = dht_enabled, - log = dht_log - ## signif = dht_final_signif, - ## proptype = dht_final_proptype - ) - } else { - to_store$DHT <- FALSE - } - - if (dht_enabled) { - to_store$DHT <- list( - enabled = dht_enabled, - log = 
dht_log - # signif = dht_final_signif, - # proptype = dht_final_proptype - ) - } else { - to_store$DHT <- FALSE - } - - saveRDS(to_store, file = paste0(fileout, "/setup.rds")) - msgm("initialization stored in ", paste0(fileout, "/setup.rds")) -} - GetWorkPackageSizesVector <- function(n_packages, package_size, len) { ids <- rep(1:n_packages, times = package_size, each = 1)[1:len] return(as.integer(table(ids))) @@ -179,7 +116,7 @@ GetWorkPackageSizesVector <- function(n_packages, package_size, len) { ## Handler to read R objs from binary files using either builtin -## readRDS() or qs::qread() based on file extension +## readRDS(), qs::qread() or qs2::qs_read() based on file extension ReadRObj <- function(path) { ## code borrowed from tools::file_ext() pos <- regexpr("\\.([[:alnum:]]+)$", path) @@ -187,7 +124,8 @@ ReadRObj <- function(path) { switch(extension, rds = readRDS(path), - qs = qs::qread(path) + qs = qs::qread(path), + qs2 = qs2::qs_read(path) ) } @@ -201,6 +139,73 @@ SaveRObj <- function(x, path) { switch(extension, rds = saveRDS(object = x, file = path), - qs = qs::qsave(x = x, file = path) + qs = qs::qsave(x = x, file = path), + qs2 = qs2::qs_save(object = x, file = path) ) } + + +######## Old relic code + +## ## Function called by master R process to store on disk all relevant +## ## parameters for the simulation +## StoreSetup <- function(setup, filesim, out_dir) { +## to_store <- vector(mode = "list", length = 4) +## ## names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT") +## names(to_store) <- c("Sim", "Transport", "DHT", "Cmdline") + +## ## read the setup R file, which is sourced in kin.cpp +## tmpbuff <- file(filesim, "r") +## setupfile <- readLines(tmpbuff) +## close.connection(tmpbuff) + +## to_store$Sim <- setupfile + +## ## to_store$Flow <- list( +## ## snapshots = setup$snapshots, +## ## gridfile = setup$gridfile, +## ## phase = setup$phase, +## ## density = setup$density, +## ## dt_differ = setup$dt_differ, +## ## prolong = 
setup$prolong, +## ## maxiter = setup$maxiter, +## ## saved_iter = setup$iter_output, +## ## out_save = setup$out_save ) + +## to_store$Transport <- setup$diffusion + +## ## to_store$Chemistry <- list( +## ## nprocs = n_procs, +## ## wp_size = work_package_size, +## ## base = setup$base, +## ## first = setup$first, +## ## init = setup$initsim, +## ## db = db, +## ## kin = setup$kin, +## ## ann = setup$ann) + +## if (dht_enabled) { +## to_store$DHT <- list( +## enabled = dht_enabled, +## log = dht_log +## ## signif = dht_final_signif, +## ## proptype = dht_final_proptype +## ) +## } else { +## to_store$DHT <- FALSE +## } + +## if (dht_enabled) { +## to_store$DHT <- list( +## enabled = dht_enabled, +## log = dht_log +## # signif = dht_final_signif, +## # proptype = dht_final_proptype +## ) +## } else { +## to_store$DHT <- FALSE +## } + +## saveRDS(to_store, file = paste0(fileout, "/setup.rds")) +## msgm("initialization stored in ", paste0(fileout, "/setup.rds")) +## } diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index 01dc43caf..794774c4f 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -7,7 +7,7 @@ function(ADD_BENCH_TARGET TARGET POET_BENCH_LIST RT_FILES OUT_PATH) foreach(BENCH_FILE ${${POET_BENCH_LIST}}) get_filename_component(BENCH_NAME ${BENCH_FILE} NAME_WE) set(OUT_FILE ${CMAKE_CURRENT_BINARY_DIR}/${BENCH_NAME}) - set(OUT_FILE_EXT ${OUT_FILE}.qs) + set(OUT_FILE_EXT ${OUT_FILE}.qs2) add_custom_command( OUTPUT ${OUT_FILE_EXT} diff --git a/src/initializer.cpp b/src/initializer.cpp index d2e663931..d613b83be 100644 --- a/src/initializer.cpp +++ b/src/initializer.cpp @@ -35,7 +35,11 @@ int main(int argc, char **argv) { ->default_val(false); bool asRDS; - app.add_flag("-r, --rds", asRDS, "Save output as .rds file instead of .qs") + app.add_flag("-r, --rds", asRDS, "Save output as .rds") + ->default_val(false); + + bool asQS; + app.add_flag("-q, --qs", asQS, "Save output as .qs") ->default_val(false); CLI11_PARSE(app, argc, argv); @@ -69,8 
+73,14 @@ int main(int argc, char **argv) { } // append the correct file extension - output_file += asRDS ? ".rds" : ".qs"; - + if (asRDS) { + output_file += ".rds"; + } else if (asQS) { + output_file += ".qs"; + } else { + output_file += ".qs2"; + } + // set working directory to the directory of the input script if (setwd) { const std::string dir_path = Rcpp::as( diff --git a/src/poet.cpp b/src/poet.cpp index 9fbf94c18..d4f27525f 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -57,7 +57,7 @@ static std::unique_ptr global_rt_setup; // before the R runtime is initialized static poet::DEFunc master_init_R; static poet::DEFunc master_iteration_end_R; -static poet::DEFunc store_setup_R; +// MDL: unused -> static poet::DEFunc store_setup_R; static poet::DEFunc ReadRObj_R; static poet::DEFunc SaveRObj_R; static poet::DEFunc source_R; @@ -66,7 +66,7 @@ static void init_global_functions(RInside &R) { R.parseEval(kin_r_library); master_init_R = DEFunc("master_init"); master_iteration_end_R = DEFunc("master_iteration_end"); - store_setup_R = DEFunc("StoreSetup"); + // MDL: unused -> store_setup_R = DEFunc("StoreSetup"); source_R = DEFunc("source"); ReadRObj_R = DEFunc("ReadRObj"); SaveRObj_R = DEFunc("SaveRObj"); @@ -146,8 +146,11 @@ int parseInitValues(int argc, char **argv, RuntimeParameters &params) { "Enable AI surrogate for chemistry module"); app.add_flag("--rds", params.as_rds, - "Save output as .rds file instead of .qs"); + "Save output as .rds file instead of default .qs2"); + app.add_flag("--qs", params.as_qs, + "Save output as .qs file instead of default .qs2"); + std::string init_file; std::string runtime_file; @@ -174,7 +177,9 @@ int parseInitValues(int argc, char **argv, RuntimeParameters &params) { } // set the output extension - params.out_ext = params.as_rds ? 
"rds" : "qs"; + params.out_ext = "qs2"; + if (params.as_rds) params.out_ext = "rds"; + if (params.as_qs) params.out_ext = "qs"; if (MY_RANK == 0) { // MSG("Complete results storage is " + BOOL_PRINT(simparams.store_result)); diff --git a/src/poet.hpp.in b/src/poet.hpp.in index 0e2409f87..c48a0f3df 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -45,8 +45,10 @@ struct RuntimeParameters { Rcpp::List init_params; + // MDL added to accomodate for qs::qsave/qread bool as_rds = false; - std::string out_ext; // MDL added to accomodate for qs::qsave/qread + bool as_qs = false; + std::string out_ext; bool print_progress = false; From a714211560fa2f68c4bc022ca7a24ee377631adb Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Thu, 12 Dec 2024 16:20:09 +0100 Subject: [PATCH 2/5] Less and more informative stdout messages --- R_lib/kin_r_library.R | 4 ++-- src/Transport/DiffusionModule.cpp | 2 +- src/poet.cpp | 30 +++++++++++++++++------------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/R_lib/kin_r_library.R b/R_lib/kin_r_library.R index 50497b958..97fd49e16 100644 --- a/R_lib/kin_r_library.R +++ b/R_lib/kin_r_library.R @@ -94,7 +94,7 @@ master_iteration_end <- function(setup, state_T, state_C) { ## Add last time step to simulation time setup$simulation_time <- setup$simulation_time + setup$timesteps[iter] - msgm("done iteration", iter, "/", length(setup$timesteps)) + ## msgm("done iteration", iter, "/", length(setup$timesteps)) setup$iter <- setup$iter + 1 return(setup) } @@ -132,7 +132,7 @@ ReadRObj <- function(path) { ## Handler to store R objs to binary files using either builtin ## saveRDS() or qs::qsave() based on file extension SaveRObj <- function(x, path) { - msgm("Storing to", path) + ## msgm("Storing to", path) ## code borrowed from tools::file_ext() pos <- regexpr("\\.([[:alnum:]]+)$", path) extension <- ifelse(pos > -1L, substring(path, pos + 1L), "") diff --git a/src/Transport/DiffusionModule.cpp b/src/Transport/DiffusionModule.cpp index 
2ea80564a..754d135c9 100644 --- a/src/Transport/DiffusionModule.cpp +++ b/src/Transport/DiffusionModule.cpp @@ -70,7 +70,7 @@ VecToMatrix(const std::vector &vec, std::uint32_t n_rows, // static constexpr double ZERO_MULTIPLICATOR = 10E-14; void DiffusionModule::simulate(double requested_dt) { - MSG("Starting diffusion ..."); + // MSG("Starting diffusion ..."); const auto start_diffusion_t = std::chrono::high_resolution_clock::now(); const auto &n_rows = this->param_list.n_rows; diff --git a/src/poet.cpp b/src/poet.cpp index d4f27525f..93a168496 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -39,13 +39,14 @@ #include #include #include +#include #include #include #include -using namespace std; +// using namespace std; using namespace poet; using namespace Rcpp; @@ -292,18 +293,19 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params, const double &dt = params.timesteps[iter - 1]; - // cout << "CPP: Next time step is " << dt << "[s]" << endl; - MSG("Next time step is " + std::to_string(dt) + " [s]"); - + std::cout << std::endl; /* displaying iteration number, with C++ and R iterator */ - MSG("Going through iteration " + std::to_string(iter)); + MSG("Going through iteration " + std::to_string(iter) + "/" + + std::to_string(maxiter)); + + MSG("Current time step is " + std::format("{:.2f}", dt)); /* run transport */ diffusion.simulate(dt); chem.getField().update(diffusion.getField()); - MSG("Chemistry step"); + // MSG("Chemistry start"); if (params.use_ai_surrogate) { double ai_start_t = MPI_Wtime(); // Save current values from the tug field as predictor for the ai step @@ -319,16 +321,16 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params, R.parseEval("predictors_scaled <- preprocess(predictors)"); // Predict - MSG("AI Predict"); + MSG("AI Prediction"); R.parseEval( "aipreds_scaled <- prediction_step(model, predictors_scaled)"); // Apply postprocessing - MSG("AI Postprocesing"); + MSG("AI Postprocessing"); 
R.parseEval("aipreds <- postprocess(aipreds_scaled)"); // Validate prediction and write valid predictions to chem field - MSG("AI Validate"); + MSG("AI Validation"); R.parseEval( "validity_vector <- validate_predictions(predictors, aipreds)"); @@ -338,8 +340,8 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params, MSG("AI TempField"); std::vector> RTempField = R.parseEval("set_valid_predictions(predictors,\ - aipreds,\ - validity_vector)"); + aipreds,\ + validity_vector)"); MSG("AI Set Field"); Field predictions_field = @@ -390,9 +392,11 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params, MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); - MSG(); + // MSG(); } // END SIMULATION LOOP + std::cout << std::endl; + Rcpp::List chem_profiling; chem_profiling["simtime"] = chem.GetChemistryTime(); chem_profiling["loop"] = chem.GetMasterLoopTime(); @@ -588,7 +592,7 @@ int main(int argc, char *argv[]) { R["setup"] = *global_rt_setup; R["setup$out_ext"] = run_params.out_ext; - string r_vis_code; + std::string r_vis_code; r_vis_code = "SaveRObj(x = profiling, path = paste0(out_dir, " "'/timings.', setup$out_ext));"; R.parseEval(r_vis_code); From ce5db736044014cee23810e8d3c12e2ca3311173 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Thu, 12 Dec 2024 16:42:44 +0100 Subject: [PATCH 3/5] Update README: qs2 as default output format, gfz.de everywhere --- README.md | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index d71766ecd..cc4e643cf 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ original MPI-based Distributed Hash Table. ## Parsed code documentiation A parsed version of POET's documentation can be found at [Gitlab -pages](https://naaice.git-pages.gfz-potsdam.de/poet). +pages](https://naaice.git-pages.gfz.de/poet). 
## External Libraries @@ -18,8 +18,8 @@ The following external libraries are shipped with POET: - **CLI11** - - **IPhreeqc** with patches from GFZ/UP - - - -- **tug** - + +- **tug** - ## Installation @@ -41,7 +41,7 @@ installed: - [Rcpp](https://cran.r-project.org/web/packages/Rcpp/index.html) - [RInside](https://cran.r-project.org/web/packages/RInside/index.html) - [qs](https://cran.r-project.org/web/packages/qs/index.html) - +- [qs2](https://cran.r-project.org/web/packages/qs2/index.html) This can be simply achieved by issuing the following commands: ```sh @@ -49,7 +49,7 @@ This can be simply achieved by issuing the following commands: $ R # install R dependencies (case sensitive!) -> install.packages(c("Rcpp", "RInside","qs")) +> install.packages(c("Rcpp", "RInside","qs","qs2")) > q(save="no") ``` @@ -59,7 +59,7 @@ POET can be anonimously cloned from this repo over https. Make sure to also download the submodules: ```sh -git clone --recurse-submodules https://git.gfz-potsdam.de/naaice/poet.git +git clone --recurse-submodules https://git.gfz.de/naaice/poet.git ``` The `--recurse-submodules` option is a shorthand for: ```sh @@ -110,7 +110,7 @@ follows: $ R # install R dependencies -> install.packages(c("Rcpp", "RInside","qs")) +> install.packages(c("Rcpp", "RInside","qs","qs2")) > q(save="no") # cd into POET project root @@ -138,17 +138,17 @@ poet └── share └── poet ├── barite - │   ├── barite_200.rds + │   ├── barite_200.qs2 │   ├── barite_200_rt.R - │   ├── barite_het.rds + │   ├── barite_het.qs2 │   └── barite_het_rt.R ├── dolo - │   ├── dolo_inner_large.rds + │   ├── dolo_inner_large.qs2 │   ├── dolo_inner_large_rt.R - │   ├── dolo_interp.rds + │   ├── dolo_interp.qs2 │   └── dolo_interp_rt.R └── surfex - ├── PoetEGU_surfex_500.rds + ├── PoetEGU_surfex_500.qs2 └── PoetEGU_surfex_500_rt.R ``` @@ -182,7 +182,8 @@ The following parameters can be set: | **-P, --progress** | | show progress bar | | **--ai-surrogate** | | activates the AI surrogate chemistry model 
(defaults to _OFF_) | | **--dht** | | enabling DHT usage (defaults to _OFF_) | -| **--qs** | | store results using qs::qsave() (.qs extension) instead of default RDS (.rds) | +| **--qs** | | store results using qs::qsave() (.qs extension) instead of default qs2 (.qs2) | +| **--rds** | | store results using saveRDS() (.rds extension) instead of default qs2 (.qs2) | | **--dht-strategy=** | _0-1_ | change DHT strategy. **NOT IMPLEMENTED YET** (Defaults to _0_) | | **--dht-size=** | _1-n_ | size of DHT per process involved in megabyte (defaults to _1000 MByte_) | | **--dht-snaps=** | _0-2_ | disable or enable storage of DHT snapshots | @@ -284,7 +285,7 @@ produce any valid predictions. In order to provide a model to POET, you need to setup a R script which can then be used by `poet_init` to generate the simulation input. Which parameters are required can be found in the -[Wiki](https://git.gfz-potsdam.de/naaice/poet/-/wikis/Initialization). +[Wiki](https://git.gfz.de/naaice/poet/-/wikis/Initialization). We try to keep the document up-to-date. However, if you encounter missing information or need help, please get in touch with us via the issue tracker or E-Mail. @@ -298,7 +299,7 @@ issue tracker or E-Mail. where: - **output** - name of the output file (defaults to the input file - name with the extension `.rds`) + name with the extension `.qs2`) - **setwd** - set the working directory to the directory of the input file (e.g. to allow relative paths in the input script). 
However, the output file will be stored in the directory from which From fbd4739360bf22ecd8da5f4bdb0b025ca45d839e Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Thu, 12 Dec 2024 16:56:05 +0100 Subject: [PATCH 4/5] Update references to .qs2 in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cc4e643cf..9e31ff6d4 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,7 @@ installed POET-dir `/bin` and run: ```sh cp ../share/poet/barite/barite_het* . -mpirun -n 4 ./poet barite_het_rt.R barite_het.rds output +mpirun -n 4 ./poet barite_het_rt.R barite_het.qs2 output ``` After a finished simulation all data generated by POET will be found @@ -227,7 +227,7 @@ DHT snapshot shall be produced. This is done by appending the `--dht-snaps=` option. The resulting call would look like this: ```sh -mpirun -n 4 ./poet --dht --dht-snaps=2 barite_het_rt.R barite_het.rds output +mpirun -n 4 ./poet --dht --dht-snaps=2 barite_het_rt.R barite_het.qs2 output ``` ### Example: Preparing Environment and Running with AI surrogate @@ -274,7 +274,7 @@ cp /bench/barite/{barite_50ai*,db_barite.dat,barite.pqi} . 
./poet_init barite_50ai.R # run POET with AI surrogate and GPU utilization -srun --gres=gpu -N 1 -n 12 ./poet --ai-surrogate barite_50ai_rt.R barite_50ai.rds output +srun --gres=gpu -N 1 -n 12 ./poet --ai-surrogate barite_50ai_rt.R barite_50ai.qs2 output ``` Keep in mind that the AI surrogate is currently not stable or might also not From f1b166145e4b441d4f47b54be63cc4df9376a831 Mon Sep 17 00:00:00 2001 From: Marco De Lucia Date: Thu, 12 Dec 2024 18:05:50 +0100 Subject: [PATCH 5/5] reverting since gcc < 13 does not support it --- src/poet.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/poet.cpp b/src/poet.cpp index 93a168496..3da740096 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -39,7 +39,6 @@ #include #include #include -#include #include @@ -294,11 +293,12 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params, const double &dt = params.timesteps[iter - 1]; std::cout << std::endl; + /* displaying iteration number, with C++ and R iterator */ MSG("Going through iteration " + std::to_string(iter) + "/" + std::to_string(maxiter)); - MSG("Current time step is " + std::format("{:.2f}", dt)); + MSG("Current time step is " + std::to_string(dt)); /* run transport */ diffusion.simulate(dt);