diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 000000000..8016eb576 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,12 @@ +FROM mcr.microsoft.com/vscode/devcontainers/base:debian + +RUN sudo apt-get update && export DEBIAN_FRONTEND=noninteractive \ + && sudo apt-get install -y \ + cmake-curses-gui \ + clangd \ + git \ + libeigen3-dev \ + libopenmpi-dev \ + r-cran-rcpp \ + r-cran-rinside + diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..d32365cee --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,28 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile +{ + "build": { + "dockerfile": "Dockerfile" + }, + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + // Uncomment the next line to run commands after the container is created. + // "postCreateCommand": "cat /etc/os-release", + // Configure tool-specific properties. + "customizations": { + "vscode": { + "extensions": [ + "twxs.cmake", + "llvm-vs-code-extensions.vscode-clangd" + ] + } + }, + // in case you want to push/pull from remote repositories using ssh + "mounts": [ + "source=${localEnv:HOME}/.ssh,target=/home/vscode/.ssh,type=bind,consistency=cached" + ] + // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "devcontainer" +} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 79e7da401..1e597b203 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,6 @@ vignettes/*.pdf # End of https://www.toptal.com/developers/gitignore/api/c,c++,r,cmake build/ +/.cache/ + +.vscode \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 35eabee89..1a7bf2db0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -34,11 +34,10 @@ build-poet: # This job runs in the build stage, which runs first. - mkdir build && cd build - cmake -DPOET_ENABLE_TESTING=ON .. - make -j$(nproc) - rules: - - if: $CI_PIPELINE_SOURCE == 'merge_request_event' artifacts: paths: - build + expire_in: 1 day test-poet: stage: test @@ -47,8 +46,6 @@ test-poet: script: - cd build - make -j$(nproc) check - rules: - - if: $CI_PIPELINE_SOURCE == 'merge_request_event' archive-sources: # This job runs in the build stage, which runs first. image: python:3 diff --git a/CMake/POET_Scripts.cmake b/CMake/POET_Scripts.cmake index 69760cc44..89fc1e84f 100644 --- a/CMake/POET_Scripts.cmake +++ b/CMake/POET_Scripts.cmake @@ -9,7 +9,7 @@ macro(get_POET_version) OUTPUT_VARIABLE POET_GIT_BRANCH OUTPUT_STRIP_TRAILING_WHITESPACE) execute_process( - COMMAND ${GIT_EXECUTABLE} describe --always + COMMAND ${GIT_EXECUTABLE} describe --tags --always WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_VARIABLE POET_GIT_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb87f3a7a..8faf514e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,9 +23,6 @@ find_package(MPI REQUIRED) find_package(RRuntime REQUIRED) add_subdirectory(src) -add_subdirectory(R_lib) -add_subdirectory(data) -add_subdirectory(app) add_subdirectory(bench) # as tug will also pull in doctest as a dependency diff --git a/README.md b/README.md index 4e29aea28..a273f7f10 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # POET @@ -69,6 +69,9 @@ following available options: DHT usage. Defaults to _OFF_. - **POET_ENABLE_TESTING**=_boolean_ - enables small set of unit tests (more to come). Defaults to _OFF_. +- **POET_PHT_ADDITIONAL_INFO**=_boolean_ - enabling the count of accesses to one + PHT bucket. Use with caution, as things will get slowed down significantly. + Defaults to _OFF_. ### Example: Build from scratch @@ -109,15 +112,24 @@ poet │ └── kin_r_library.R └── share └── poet - ├── bench - │ ├── dolo_diffu_inner_large.R - │ ├── dolo_diffu_inner.R - │ └── dolo_inner.pqi - └── examples - ├── dol.pqi - ├── phreeqc_kin.dat - ├── SimDol1D_diffu.R - └── SimDol2D_diffu.R + └── bench + ├── barite + │ ├── barite_interp_eval.R + │ ├── barite.pqi + │ ├── barite.R + │ └── db_barite.dat + ├── dolo + │ ├── dolo_diffu_inner_large.R + │ ├── dolo_diffu_inner.R + │ ├── dolo_inner.pqi + │ ├── dolo_interp_long.R + │ └── phreeqc_kin.dat + └── surfex + ├── ExBase.pqi + ├── ex.R + ├── SMILE_2021_11_01_TH.dat + ├── SurfExBase.pqi + └── surfex.R ``` The R libraries will be loaded at runtime and the paths are hardcoded @@ -125,6 +137,10 @@ absolute paths inside `poet.cpp`. So, if you consider to move `bin/poet` either change paths of the R source files and recompile POET or also move `R_lib/*` relative to the binary. +The benchmarks consist of input scripts, which are provided as .R files. +Additionally, Phreeqc scripts and their corresponding databases are required, +stored as .pqi and .dat files, respectively. + ## Running Run POET by `mpirun ./poet ` @@ -132,28 +148,25 @@ where: - **OPTIONS** - runtime parameters (explained below) - **SIMFILE** - simulation described as R script (e.g. - `/share/examples/SimDol2D_diffu.R`) + `/share/poet/bench/dolo/dolo_interp_long.R`) - **OUTPUT_DIRECTORY** - path, where all output of POET should be stored ### Runtime options The following parameters can be set: -| Option | Value | Description | -|--------------------------|--------------|--------------------------------------------------------------------------------------------------------------------------| -| **--work-package-size=** | _1..n_ | size of work packages (defaults to _5_) | -| **--ignore-result** | | disables store of simulation resuls | -| **--dht** | | enabling DHT usage (defaults to _OFF_) | -| **--dht-signif=** | _1..n_ | set rounding to number of significant digits (defaults to _5_) (it is recommended to use `signif_vec` in R input script) | -| **--dht-strategy=** | _0-1_ | change DHT strategy. **NOT IMPLEMENTED YET** (Defaults to _0_) | -| **--dht-size=** | _1-n_ | size of DHT per process involved in megabyte (defaults to _1000 MByte_) | -| **--dht-snaps=** | _0-2_ | disable or enable storage of DHT snapshots | -| **--dht-file=** | `` | initializes DHT with the given snapshot file | - -#### Additions to `dht-signif` - -Only used if no vector is given in setup file. For individual values -per column use R vector `signif_vector` in `SIMFILE`. +| Option | Value | Description | +|-----------------------------|--------------|--------------------------------------------------------------------------------------------------------------------------| +| **--work-package-size=** | _1..n_ | size of work packages (defaults to _5_) | +| **--ignore-result** | | disables store of simulation resuls | +| **--dht** | | enabling DHT usage (defaults to _OFF_) | +| **--dht-strategy=** | _0-1_ | change DHT strategy. **NOT IMPLEMENTED YET** (Defaults to _0_) | +| **--dht-size=** | _1-n_ | size of DHT per process involved in megabyte (defaults to _1000 MByte_) | +| **--dht-snaps=** | _0-2_ | disable or enable storage of DHT snapshots | +| **--dht-file=** | `` | initializes DHT with the given snapshot file | +| **--interp-size** | _1-n_ | size of PHT (interpolation) per process in megabyte | +| **--interp-bucket-entries** | _1-n_ | number of entries to store at maximum in one PHT bucket | +| **--interp-min** | _1-n_ | number of entries in PHT bucket needed to start interpolation | #### Additions to `dht-snaps` @@ -168,13 +181,13 @@ Following values can be set: ### Example: Running from scratch We will continue the above example and start a simulation with -`SimDol2D_diffu.R`. As transport a simple fixed-coefficient diffusion is used. +`dolo_diffu_inner.R`. As transport a simple fixed-coefficient diffusion is used. It's a 2D, 100x100 grid, simulating 10 time steps. To start the simulation with 4 processes `cd` into your previously installed POET-dir `/bin` and run: ```sh -mpirun -n 4 ./poet ../share/poet/examples/SimDol2D_diffu.R output +mpirun -n 4 ./poet ../share/poet/bench/dolo/dolo_diffu_inner.R/ output ``` After a finished simulation all data generated by POET will be found @@ -187,7 +200,7 @@ produced. This is done by appending the `--dht-snaps=` option. The resulting call would look like this: ```sh -mpirun -n 4 ./poet --dht --dht-snaps=2 ../share/poet/examples/SimDol2D_diffu.R output +mpirun -n 4 ./poet --dht --dht-snaps=2 ../share/poet/bench/dolo/dolo_diffu_inner.R/ output ``` ## About the usage of MPI_Wtime() diff --git a/R_lib/CMakeLists.txt b/R_lib/CMakeLists.txt deleted file mode 100644 index 24bab1690..000000000 --- a/R_lib/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -install(FILES kin_r_library.R DESTINATION R_lib) diff --git a/R_lib/kin_r_library.R b/R_lib/kin_r_library.R index e0634c153..1e7f803ed 100644 --- a/R_lib/kin_r_library.R +++ b/R_lib/kin_r_library.R @@ -1,4 +1,6 @@ -### Copyright (C) 2018-2022 Marco De Lucia, Max Luebke (GFZ Potsdam) +## Time-stamp: "Last modified 2023-08-15 11:58:23 delucia" + +### Copyright (C) 2018-2023 Marco De Lucia, Max Luebke (GFZ Potsdam) ### ### POET is free software; you can redistribute it and/or modify it under the ### terms of the GNU General Public License as published by the Free Software @@ -13,202 +15,208 @@ ### this program; if not, write to the Free Software Foundation, Inc., 51 ### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + ## Simple function to check file extension. It is needed to check if ## the GridFile is SUM (MUFITS format) or rds/RData FileExt <- function(x) { - pos <- regexpr("\\.([[:alnum:]]+)$", x) - ifelse(pos > -1L, substring(x, pos + 1L), "") + pos <- regexpr("\\.([[:alnum:]]+)$", x) + ifelse(pos > -1L, substring(x, pos + 1L), "") } master_init <- function(setup) { - msgm("Process with rank 0 reading GRID properties") + msgm("Process with rank 0 reading GRID properties") - ## Setup the directory where we will store the results - verb <- FALSE - if (local_rank == 0) { - verb <- TRUE ## verbosity loading MUFITS results - if (!dir.exists(fileout)) { - dir.create(fileout) - msgm("created directory ", fileout) + ## Setup the directory where we will store the results + verb <- FALSE + if (local_rank == 0) { + verb <- TRUE ## verbosity loading MUFITS results + if (!dir.exists(fileout)) { + dir.create(fileout) + msgm("created directory ", fileout) + } else { + msgm("dir ", fileout, " already exists, I will overwrite!") + } + if (!exists("store_result")) { + msgm("store_result doesn't exist!") + } else { + msgm("store_result is ", store_result) + } } else { - msgm("dir ", fileout, " already exists, I will overwrite!") + } - if (!exists("store_result")) { - msgm("store_result doesn't exist!") - } else { - msgm("store_result is ", store_result) + + setup$iter <- 1 + setup$maxiter <- setup$iterations + setup$timesteps <- setup$timesteps + setup$simulation_time <- 0 + + if (is.null(setup[["store_result"]])) { + setup$store_result <- TRUE } - } else { - - } - - setup$iter <- 1 - setup$maxiter <- setup$iterations - setup$timesteps <- setup$timesteps - setup$simulation_time <- 0 - - if (is.null(setup[["store_result"]])) { - setup$store_result <- TRUE - } - - if (setup$store_result) { - if (is.null(setup[["out_save"]])) { - setup$out_save <- seq(1, setup$iterations) + + if (setup$store_result) { + if (is.null(setup[["out_save"]])) { + setup$out_save <- seq(1, setup$iterations) + } } - } - - return(setup) + + return(setup) } ## This function, called only by master, stores on disk the last ## calculated time step if store_result is TRUE and increments the ## iteration counter master_iteration_end <- function(setup) { - iter <- setup$iter - ## MDL Write on disk state_T and state_C after every iteration - ## comprised in setup$out_save - if (setup$store_result) { - if (iter %in% setup$out_save) { - nameout <- paste0(fileout, "/iter_", sprintf("%03d", iter), ".rds") - info <- list( - tr_req_dt = as.integer(setup$req_dt) - # tr_allow_dt = setup$allowed_dt, - # tr_inniter = as.integer(setup$inniter) - ) - saveRDS(list( - T = setup$state_T, C = setup$state_C, - simtime = as.integer(setup$simtime), - tr_info = info - ), file = nameout) - msgm("results stored in <", nameout, ">") + iter <- setup$iter + ## max digits for iterations + dgts <- as.integer(ceiling(log10(setup$iterations + 1))) + ## string format to use in sprintf + fmt <- paste0("%0", dgts, "d") + + ## Write on disk state_T and state_C after every iteration + ## comprised in setup$out_save + if (setup$store_result) { + if (iter %in% setup$out_save) { + nameout <- paste0(fileout, "/iter_", sprintf(fmt=fmt, iter), ".rds") + info <- list( + tr_req_dt = as.integer(setup$req_dt) + ## tr_allow_dt = setup$allowed_dt, + ## tr_inniter = as.integer(setup$inniter) + ) + saveRDS(list( + T = setup$state_T, C = setup$state_C, + simtime = as.integer(setup$simtime), + tr_info = info + ), file = nameout) + msgm("results stored in <", nameout, ">") + } } - } - msgm("done iteration", iter, "/", setup$maxiter) - setup$iter <- setup$iter + 1 - return(setup) + msgm("done iteration", iter, "/", setup$maxiter) + setup$iter <- setup$iter + 1 + return(setup) } ## function for the workers to compute chemistry through PHREEQC slave_chemistry <- function(setup, data) { - base <- setup$base - first <- setup$first - prop <- setup$prop - immobile <- setup$immobile - kin <- setup$kin - ann <- setup$ann - - iter <- setup$iter - timesteps <- setup$timesteps - dt <- timesteps[iter] - - state_T <- data ## not the global field, but the work-package - - ## treat special H+/pH, e-/pe cases - state_T <- RedModRphree::Act2pH(state_T) - - ## reduction of the problem - if (setup$reduce) { - reduced <- ReduceStateOmit(state_T, omit = setup$ann) - } else { - reduced <- state_T - } - - ## form the PHREEQC input script for the current work package - inplist <- SplitMultiKin( - data = reduced, procs = 1, base = base, first = first, - ann = ann, prop = prop, minerals = immobile, kin = kin, dt = dt - ) - - ## if (local_rank==1 & iter==1) - ## RPhreeWriteInp("FirstInp", inplist) - - tmpC <- RunPQC(inplist, procs = 1, second = TRUE) - - ## recompose after the reduction - if (setup$reduce) { - state_C <- RecomposeState(tmpC, reduced) - } else { - state_C <- tmpC - } - - ## the next line is needed since we don't need all columns of - ## PHREEQC output - return(state_C[, prop]) + base <- setup$base + first <- setup$first + prop <- setup$prop + immobile <- setup$immobile + kin <- setup$kin + ann <- setup$ann + + iter <- setup$iter + timesteps <- setup$timesteps + dt <- timesteps[iter] + + state_T <- data ## not the global field, but the work-package + + ## treat special H+/pH, e-/pe cases + state_T <- RedModRphree::Act2pH(state_T) + + ## reduction of the problem + if (setup$reduce) { + reduced <- ReduceStateOmit(state_T, omit = setup$ann) + } else { + reduced <- state_T + } + + ## form the PHREEQC input script for the current work package + inplist <- SplitMultiKin( + data = reduced, procs = 1, base = base, first = first, + ann = ann, prop = prop, minerals = immobile, kin = kin, dt = dt + ) + + ## if (local_rank==1 & iter==1) + ## RPhreeWriteInp("FirstInp", inplist) + + tmpC <- RunPQC(inplist, procs = 1, second = TRUE) + + ## recompose after the reduction + if (setup$reduce) { + state_C <- RecomposeState(tmpC, reduced) + } else { + state_C <- tmpC + } + + ## the next line is needed since we don't need all columns of + ## PHREEQC output + return(state_C[, prop]) } ## This function, called by master master_chemistry <- function(setup, data) { - state_T <- setup$state_T - - msgm(" chemistry iteration", setup$iter) - - ## treat special H+/pH, e-/pe cases - state_T <- RedModRphree::Act2pH(state_T) - - ## reduction of the problem - if (setup$reduce) { - reduced <- ReduceStateOmit(state_T, omit = setup$ann) - } else { - reduced <- state_T - } - - ### inject data from workers - res_C <- data - - rownames(res_C) <- NULL - - ## print(res_C) - - if (nrow(res_C) > nrow(reduced)) { - res_C <- res_C[seq(2, nrow(res_C), by = 2), ] - } - - ## recompose after the reduction - if (setup$reduce) { - state_C <- RecomposeState(res_C, reduced) - } else { - state_C <- res_C - } - - setup$state_C <- state_C - setup$reduced <- reduced - - return(setup) + state_T <- setup$state_T + + msgm(" chemistry iteration", setup$iter) + + ## treat special H+/pH, e-/pe cases + state_T <- RedModRphree::Act2pH(state_T) + + ## reduction of the problem + if (setup$reduce) { + reduced <- ReduceStateOmit(state_T, omit = setup$ann) + } else { + reduced <- state_T + } + + ## inject data from workers + res_C <- data + + rownames(res_C) <- NULL + + ## print(res_C) + + if (nrow(res_C) > nrow(reduced)) { + res_C <- res_C[seq(2, nrow(res_C), by = 2), ] + } + + ## recompose after the reduction + if (setup$reduce) { + state_C <- RecomposeState(res_C, reduced) + } else { + state_C <- res_C + } + + setup$state_C <- state_C + setup$reduced <- reduced + + return(setup) } ## Adapted version for "reduction" ReduceStateOmit <- function(data, omit = NULL, sign = 6) { - require(mgcv) + require(mgcv) + + rem <- colnames(data) + if (is.list(omit)) { + indomi <- match(names(omit), colnames(data)) + datao <- data[, -indomi] + } else { + datao <- data + } + + datao <- signif(datao, sign) + red <- mgcv::uniquecombs(datao) + inds <- attr(red, "index") + now <- ncol(red) + + + ## reattach the omitted column(s) + ## FIXME: control if more than one ann is present + if (is.list(omit)) { + red <- cbind(red, rep(data[1, indomi], nrow(red))) - rem <- colnames(data) - if (is.list(omit)) { - indomi <- match(names(omit), colnames(data)) - datao <- data[, -indomi] - } else { - datao <- data - } + colnames(red)[now + 1] <- names(omit) - datao <- signif(datao, sign) - red <- mgcv::uniquecombs(datao) - inds <- attr(red, "index") - now <- ncol(red) - - - ## reattach the omitted column(s) - ## FIXME: control if more than one ann is present - if (is.list(omit)) { - red <- cbind(red, rep(data[1, indomi], nrow(red))) - - colnames(red)[now + 1] <- names(omit) - - ret <- red[, colnames(data)] - } else { - ret <- red - } - rownames(ret) <- NULL - attr(ret, "index") <- inds - return(ret) + ret <- red[, colnames(data)] + } else { + ret <- red + } + rownames(ret) <- NULL + attr(ret, "index") <- inds + return(ret) } @@ -216,50 +224,63 @@ ReduceStateOmit <- function(data, omit = NULL, sign = 6) { ## Attach the name of the calling function to the message displayed on ## R's stdout msgm <- function(...) { - if (local_rank == 0) { - fname <- as.list(sys.call(-1))[[1]] - prefix <- paste0("R: ", fname, " ::") - cat(paste(prefix, ..., "\n")) - } - invisible() + if (local_rank == 0) { + fname <- as.list(sys.call(-1))[[1]] + prefix <- paste0("R: ", fname, " ::") + cat(paste(prefix, ..., "\n")) + } + invisible() } ## Function called by master R process to store on disk all relevant ## parameters for the simulation StoreSetup <- function(setup) { - to_store <- vector(mode = "list", length = 3) - # names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT") - names(to_store) <- c("Sim", "Transport", "DHT") - ## read the setup R file, which is sourced in kin.cpp - tmpbuff <- file(filesim, "r") - setupfile <- readLines(tmpbuff) - close.connection(tmpbuff) + to_store <- vector(mode = "list", length = 4) + ## names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT") + names(to_store) <- c("Sim", "Transport", "DHT", "Cmdline") + + ## read the setup R file, which is sourced in kin.cpp + tmpbuff <- file(filesim, "r") + setupfile <- readLines(tmpbuff) + close.connection(tmpbuff) + + to_store$Sim <- setupfile + + ## to_store$Flow <- list( + ## snapshots = setup$snapshots, + ## gridfile = setup$gridfile, + ## phase = setup$phase, + ## density = setup$density, + ## dt_differ = setup$dt_differ, + ## prolong = setup$prolong, + ## maxiter = setup$maxiter, + ## saved_iter = setup$iter_output, + ## out_save = setup$out_save ) - to_store$Sim <- setupfile + to_store$Transport <- setup$diffusion - # to_store$Flow <- list( - # snapshots = setup$snapshots, - # gridfile = setup$gridfile, - # phase = setup$phase, - # density = setup$density, - # dt_differ = setup$dt_differ, - # prolong = setup$prolong, - # maxiter = setup$maxiter, - # saved_iter = setup$iter_output, - # out_save = setup$out_save ) + ## to_store$Chemistry <- list( + ## nprocs = n_procs, + ## wp_size = work_package_size, + ## base = setup$base, + ## first = setup$first, + ## init = setup$initsim, + ## db = db, + ## kin = setup$kin, + ## ann = setup$ann) - to_store$Transport <- setup$diffusion - # to_store$Chemistry <- list( - # nprocs = n_procs, - # wp_size = work_package_size, - # base = setup$base, - # first = setup$first, - # init = setup$initsim, - # db = db, - # kin = setup$kin, - # ann = setup$ann) + if (dht_enabled) { + to_store$DHT <- list( + enabled = dht_enabled, + log = dht_log + ## signif = dht_final_signif, + ## proptype = dht_final_proptype + ) + } else { + to_store$DHT <- FALSE + } if (dht_enabled) { to_store$DHT <- list( @@ -277,6 +298,6 @@ StoreSetup <- function(setup) { } GetWorkPackageSizesVector <- function(n_packages, package_size, len) { - ids <- rep(1:n_packages, times=package_size, each = 1)[1:len] - return(as.integer(table(ids))) + ids <- rep(1:n_packages, times=package_size, each = 1)[1:len] + return(as.integer(table(ids))) } diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt deleted file mode 100644 index 1aa791dd4..000000000 --- a/app/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -configure_file(poet.h.in poet.h) - -add_executable(poet poet.cpp) -target_include_directories(poet PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") -target_link_libraries(poet PUBLIC poet_lib MPI::MPI_CXX) - -install(TARGETS poet DESTINATION bin) diff --git a/app/poet.h.in b/app/poet.h.in deleted file mode 100644 index d98fe41e9..000000000 --- a/app/poet.h.in +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef POET_H -#define POET_H - -#include "poet/ChemistryModule.hpp" -#include -const char *poet_version = "@POET_VERSION@"; - -const char *CHEMISTRY_MODULE_NAME = "state_C"; - -#endif // POET_H diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index a891897c0..6b5e9b9f3 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -1,3 +1,3 @@ -add_subdirectory(dolo_diffu_inner) +add_subdirectory(dolo) add_subdirectory(surfex) add_subdirectory(barite) diff --git a/bench/barite/CMakeLists.txt b/bench/barite/CMakeLists.txt index 29982a1e0..6c8b63aee 100644 --- a/bench/barite/CMakeLists.txt +++ b/bench/barite/CMakeLists.txt @@ -1,5 +1,6 @@ install(FILES barite.R + barite_interp_eval.R barite.pqi db_barite.dat DESTINATION diff --git a/bench/barite/README.org b/bench/barite/README.org new file mode 100644 index 000000000..e7b316052 --- /dev/null +++ b/bench/barite/README.org @@ -0,0 +1,124 @@ +#+TITLE: Description of =barite= benchmark +#+AUTHOR: MDL +#+DATE: 2023-08-26 +#+STARTUP: inlineimages +#+LATEX_CLASS_OPTIONS: [a4paper,9pt] +#+LATEX_HEADER: \usepackage{fullpage} +#+LATEX_HEADER: \usepackage{amsmath, systeme} +#+LATEX_HEADER: \usepackage{graphicx} +#+LATEX_HEADER: \usepackage{charter} +#+OPTIONS: toc:nil + +* Quick start + +#+begin_src sh :language sh :frame single +mpirun -np 4 ./poet barite.R barite_results +mpirun -np 4 ./poet --interp barite_interp_eval.R barite_results +#+end_src + +* List of Files + +- =barite.R=: POET input script for a 20x20 simulation grid +- =barite_interp_eval.R=: POET input script for a 400x200 simulation + grid +- =db_barite.dat=: PHREEQC database containing the kinetic expressions + for barite and celestite, stripped down from =phreeqc.dat= +- =barite.pqi=: PHREEQC input script defining the chemical system + +* Chemical system + +The benchmark depicts an isotherm porous system at *25 °C* where pure +water is initially at equilibrium with *celestite* (strontium sulfate; +brute formula: SrSO_{4}). A solution containing only dissolved Ba^{2+} +and Cl^- diffuses into the system causing celestite dissolution. The +increased concentration of dissolved sulfate SO_{4}^{2-} induces +precipitation of *barite* (barium sulfate; brute formula: +BaSO_{4}^{2-}). The overall reaction can be written: + +Ba^{2+} + celestite \rightarrow barite + Sr^{2+} + +Both celestite dissolution and barite precipitation are calculated +using a kinetics rate law based on transition state theory: + +rate = -S_{m} k_{barite} (1-SR_{m}) + +where the reaction rate has units mol/s, S_{m} (m^{2}) is the reactive +surface area, k (mol/m^{2}/s) is the kinetic coefficient, and SR is +the saturation ratio, i.e., the ratio of the ion activity product of +the reacting species and the solubility constant, calculated +internally by PHREEQC from the speciated solution. + +For barite, the reaction rate is computed as sum of two mechanisms, +r_{/acid/} and r_{/neutral/}: + +rate_{barite} = S_{barite} (k_{/acid/} + k_{/neutral/}) * (1 - SR_{barite}) + +where: + +k_{/acid/} = 10^{-6.9} e^{-30800 / R} \cdot act(H^{+})^{0.22} + +k_{/neutral/} = 10^{-7.9} e^{-30800 / R} + +R (8.314462 J K^{-1} mol^{-1}) is the gas constant. + +For celestite the kinetic law considers only the acidic mechanism and +reads: + +rate_{celestite} = S_{celestite} 10^{-5.66} e^{-23800 / R} \cdot +act(H^{+})^{0.109} \cdot (1 - SR_{celestite}) + +The kinetic rates as implemented in the =db_barite.dat= file accepts +one parameter which represents reactive surface area in m^{2}. For the +benchmarks the surface areas are set to + +- S_{barite}: 50 m^{2} +- S_{celestite}: 10 m^{2} + +A starting seed for barite is given at 0.001 mol in each domain +element. + +* Nucleation (TODO) + +Geochemical benchmark inspired by Tranter et al. (2021) without +nucleation. + +* POET simulations + +Currently these benchmarks are pure diffusion simulations. There are 7 +transported species: H, O, Charge, Ba, Cl, S(6), Sr. + +** =barite.R= + +- Grid discretization: square domain of 1 \cdot 1 m^{2} discretized in + 20x20 cells +- Boundary conditions: E, S and W sides of the domain are closed; the + N boundary has a *fixed concentration* (Dirichlet) of 0.1 molal + BaCl_{2} +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 20 iteration with \Delta t = 250 s +- *DHT* parameters: + | H | O | Charge | Ba | Cl | S(6) | Sr | + | 10 | 10 | 3 | 5 | 5 | 5 | 5 | + + + +** =barite_interp_eval.R= +- Grid discretization: rectangular domain of 40 \cdot 20 m^{2} + discretized in 400 \cdot 200 cells +- Boundary conditions: all boundaries are closed. The center of the + domain at indeces (200, 100) has fixed concentration of 0.1 molal of + BaCl_{2} +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 200 iterations with \Delta t = 250 s +- *DHT* parameters: + | H | O | Charge | Ba | Cl | S(6) | Sr | + | 10 | 10 | 3 | 5 | 5 | 5 | 5 | + +* References + +- Tranter, Wetzel, De Lucia and Kühn (2021): Reactive transport model + of kinetically controlled celestite to barite replacement, Advances + in Geosciences, 56, 57-–65, 2021. + https://doi.org/10.5194/adgeo-56-57-20211 + + diff --git a/bench/barite/barite.R b/bench/barite/barite.R index dad3b79ea..7d73de75b 100644 --- a/bench/barite/barite.R +++ b/bench/barite/barite.R @@ -1,5 +1,4 @@ -## Time-stamp: "Last modified 2023-04-24 16:51:23 mluebke" - +## Time-stamp: "Last modified 2024-01-12 12:39:14 delucia" database <- normalizePath("../share/poet/bench/barite/db_barite.dat") input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") @@ -16,11 +15,11 @@ types <- c("scratch", "phreeqc", "rds") init_cell <- list( "H" = 110.0124, "O" = 55.5087, - "Charge" = -1.217E-09, - "Ba" = 1.E-10, - "Cl" = 2.E-10, - "S" = 6.205E-4, - "Sr" = 6.205E-4, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04, "Barite" = 0.001, "Celestite" = 1 ) @@ -44,26 +43,20 @@ grid <- list( ## initial conditions init_diffu <- list( - #"H" = 110.0124, - "H" = 0.00000028904, - #"O" = 55.5087, - "O" = 0.000000165205, - #"Charge" = -1.217E-09, - "Charge" = -3.337E-08, - "Ba" = 1.E-10, - "Cl" = 1.E-10, - "S(6)" = 6.205E-4, - "Sr" = 6.205E-4 + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04 ) injection_diff <- list( list( - #"H" = 111.0124, - "H" = 0.0000002890408, - #"O" = 55.50622, - "O" = 0.00002014464, - #"Charge" = -3.337E-08, - "Charge" = -3.337000004885E-08, + "H" = 111.0124, + "O" = 55.50622, + "Charge" = -3.336970273297e-08, "Ba" = 0.1, "Cl" = 0.2, "S(6)" = 0, @@ -88,11 +81,12 @@ alpha_diffu <- c( ## ) boundary <- list( - "N" = rep(1, n), + "N" = c(1,1, rep(0, n-2)), ## "N" = rep(0, n), "E" = rep(0, n), "S" = rep(0, n), - "W" = rep(0, n) +# "W" = rep(0, n) + "W" = c(1,1, rep(0, n-2)) ) diffu_list <- names(alpha_diffu) @@ -115,16 +109,24 @@ diffusion <- list( ## # Needed when using DHT +dht_species <- c( + "H" = 7, + "O" = 7, + "Charge" = 4, + "Ba" = 7, + "Cl" = 7, + "S(6)" = 7, + "Sr" = 7, + "Barite" = 4, + "Celestite" = 4 +) + -dht_species <- names(init_diffu) -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5) chemistry <- list( database = database, input_script = input_script, - dht_species = dht_species, - dht_signif = dht_signif + dht_species = dht_species ) ################################################################# diff --git a/bench/barite/barite_200.R b/bench/barite/barite_200.R new file mode 100644 index 000000000..76526b00e --- /dev/null +++ b/bench/barite/barite_200.R @@ -0,0 +1,138 @@ +## Time-stamp: "Last modified 2024-01-12 12:49:03 delucia" + +database <- normalizePath("../share/poet/bench/barite/db_barite.dat") +input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") +## database <- normalizePath("/home/work/simR/Rphree/poetsims/Sims/Hans/db_barite.dat") +## input_script <- normalizePath("/home/work/simR/Rphree/poetsims/Sims/Hans/barite.pqi") + +################################################################# +## Section 1 ## +## Grid initialization ## +################################################################# + +n <- 200 +m <- 200 + +init_cell <- list( + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04, + "Barite" = 0.001, + "Celestite" = 1 +) + +grid <- list( + n_cells = c(n, m), + s_cells = c(1, 1), + type = "scratch", + init_cell = as.data.frame(init_cell, check.names = FALSE), + props = names(init_cell), + database = database, + input_script = input_script +) + + +################################################################## +## Section 2 ## +## Diffusion parameters and boundary conditions ## +################################################################## + +## initial conditions + +init_diffu <- list( + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04 +) + +injection_diff <- list( + list( + "H" = 111.0124, + "O" = 55.50622, + "Charge" = -3.336970273297e-08, + "Ba" = 0.1, + "Cl" = 0.2, + "S(6)" = 0, + "Sr" = 0) +) + +## diffusion coefficients +alpha_diffu <- c( + "H" = 1E-06, + "O" = 1E-06, + "Charge" = 1E-06, + "Ba" = 1E-06, + "Cl" = 1E-06, + "S(6)" = 1E-06, + "Sr" = 1E-06 +) + +boundary <- list( + "N" = c(1,1, rep(0, n-2)), + "E" = rep(0, n), + "S" = rep(0, n), + "W" = c(1,1, rep(0, n-2)) +) + +diffu_list <- names(alpha_diffu) + +vecinj <- do.call(rbind.data.frame, injection_diff) +names(vecinj) <- names(init_diffu) + +diffusion <- list( + init = as.data.frame(init_diffu, check.names = FALSE), + vecinj = vecinj, + vecinj_index = boundary, + alpha = alpha_diffu +) + +################################################################# +## Section 3 ## +## Chemistry module (Phreeqc) ## +################################################################# + +## DHT significant digits +dht_species <- c( + "H" = 7, + "O" = 7, + "Charge" = 4, + "Ba" = 7, + "Cl" = 7, + "S(6)" = 7, + "Sr" = 7, + "Barite" = 4, + "Celestite" = 4 +) + +chemistry <- list( + database = database, + input_script = input_script, + dht_species = dht_species +) + +################################################################# +## Section 4 ## +## Putting all those things together ## +################################################################# + +iterations <- 50 +dt <- 100 + +setup <- list( + grid = grid, + diffusion = diffusion, + chemistry = chemistry, + iterations = iterations, + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = seq(1, iterations) + ## out_save = c(1, 5, 10, seq(50, iterations, by=50)) +) diff --git a/bench/barite/barite_interp_eval.R b/bench/barite/barite_interp_eval.R new file mode 100644 index 000000000..6f2b79a34 --- /dev/null +++ b/bench/barite/barite_interp_eval.R @@ -0,0 +1,136 @@ +## Time-stamp: "Last modified 2024-01-12 11:35:11 delucia" + +database <- normalizePath("../share/poet/bench/barite/db_barite.dat") +input_script <- normalizePath("../share/poet/bench/barite/barite.pqi") + +################################################################# +## Section 1 ## +## Grid initialization ## +################################################################# + +n <- 400 +m <- 200 + +types <- c("scratch", "phreeqc", "rds") + +init_cell <- list( + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04, + "Barite" = 0.001, + "Celestite" = 1 +) + +grid <- list( + n_cells = c(n, m), + s_cells = c(1, 1), + type = types[1], + init_cell = as.data.frame(init_cell, check.names = FALSE), + props = names(init_cell), + database = database, + input_script = input_script +) + + +################################################################## +## Section 2 ## +## Diffusion parameters and boundary conditions ## +################################################################## + +## initial conditions + +init_diffu <- list( + "H" = 110.0124, + "O" = 55.5087, + "Charge" = -1.216307845207e-09, + "Ba" = 1.E-12, + "Cl" = 2.E-12, + "S(6)" = 6.204727095976e-04, + "Sr" = 6.204727095976e-04 +) + +injection_diff <- list( + list( + "H" = 111.0124, + "O" = 55.50622, + "Charge" = -3.336970273297e-08, + "Ba" = 0.1, + "Cl" = 0.2, + "S(6)" = 0, + "Sr" = 0) +) + +## diffusion coefficients +alpha_diffu <- c( + "H" = 1E-06, + "O" = 1E-06, + "Charge" = 1E-06, + "Ba" = 1E-06, + "Cl" = 1E-06, + "S(6)" = 1E-06, + "Sr" = 1E-06 +) + +boundary <- list( + "N" = c(1,1, rep(0, n-2)), + "E" = rep(0, n), + "S" = rep(0, n), + "W" = c(1,1, rep(0, n-2)) +) + +diffu_list <- names(alpha_diffu) + +vecinj <- do.call(rbind.data.frame, injection_diff) +names(vecinj) <- names(init_diffu) + +diffusion <- list( + init = as.data.frame(init_diffu, check.names = FALSE), + vecinj = vecinj, + vecinj_index = boundary, + alpha = alpha_diffu +) + +################################################################# +## Section 3 ## +## Chemistry module (Phreeqc) ## +################################################################# + +## # Needed when using DHT +dht_species <- c( + "H" = 7, + "O" = 7, + "Charge" = 4, + "Ba" = 7, + "Cl" = 7, + "S(6)" = 7, + "Sr" = 7, + "Barite" = 4, + "Celestite" = 4 +) + +chemistry <- list( + database = database, + input_script = input_script, + dht_species = dht_species +) + +################################################################# +## Section 4 ## +## Putting all those things together ## +################################################################# +iterations <- 200 +dt <- 250 + +setup <- list( + grid = grid, + diffusion = diffusion, + chemistry = chemistry, + iterations = iterations, + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = seq(1, iterations) +) diff --git a/bench/dolo_diffu_inner/CMakeLists.txt b/bench/dolo/CMakeLists.txt similarity index 74% rename from bench/dolo_diffu_inner/CMakeLists.txt rename to bench/dolo/CMakeLists.txt index cde190af0..d2c0acd1e 100644 --- a/bench/dolo_diffu_inner/CMakeLists.txt +++ b/bench/dolo/CMakeLists.txt @@ -2,6 +2,8 @@ install(FILES dolo_diffu_inner.R dolo_diffu_inner_large.R dolo_inner.pqi + dolo_interp_long.R + phreeqc_kin.dat DESTINATION share/poet/bench/dolo ) diff --git a/bench/dolo_diffu_inner/Eval.R b/bench/dolo/Eval.R similarity index 100% rename from bench/dolo_diffu_inner/Eval.R rename to bench/dolo/Eval.R diff --git a/bench/dolo/README.org b/bench/dolo/README.org new file mode 100644 index 000000000..67caf6e12 --- /dev/null +++ b/bench/dolo/README.org @@ -0,0 +1,162 @@ +#+TITLE: Description of =dolo= benchmark +#+AUTHOR: MDL +#+DATE: 2023-08-26 +#+STARTUP: inlineimages +#+LATEX_CLASS_OPTIONS: [a4paper,9pt] +#+LATEX_HEADER: \usepackage{fullpage} +#+LATEX_HEADER: \usepackage{amsmath, systeme} +#+LATEX_HEADER: \usepackage{graphicx} +#+LATEX_HEADER: \usepackage{charter} +#+OPTIONS: toc:nil + +* Quick start + +#+begin_src sh :language sh :frame single +mpirun -np 4 ./poet dolo_diffu_inner.R dolo_diffu_inner_res +mpirun -np 4 ./poet --dht --interp dolo_interp_long.R dolo_interp_long_res +#+end_src + +* List of Files + +- =dolo_diffu_inner.R=: POET input script for a 100x100 simulation + grid +- =dolo_interp_long.R=: POET input script for a 400x200 simulation + grid +- =phreeqc_kin.dat=: PHREEQC database containing the kinetic expressions + for dolomite and celestite, stripped down from =phreeqc.dat= +- =dol.pqi=: PHREEQC input script for the chemical system +# - =dolo.R=: POET input script for a 20x20 simulation grid +# - =dolo_diffu_inner_large.R=: POET input script for a 400x200 +# simulation grid + +* Chemical system + +This model describes a simplified version of /dolomitization/ of +calcite, itself a complex and not yet fully understood natural process +which is observed naturally at higher temperatures (Möller and De +Lucia, 2020). Variants of such model have been widely used in many +instances especially for the purpose of benchmarking numerical codes +(De Lucia et al., 2021 and references therein). + +We consider an isotherm porous system at *25 °C* in which pure water +is initially at equilibrium with *calcite* (calcium carbonate; brute +formula: CaCO_{3}). An MgCl_{2} solution enters the system causing +calcite dissolution. The released excess concentration of dissolved +calcium Ca^{2+} and carbonate (CO_{3}^{2-}) induces after a while +supersaturation and hence precipitation of *dolomite* +(calcium-magnesium carbonate; brute formula: CaMg(CO_{3})_{2}). The +overall /dolomitization/ reaction can be written: + +Mg^{2+} + 2 \cdot calcite \rightarrow dolomite + 2 \cdot Ca^{2+} + +The precipitation of dolomite continues until enough Ca^{2+} is +present in solution. Further injection of MgCl_{2} changes its +saturation state causing its dissolution too. After enough time, the +whole system has depleted all minerals and the injected MgCl_{2} +solution fills up the domain. + +Both calcite dissolution and dolomite precipitation/dissolution follow +a kinetics rate law based on transition state theory (Palandri and +Karhaka, 2004; De Lucia et al., 2021). + +rate = -S_{m} k_{m} (1-SR_{m}) + +where the reaction rate has units mol/s, S_{m} (m^{2}) is the reactive +surface area, k_{m} (mol/m^{2}/s) is the kinetic coefficient, and SR +is the saturation ratio, i.e., the ratio of the ion activity product +of the reacting species and the solubility constant, calculated +internally by PHREEQC from the speciated solution. + +For dolomite, the kinetic coefficient results from the sum of two +mechanisms, r_{/acid/} and r_{/neutral/}: + +rate_{dolomite} = S_{dolomite} (k_{/acid/} + k_{/neutral/}) * (1 - SR_{dolomite}) + +where: + +k_{/acid/} = 10^{-3.19} e^{-36100 / R} \cdot act(H^{+})^{0.5} + +k_{/neutral/} = 10^{-7.53} e^{-52200 / R} + +R (8.314462 J K^{-1} mol^{-1}) is the gas constant. + +Similarly, the kinetic law for calcite reads: + +k_{/acid/} = 10^{-0.3} e^{-14400 / R} \cdot act(H^{+})^{0.5} + +k_{/neutral/} = 10^{-5.81} e^{-23500 / R} + +The kinetic laws as implemented in the =phreeqc_kin.dat= file accepts +one parameter which represents reactive surface area in m^{2}. For the +benchmarks the surface areas are set to + +- S_{dolomite}: 0.005 m^{2} +- S_{calcite}: 0.05 m^{2} + +The initial content of calcite in the domain is of 0.0002 mol per kg +of water. A constant partial pressure of 10^{-1675} atm of O_{2(g)} is +maintained at any time in the domain in order to fix the redox +potential of the solution to an oxidizing state (pe around 9). + +Note that Cl is unreactive in this system and only effects the +computation of the activities in solution. + +* POET simulations + +Several benchmarks based on the same chemical system are defined here +with different grid sizes, resolution and boundary conditions. The +transported elemental concentrations are 7: C(4), Ca, Cl, Mg and the +implicit total H, total O and Charge as required by PHREEQC_RM. + +** =dolo_diffu_inner.R= + +- Grid discretization: square domain of 1 \cdot 1 m^{2} discretized in + 100x100 cells +- Boundary conditions: All sides of the domain are closed. *Fixed + concentration* of 0.001 molal of MgCl_{2} is defined in the domain + cell (20, 20) and of 0.002 molal MgCl_{2} at cells (60, 60) and + (80, 80) +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 10 iterations with \Delta t of 200 s +- *DHT* parameters: + | H | O | Charge | C(4) | Ca | Cl | Mg | Calcite | Dolomite | + | 10 | 10 | 3 | 5 | 5 | 5 | 5 | 5 | 5 | +- Hooks: the following hooks are defined: + 1. =dht_fill=: + 2. =dht_fuzz=: + 3. =interp_pre_func=: + 4. =interp_post_func=: + + +** =dolo_interp_long.R= + +- Grid discretization: rectangular domain of 5 \cdot 2.5 m^{2} + discretized in 400 \times 200 cells +- Boundary conditions: *Fixed concentrations* equal to the initial + state are imposed at all four sides of the domain. *Fixed + concentration* of 0.001 molal of MgCl_{2} is defined in the domain + center at cell (100, 50) +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 20000 iterations with \Delta t of 200 s +- *DHT* parameters: + | H | O | Charge | C(4) | Ca | Cl | Mg | Calcite | Dolomite | + | 10 | 10 | 3 | 5 | 5 | 5 | 5 | 5 | 5 | +- Hooks: the following hooks are defined: + 1. =dht_fill=: + 2. =dht_fuzz=: + 3. =interp_pre_func=: + 4. =interp_post_func=: + + +* References + +- De Lucia, Kühn, Lindemann, Lübke, Schnor: POET (v0.1): speedup of + many-core parallel reactive transport simulations with fast DHT + lookups, Geosci. Model Dev., 14, 7391–7409, 2021. + https://doi.org/10.5194/gmd-14-7391-2021 +- Möller, Marco De Lucia: The impact of Mg^{2+} ions on equilibration + of Mg-Ca carbonates in groundwater and brines, Geochemistry + 80, 2020. https://doi.org/10.1016/j.chemer.2020.125611 +- Palandri, Kharaka: A Compilation of Rate Parameters of Water-Mineral + Interaction Kinetics for Application to Geochemical Modeling, Report + 2004-1068, USGS, 2004. diff --git a/data/dol.pqi b/bench/dolo/dol.pqi similarity index 100% rename from data/dol.pqi rename to bench/dolo/dol.pqi diff --git a/bench/dolo_diffu_inner/dolo_diffu_inner.R b/bench/dolo/dolo_diffu_inner.R similarity index 59% rename from bench/dolo_diffu_inner/dolo_diffu_inner.R rename to bench/dolo/dolo_diffu_inner.R index f243889d4..a16f8a49c 100644 --- a/bench/dolo_diffu_inner/dolo_diffu_inner.R +++ b/bench/dolo/dolo_diffu_inner.R @@ -1,6 +1,6 @@ -## Time-stamp: "Last modified 2023-04-24 15:12:02 mluebke" +## Time-stamp: "Last modified 2023-08-16 17:04:42 mluebke" -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") ################################################################# @@ -29,11 +29,7 @@ init_cell <- list( grid <- list( n_cells = c(n, m), s_cells = c(1, 1), - type = types[1], - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + type = types[1] ) @@ -44,8 +40,8 @@ grid <- list( ## initial conditions init_diffu <- list( - "H" = 0.000211313883539788, - "O" = 0.00398302904424952, + "H" = 110.683, + "O" = 55.3413, "Charge" = -5.0822e-19, "C(4)" = 1.2279E-4, "Ca" = 1.2279E-4, @@ -66,34 +62,34 @@ alpha_diffu <- c( ## list of boundary conditions/inner nodes vecinj_diffu <- list( - list( - "H" = 0.0001540445, - "O" = 0.002148006, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001 - ), - list( - "H" = 0.0001610193, - "O" = 0.002386934, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0.0, - "Cl" = 0.004, - "Mg" = 0.002 - ) + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0, + "Cl" = 0.002, + "Mg" = 0.001 + ), + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0.0, + "Cl" = 0.004, + "Mg" = 0.002 + ) ) vecinj_inner <- list( - l1 = c(1,20,20), - l2 = c(2,80,80), - l3 = c(2,60,80) + l1 = c(1, 20, 20), + l2 = c(2, 80, 80), + l3 = c(2, 60, 80) ) boundary <- list( -# "N" = c(1, rep(0, n-1)), + # "N" = c(1, rep(0, n-1)), "N" = rep(0, n), "E" = rep(0, n), "S" = rep(0, n), @@ -120,16 +116,59 @@ diffusion <- list( ## # Needed when using DHT -dht_species <- c("H", "O", "Charge", "C(4)", "Ca", "Cl", "Mg", "Calcite", - "Dolomite") -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5, 5, 5) +dht_species <- c( + "H" = 10, + "O" = 10, + "Charge" = 3, + "C(4)" = 5, + "Ca" = 5, + "Cl" = 5, + "Mg" = 5, + "Calcite" = 5, + "Dolomite" = 5 +) + +check_sign_cal_dol_dht <- function(old, new) { + if ((old["Calcite"] == 0) != (new["Calcite"] == 0)) { + return(TRUE) + } + if ((old["Dolomite"] == 0) != (new["Dolomite"] == 0)) { + return(TRUE) + } + return(FALSE) +} + +fuzz_input_dht_keys <- function(input) { + return(input[names(dht_species)]) +} + +check_sign_cal_dol_interp <- function(to_interp, data_set) { + data_set <- as.data.frame(do.call(rbind, data_set), check.names = FALSE, optional = TRUE) + names(data_set) <- names(dht_species) + cal <- (data_set$Calcite == 0) == (to_interp["Calcite"] == 0) + dol <- (data_set$Dolomite == 0) == (to_interp["Dolomite"] == 0) + + cal_dol_same_sig <- cal == dol + return(rev(which(!cal_dol_same_sig))) +} + +check_neg_cal_dol <- function(result) { + neg_sign <- (result["Calcite"] <- 0) || (result["Dolomite"] < 0) + return(any(neg_sign)) +} + +hooks <- list( + dht_fill = check_sign_cal_dol_dht, + dht_fuzz = fuzz_input_dht_keys, + interp_pre_func = check_sign_cal_dol_interp, + interp_post_func = check_neg_cal_dol +) chemistry <- list( database = database, input_script = input_script, dht_species = dht_species, - dht_signif = dht_signif + hooks = hooks ) ################################################################# diff --git a/bench/dolo_diffu_inner/dolo_diffu_inner_large.R b/bench/dolo/dolo_diffu_inner_large.R similarity index 58% rename from bench/dolo_diffu_inner/dolo_diffu_inner_large.R rename to bench/dolo/dolo_diffu_inner_large.R index 69b528ce3..c9c70c6a4 100644 --- a/bench/dolo_diffu_inner/dolo_diffu_inner_large.R +++ b/bench/dolo/dolo_diffu_inner_large.R @@ -1,6 +1,6 @@ -## Time-stamp: "Last modified 2023-04-24 15:43:26 mluebke" +## Time-stamp: "Last modified 2023-08-16 17:05:04 mluebke" -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") ################################################################# @@ -28,12 +28,8 @@ init_cell <- list( grid <- list( n_cells = c(n, m), - s_cells = c(2, 1), - type = types[1], - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + s_cells = c(20, 10), + type = types[1] ) @@ -44,8 +40,8 @@ grid <- list( ## initial conditions init_diffu <- list( - "H" = 0.000211313883539788, - "O" = 0.00398302904424952, + "H" = 110.683, + "O" = 55.3413, "Charge" = -5.0822e-19, "C(4)" = 1.2279E-4, "Ca" = 1.2279E-4, @@ -66,34 +62,34 @@ alpha_diffu <- c( ## list of boundary conditions/inner nodes vecinj_diffu <- list( - list( - "H" = 0.0001540445, - "O" = 0.002148006, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001 - ), - list( - "H" = 0.0001610193, - "O" = 0.002386934, - "Charge" = 1.90431e-16, - "C(4)" = 0, - "Ca" = 0.0, - "Cl" = 0.004, - "Mg" = 0.002 - ) + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0, + "Cl" = 0.002, + "Mg" = 0.001 + ), + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0.0, + "Cl" = 0.004, + "Mg" = 0.002 + ) ) vecinj_inner <- list( - l1 = c(1,400,200), - l2 = c(2,1400,800), - l3 = c(2,1600,800) + l1 = c(1, 400, 200), + l2 = c(2, 1400, 800), + l3 = c(2, 1600, 800) ) boundary <- list( -# "N" = c(1, rep(0, n-1)), + # "N" = c(1, rep(0, n-1)), "N" = rep(0, n), "E" = rep(0, m), "S" = rep(0, n), @@ -118,18 +114,60 @@ diffusion <- list( ## Chemistry module (Phreeqc) ## ################################################################# - ## # Needed when using DHT -dht_species <- c("H", "O", "Charge", "C(4)", "Ca", "Cl", "Mg", "Calcite", - "Dolomite") -#dht_signif <- rep(15, length(dht_species)) -dht_signif <- c(10, 10, 3, 5, 5, 5, 5, 5, 5) +dht_species <- c( + "H" = 10, + "O" = 10, + "Charge" = 3, + "C(4)" = 5, + "Ca" = 5, + "Cl" = 5, + "Mg" = 5, + "Calcite" = 5, + "Dolomite" = 5 +) + +check_sign_cal_dol_dht <- function(old, new) { + if ((old["Calcite"] == 0) != (new["Calcite"] == 0)) { + return(TRUE) + } + if ((old["Dolomite"] == 0) != (new["Dolomite"] == 0)) { + return(TRUE) + } + return(FALSE) +} + +fuzz_input_dht_keys <- function(input) { + return(input[names(dht_species)]) +} + +check_sign_cal_dol_interp <- function(to_interp, data_set) { + data_set <- as.data.frame(do.call(rbind, data_set), check.names = FALSE, optional = TRUE) + names(data_set) <- names(dht_species) + cal <- (data_set$Calcite == 0) == (to_interp["Calcite"] == 0) + dol <- (data_set$Dolomite == 0) == (to_interp["Dolomite"] == 0) + + cal_dol_same_sig <- cal == dol + return(rev(which(!cal_dol_same_sig))) +} + +check_neg_cal_dol <- function(result) { + neg_sign <- (result["Calcite"] <- 0) || (result["Dolomite"] < 0) + return(any(neg_sign)) +} + +hooks <- list( + dht_fill = check_sign_cal_dol_dht, + dht_fuzz = fuzz_input_dht_keys, + interp_pre_func = check_sign_cal_dol_interp, + interp_post_func = check_neg_cal_dol +) chemistry <- list( database = database, input_script = input_script, dht_species = dht_species, - dht_signif = dht_signif + hooks = hooks ) ################################################################# @@ -148,5 +186,5 @@ setup <- list( iterations = iterations, timesteps = rep(dt, iterations), store_result = TRUE, - out_save = seq(5, iterations, by=5) + out_save = seq(5, iterations, by = 5) ) diff --git a/bench/dolo_diffu_inner/dolo_inner.pqi b/bench/dolo/dolo_inner.pqi similarity index 100% rename from bench/dolo_diffu_inner/dolo_inner.pqi rename to bench/dolo/dolo_inner.pqi diff --git a/bench/dolo/dolo_interp_long.R b/bench/dolo/dolo_interp_long.R new file mode 100644 index 000000000..7acca4298 --- /dev/null +++ b/bench/dolo/dolo_interp_long.R @@ -0,0 +1,204 @@ +## Time-stamp: "Last modified 2023-08-16 14:57:25 mluebke" + +database <- normalizePath("../share/poet/bench/dolo/phreeqc_kin.dat") +input_script <- normalizePath("../share/poet/bench/dolo/dolo_inner.pqi") + +################################################################# +## Section 1 ## +## Grid initialization ## +################################################################# + +n <- 400 +m <- 200 + +types <- c("scratch", "phreeqc", "rds") + +init_cell <- list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = -5.0822e-19, + "C" = 1.2279E-4, + "Ca" = 1.2279E-4, + "Cl" = 0, + "Mg" = 0, + "O2g" = 0.499957, + "Calcite" = 2.07e-4, + "Dolomite" = 0 +) + +grid <- list( + n_cells = c(n, m), + s_cells = c(5, 2.5), + type = types[1] +) + + +################################################################## +## Section 2 ## +## Diffusion parameters and boundary conditions ## +################################################################## + +## initial conditions +init_diffu <- list( + "H" = 1.110124E+02, + "O" = 5.550833E+01, + "Charge" = -1.216307659761E-09, + "C(4)" = 1.230067028174E-04, + "Ca" = 1.230067028174E-04, + "Cl" = 0, + "Mg" = 0 +) + +## diffusion coefficients +alpha_diffu <- c( + "H" = 1E-6, + "O" = 1E-6, + "Charge" = 1E-6, + "C(4)" = 1E-6, + "Ca" = 1E-6, + "Cl" = 1E-6, + "Mg" = 1E-6 +) + +## list of boundary conditions/inner nodes +vecinj_diffu <- list( + list( + "H" = 1.110124E+02, + "O" = 5.550796E+01, + "Charge" = -3.230390327801E-08, + "C(4)" = 0, + "Ca" = 0, + "Cl" = 0.002, + "Mg" = 0.001 + ), + list( + "H" = 110.683, + "O" = 55.3413, + "Charge" = 1.90431e-16, + "C(4)" = 0, + "Ca" = 0.0, + "Cl" = 0.004, + "Mg" = 0.002 + ), + init_diffu +) + +vecinj_inner <- list( + l1 = c(1, floor(n / 2), floor(m / 2)) + # l2 = c(2,1400,800), + # l3 = c(2,1600,800) +) + +boundary <- list( + # "N" = c(1, rep(0, n-1)), + "N" = rep(3, n), + "E" = rep(3, m), + "S" = rep(3, n), + "W" = rep(3, m) +) + +diffu_list <- names(alpha_diffu) + +vecinj <- do.call(rbind.data.frame, vecinj_diffu) +names(vecinj) <- names(init_diffu) + +diffusion <- list( + init = as.data.frame(init_diffu, check.names = FALSE), + vecinj = vecinj, + vecinj_inner = vecinj_inner, + vecinj_index = boundary, + alpha = alpha_diffu +) + +################################################################# +## Section 3 ## +## Chemistry module (Phreeqc) ## +################################################################# + + +## # optional when using DHT +dht_species <- c( + "H" = 3, + "O" = 3, + "Charge" = 3, + "C(4)" = 6, + "Ca" = 6, + "Cl" = 3, + "Mg" = 5, + "Calcite" = 4, + "Dolomite" = 4 +) + +## # Optional when using Interpolation (example with less key species and custom +## # significant digits) + +# pht_species <- c( +# "C(4)" = 3, +# "Ca" = 3, +# "Mg" = 2, +# "Calcite" = 2, +# "Dolomite" = 2 +# ) + +check_sign_cal_dol_dht <- function(old, new) { + if ((old["Calcite"] == 0) != (new["Calcite"] == 0)) { + return(TRUE) + } + if ((old["Dolomite"] == 0) != (new["Dolomite"] == 0)) { + return(TRUE) + } + return(FALSE) +} + +fuzz_input_dht_keys <- function(input) { + return(input[names(dht_species)]) +} + +check_sign_cal_dol_interp <- function(to_interp, data_set) { + data_set <- as.data.frame(do.call(rbind, data_set), check.names = FALSE, optional = TRUE) + names(data_set) <- names(dht_species) + cal <- (data_set$Calcite == 0) == (to_interp["Calcite"] == 0) + dol <- (data_set$Dolomite == 0) == (to_interp["Dolomite"] == 0) + + cal_dol_same_sig <- cal == dol + return(rev(which(!cal_dol_same_sig))) +} + +check_neg_cal_dol <- function(result) { + neg_sign <- (result["Calcite"] < 0) || (result["Dolomite"] < 0) + return(neg_sign) +} + +hooks <- list( + dht_fill = check_sign_cal_dol_dht, + dht_fuzz = fuzz_input_dht_keys, + interp_pre_func = check_sign_cal_dol_interp, + interp_post_func = check_neg_cal_dol +) + +chemistry <- list( + database = database, + input_script = input_script, + dht_species = dht_species, + hooks = hooks + # pht_species = pht_species +) + +################################################################# +## Section 4 ## +## Putting all those things together ## +################################################################# + + +iterations <- 20000 +dt <- 200 + +setup <- list( + grid = grid, + diffusion = diffusion, + chemistry = chemistry, + iterations = iterations, + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = c(1, seq(50, iterations, by = 50)) +) diff --git a/data/phreeqc_kin.dat b/bench/dolo/phreeqc_kin.dat similarity index 99% rename from data/phreeqc_kin.dat rename to bench/dolo/phreeqc_kin.dat index 8c1b671aa..01d5bf516 100644 --- a/data/phreeqc_kin.dat +++ b/bench/dolo/phreeqc_kin.dat @@ -2,7 +2,7 @@ ### SURFACE and with the RATES for Calcite and Dolomite to use with ### RedModRphree -### Time-stamp: "Last modified 2018-05-06 14:36:23 delucia" +### Time-stamp: "Last modified 2023-05-23 10:35:56 mluebke" # PHREEQC.DAT for calculating pressure dependence of reactions, with # molal volumina of aqueous species and of minerals, and @@ -1276,9 +1276,9 @@ Calcite 110 logK25=-5.81 120 mech_c=(10^logK25)*(e^(-Ea/R*deltaT)) 130 rate=mech_a+mech_c - ## 140 IF SI("Calcite")<0 then moles=parm(1)*rate*(1-SR("Calcite")) # dissolution + 140 IF (SI("Calcite")<0 AND M>0) then moles=parm(1)*rate*(1-SR("Calcite")) # dissolution ## 145 IF SI("Calcite")>0 then moles=parm(1)*M*rate*(-1+SR("Calcite")) # precipitation - 150 moles=parm(1)*rate*(1-SR("Calcite")) # precipitation + ## 150 moles=parm(1)*rate*(1-SR("Calcite")) # precipitation 200 save moles*time -end diff --git a/bench/surfex/README.org b/bench/surfex/README.org new file mode 100644 index 000000000..b765f4ef5 --- /dev/null +++ b/bench/surfex/README.org @@ -0,0 +1,100 @@ +#+TITLE: Description of =surfex= benchmark +#+AUTHOR: MDL +#+DATE: 2023-08-26 +#+STARTUP: inlineimages +#+LATEX_CLASS_OPTIONS: [a4paper,9pt] +#+LATEX_HEADER: \usepackage{fullpage} +#+LATEX_HEADER: \usepackage{amsmath, systeme} +#+LATEX_HEADER: \usepackage{graphicx} +#+LATEX_HEADER: \usepackage{charter} +#+OPTIONS: toc:nil + +* Quick start + +#+begin_src sh :language sh :frame single +mpirun -np 4 ./poet ex.R ex_res +mpirun -np 4 ./poet surfex.R surfex_res +#+end_src + +* List of Files +- =ex.R=: POET input script for a 100x100 simulation grid, only + exchange +- =ExBase.pqi=: PHREEQC input script for the =ex.R= model +- =surfex.R=: POET input script for a 100x100 simulation grid + considering both cation exchange and surface complexation +- =SurfExBase.pqi=: PHREEQC input script for the =surfex.R= model +- =SMILE_2021_11_01_TH.dat=: PHREEQC database containing the + parametrized data for Surface and Exchange, based on the SMILE + Thermodynamic Database (Version 01-November-2021) + +* Chemical system + +This model describes migration of Uranium radionuclide in Opalinus +clay subject to surface complexation and cation exchange on the +surface of clay minerals. These two processes account for the binding +of aqueous complexes to the surfaces of minerals, which may have a +significant impact on safety of underground nuclear waste repository. +Namely, they can act as retardation buffer for uranium complexes +entering into a natural system. The system is kindly provided by Dr. +T. Hennig and is inspired to the sandy facies BWS-A3 sample from the +Mont Terri underground lab (Hennig and Kühn, 2021). + +This chemical system is highly redox-sensitive, and several elements +are defined in significant amounts in different valence states. In +total, 20 elemental concentrations and valences are transported: +C(-4), C(4), Ca, Cl, Fe(2), Fe(3), K, Mg, Na, S(-2), S(2), S(4), S(6), +Sr , U(4), U(5), U(6); plus the total H, total O and Charge implicitly +required by PHREEQC_RM. + +** Exchange + +The SMILE database defines thermodynamical data for exchange of all +major cations and uranyl-ions on Illite and Montmorillonite. In +PHREEQC terms: +- *Y* for Montmorillonite, with a total amount of 1.2585 + milliequivalents and +- *Z* for Illite, with a total amount of 0.9418 meq + +** Surface + +Here we consider a Donnan diffuse double layer of 0.49 nm. Six +distinct sorption sites are defined: +- Kln_aOH (aluminol site) and Kln_siOH (silanol) for Kaolinite +- For Illite, strong and weak sites Ill_sOH and Ill_wOH respectively +- For Montmorillonite, strong and weak sites Mll_sOH and Mll_wOH + respectively + +Refer to the =SurfExBase.pqi= script for the actual numerical values +of the parameters. + +* POET simulations + +** =ex.R= + +This benchmark only considers EXCHANGE, no mineral or SURFACE +complexation is involved. + +- Grid discretization: square domain of 1 \cdot 1 m^{2} discretized in + 100x100 cells +- Boundary conditions: E, S and W sides of the domain are closed. + *Fixed concentrations* are fixed at the N boundary. +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 10 iterations with \Delta t of 200 s +- *DHT* is not implemented as of yet for models including SURFACE and + EXCHANGE geochemical processes *TODO* +- Hooks: no hooks defined *TODO* + +** =surfex.R= + +- Grid discretization: rectangular domain of 1 \cdot 1 m^{2} + discretized in 10 \times 10 cells +- Boundary conditions: E, S and W sides of the domain are closed. + *Fixed concentrations* are fixed at the N boundary. +- Diffusion coefficients: isotropic homogeneous \alpha = 1E-06 +- Time steps & iterations: 10 iterations with \Delta t of 200 s + +* References + +- Hennig, T.; Kühn, M.Surrogate Model for Multi-Component Diffusion of + Uranium through Opalinus Clay on the Host Rock Scale. Appl. Sci. + 2021, 11, 786. https://doi.org/10.3390/app11020786 diff --git a/bench/surfex/ex.R b/bench/surfex/ex.R index 299c5db0c..13feea1d8 100644 --- a/bench/surfex/ex.R +++ b/bench/surfex/ex.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-04-17 12:29:27 mluebke" +## Time-stamp: "Last modified 2023-08-02 13:59:35 mluebke" database <- normalizePath("./SMILE_2021_11_01_TH.dat") input_script <- normalizePath("./ExBase.pqi") @@ -40,11 +40,7 @@ init_cell <- list(H = 1.476571028625e-01, grid <- list( n_cells = c(n, m), s_cells = c(1, 1), - type = "scratch", - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + type = "scratch" ) diff --git a/bench/surfex/surfex.R b/bench/surfex/surfex.R index 37a5e0429..27409b97f 100644 --- a/bench/surfex/surfex.R +++ b/bench/surfex/surfex.R @@ -1,4 +1,4 @@ -## Time-stamp: "Last modified 2023-04-17 15:48:21 mluebke" +## Time-stamp: "Last modified 2023-08-02 13:59:44 mluebke" database <- normalizePath("../share/poet/bench/surfex/SMILE_2021_11_01_TH.dat") input_script <- normalizePath("../share/poet/bench/surfex/SurfExBase.pqi") @@ -10,8 +10,8 @@ cat(paste(":: R This is a test 1\n")) ## Grid initialization ## ################################################################# -n <- 10 -m <- 10 +n <- 1000 +m <- 1000 types <- c("scratch", "phreeqc", "rds") @@ -39,12 +39,8 @@ init_cell <- list(H = 1.476571028625e-01, grid <- list( n_cells = c(n, m), - s_cells = c(1, 1), - type = "scratch", - init_cell = as.data.frame(init_cell, check.names = FALSE), - props = names(init_cell), - database = database, - input_script = input_script + s_cells = c(n/10, m/10), + type = "scratch" ) @@ -131,7 +127,7 @@ chemistry <- list( ################################################################# -iterations <- 10 +iterations <- 100 dt <- 200 setup <- list( diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt deleted file mode 100644 index 664e4ac45..000000000 --- a/data/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -install( -FILES - SimDol2D_diffu.R - SimDol1D_diffu.R - phreeqc_kin.dat - dol.pqi -DESTINATION - share/poet/examples) diff --git a/data/SimDol1D_diffu.R b/data/SimDol1D_diffu.R deleted file mode 100644 index 6e74e2ccf..000000000 --- a/data/SimDol1D_diffu.R +++ /dev/null @@ -1,180 +0,0 @@ -################################################################# -## Section 1 ## -## Grid initialization ## -################################################################# - -n <- 5 -m <- 5 - -types <- c("scratch", "phreeqc", "rds") - -# initsim <- c("SELECTED_OUTPUT", "-high_precision true", -# "-reset false", -# "USER_PUNCH", -# "-head C Ca Cl Mg pH pe O2g Calcite Dolomite", -# "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")", -# "SOLUTION 1", -# "units mol/kgw", -# "temp 25.0", "water 1", -# "pH 9.91 charge", -# "pe 4.0", -# "C 1.2279E-04", -# "Ca 1.2279E-04", -# "Cl 1E-12", -# "Mg 1E-12", -# "PURE 1", -# "O2g -0.6788 10.0", -# "Calcite 0.0 2.07E-3", -# "Dolomite 0.0 0.0", -# "END") - -# needed if init type is set to "scratch" -# prop <- c("C", "Ca", "Cl", "Mg", "pH", "pe", "O2g", "Calcite", "Dolomite") - -init_cell <- list( - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "pH" = 9.91, - "pe" = 4, - "O2g" = 10, - "Calcite" = 2.07e-4, - "Dolomite" = 0 -) - -grid <- list( - n_cells = n, - s_cells = n, - type = types[1], - init_cell = as.data.frame(init_cell), - props = names(init_cell), - init_script = NULL -) - - -################################################################## -## Section 2 ## -## Diffusion parameters and boundary conditions ## -################################################################## - -init_diffu <- c( - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "pe" = 4, - "pH" = 7 -) - -alpha_diffu <- c( - "C" = 1E-4, - "Ca" = 1E-4, - "Cl" = 1E-4, - "Mg" = 1E-4, - "pe" = 1E-4, - "pH" = 1E-4 -) - -vecinj_diffu <- list( - list( - "C" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001, - "pe" = 4, - "pH" = 9.91 - ) -) - -boundary <- list( - "E" = 0, - "W" = 1 -) - -diffu_list <- names(alpha_diffu) - -diffusion <- list( - init = init_diffu, - vecinj = do.call(rbind.data.frame, vecinj_diffu), - vecinj_index = boundary, - alpha = alpha_diffu -) - -################################################################## -## Section 3 ## -## Phreeqc simulation ## -################################################################## - -db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", - package = "RedModRphree" -), is.db = TRUE) - -phreeqc::phrLoadDatabaseString(db) - -# NOTE: This won't be needed in the future either. Could also be done in a. pqi -# file -base <- c( - "SOLUTION 1", - "units mol/kgw", - "temp 25.0", - "water 1", - "pH 9.91 charge", - "pe 4.0", - "C 1.2279E-04", - "Ca 1.2279E-04", - "Mg 0.001", - "Cl 0.002", - "PURE 1", - "O2g -0.1675 10", - "KINETICS 1", - "-steps 100", - "-step_divide 100", - "-bad_step_max 2000", - "Calcite", "-m 0.000207", - "-parms 0.0032", - "Dolomite", - "-m 0.0", - "-parms 0.00032", - "END" -) - -selout <- c( - "SELECTED_OUTPUT", "-high_precision true", "-reset false", - "-time", "-soln", "-temperature true", "-water true", - "-pH", "-pe", "-totals C Ca Cl Mg", - "-kinetic_reactants Calcite Dolomite", "-equilibrium O2g" -) - -# Needed when using DHT -signif_vector <- c(7, 7, 7, 7, 7, 7, 7, 5, 5) -prop_type <- c("act", "act", "act", "act", "logact", "logact", "ignore", "act", "act") -prop <- names(init_cell) - -iterations <- 500 - -setup <- list( - # bound = myboundmat, - base = base, - first = selout, - # initsim = initsim, - # Cf = 1, - grid = grid, - diffusion = diffusion, - prop = prop, - immobile = c(7, 8, 9), - kin = c(8, 9), - ann = list(O2g = -0.1675), - # phase = "FLUX1", - # density = "DEN1", - reduce = FALSE, - # snapshots = demodir, ## directory where we will read MUFITS SUM files - # gridfile = paste0(demodir, "/d2ascii.run.GRID.SUM") - # init = init, - # vecinj = vecinj, - # cinj = c(0,1), - # boundary = boundary, - # injections = FALSE, - iterations = iterations, - timesteps = rep(10, iterations) -) diff --git a/data/SimDol2D_diffu.R b/data/SimDol2D_diffu.R deleted file mode 100644 index df965bcc6..000000000 --- a/data/SimDol2D_diffu.R +++ /dev/null @@ -1,213 +0,0 @@ -database <- normalizePath("../share/poet/examples/phreeqc_kin.dat") -input_script <- normalizePath("../share/poet/examples/dol.pqi") - -################################################################# -## Section 1 ## -## Grid initialization ## -################################################################# - -n <- 100 -m <- 100 - -types <- c("scratch", "phreeqc", "rds") - -# initsim <- c("SELECTED_OUTPUT", "-high_precision true", -# "-reset false", -# "USER_PUNCH", -# "-head C Ca Cl Mg pH pe O2g Calcite Dolomite", -# "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")", -# "SOLUTION 1", -# "units mol/kgw", -# "temp 25.0", "water 1", -# "pH 9.91 charge", -# "pe 4.0", -# "C 1.2279E-04", -# "Ca 1.2279E-04", -# "Cl 1E-12", -# "Mg 1E-12", -# "PURE 1", -# "O2g -0.6788 10.0", -# "Calcite 0.0 2.07E-3", -# "Dolomite 0.0 0.0", -# "END") - -# needed if init type is set to "scratch" -# prop <- c("C", "Ca", "Cl", "Mg", "pH", "pe", "O2g", "Calcite", "Dolomite") - -init_cell <- list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = -5.0822e-19, - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0, - "O2g" = 0.499957, - "Calcite" = 2.07e-4, - "Dolomite" = 0 -) - -grid <- list( - n_cells = c(n, m), - s_cells = c(n, m), - type = types[1], - init_cell = as.data.frame(init_cell), - props = names(init_cell), - database = database, - input_script = input_script -) - - -################################################################## -## Section 2 ## -## Diffusion parameters and boundary conditions ## -################################################################## - -init_diffu <- c( - "H" = 110.683, - "O" = 55.3413, - "Charge" = -5.0822e-19, - "C" = 1.2279E-4, - "Ca" = 1.2279E-4, - "Cl" = 0, - "Mg" = 0 -) - -alpha_diffu <- c( - "H" = 1E-4, - "O" = 1E-4, - "Charge" = 1E-4, - "C" = 1E-4, - "Ca" = 1E-4, - "Cl" = 1E-4, - "Mg" = 1E-4 -) - -vecinj_diffu <- list( - list( - "H" = 110.683, - "O" = 55.3413, - "Charge" = 1.90431e-16, - "C" = 0, - "Ca" = 0, - "Cl" = 0.002, - "Mg" = 0.001 - ) -) - -#inner_index <- c(5, 15, 25) -#inner_vecinj_index <- rep(1, 3) -# -#vecinj_inner <- cbind(inner_index, inner_vecinj_index) -vecinj_inner <- list( - l1 = c(1,2,2) -) - - -boundary <- list( - "N" = rep(0, n), - "E" = rep(0, n), - "S" = rep(0, n), - "W" = rep(0, n) -) - -diffu_list <- names(alpha_diffu) - -diffusion <- list( - init = init_diffu, - vecinj = do.call(rbind.data.frame, vecinj_diffu), - vecinj_inner = vecinj_inner, - vecinj_index = boundary, - alpha = alpha_diffu -) - -################################################################# -## Section 3 ## -## Chemitry module (Phreeqc) ## -################################################################# - -# db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat", -# package = "RedModRphree" -# ), is.db = TRUE) -# -# phreeqc::phrLoadDatabaseString(db) - -# NOTE: This won't be needed in the future either. Could also be done in a. pqi -# file -base <- c( - "SOLUTION 1", - "units mol/kgw", - "temp 25.0", - "water 1", - "pH 9.91 charge", - "pe 4.0", - "C 1.2279E-04", - "Ca 1.2279E-04", - "Mg 0.001", - "Cl 0.002", - "PURE 1", - "O2g -0.1675 10", - "KINETICS 1", - "-steps 100", - "-step_divide 100", - "-bad_step_max 2000", - "Calcite", "-m 0.000207", - "-parms 0.0032", - "Dolomite", - "-m 0.0", - "-parms 0.00032", - "END" -) - -selout <- c( - "SELECTED_OUTPUT", "-high_precision true", "-reset false", - "-time", "-soln", "-temperature true", "-water true", - "-pH", "-pe", "-totals C Ca Cl Mg", - "-kinetic_reactants Calcite Dolomite", "-equilibrium O2g" -) - - -# Needed when using DHT -signif_vector <- c(10, 10, 10, 7, 7, 7, 7, 0, 5, 5) -prop_type <- c("", "", "", "act", "act", "act", "act", "ignore", "", "") -prop <- names(init_cell) - -chemistry <- list( - database = database, - input_script = input_script -) - -################################################################# -## Section 4 ## -## Putting all those things together ## -################################################################# - - -iterations <- 10 - -setup <- list( - # bound = myboundmat, - base = base, - first = selout, - # initsim = initsim, - # Cf = 1, - grid = grid, - diffusion = diffusion, - chemistry = chemistry, - prop = prop, - immobile = c(7, 8, 9), - kin = c(8, 9), - ann = list(O2g = -0.1675), - # phase = "FLUX1", - # density = "DEN1", - reduce = FALSE, - # snapshots = demodir, ## directory where we will read MUFITS SUM files - # gridfile = paste0(demodir, "/d2ascii.run.GRID.SUM") - # init = init, - # vecinj = vecinj, - # cinj = c(0,1), - # boundary = boundary, - # injections = FALSE, - iterations = iterations, - timesteps = rep(10, iterations) -) diff --git a/data/SimDol2D_diffu.yaml b/data/SimDol2D_diffu.yaml deleted file mode 100644 index 84c04dd29..000000000 --- a/data/SimDol2D_diffu.yaml +++ /dev/null @@ -1,13 +0,0 @@ -phreeqc: - RebalanceByCell: True - RebalanceFraction: 0.5 - SolutionDensityVolume: False - PartitionUZSolids: False - Units: - Solution: "mg/L" - PPassamblage: "mol/L" - Exchange: "mol/L" - Surface: "mol/L" - GasPhase: "mol/L" - SSassemblage: "mol/L" - Kinetics: "mol/L" diff --git a/docs/20221216_Scheme_PORT_en.svg b/docs/20221216_Scheme_PORT_en.svg deleted file mode 100644 index d5c0c2208..000000000 --- a/docs/20221216_Scheme_PORT_en.svg +++ /dev/null @@ -1,572 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TUG - - - - - - - - - - - In DHT?Next Iteration - NoYes - - - - - Store in DHTChemistry - - - - - RetrieveResult - - - - - - Flow - - - - - Transport - - - - - PHREEQC - - AI Surrogate - - - - - - Accurate? - - - - - - - - - Yes - No - - Update - PORT - - - - - - - - diff --git a/docs/20230720_Scheme_POET_en.svg b/docs/20230720_Scheme_POET_en.svg index 89e339751..dd29d9b9e 100644 --- a/docs/20230720_Scheme_POET_en.svg +++ b/docs/20230720_Scheme_POET_en.svg @@ -1,568 +1,637 @@ + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - TUG - - - - - - - - - - - In DHT?Next Iteration - NoYes - - - - - Store in DHTChemistry - - - - - RetrieveResult - - - - - - Flow - - - - - Transport - - - - - PHREEQC - - AI Surrogate - - - - - - Accurate? - - - - - - - - - Yes - No - - Update - POET - tugFlowTransportPOET - - - - - - - + transform="matrix(1.0545,0,0,1,0.0745073,-0.0682484)" + id="g182">ChemistryIn DHT?RetrieveResultPHREEQCStore in DHTAI SurrogateAccurate?YesNoUpdateNoYesWorkerDistribute WorkGather ResultsMasterNext Iteration diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 8b4303dfa..4781c2b87 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -12,7 +12,7 @@ if(DOXYGEN_FOUND) set(DOXYGEN_PROJECT_NUMBER ${POET_VERSION}) doxygen_add_docs(doxygen - ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/src ${PROJECT_SOURCE_DIR}/README.md ${PROJECT_SOURCE_DIR}/docs/Input_Scripts.md ${PROJECT_SOURCE_DIR}/docs/Output.md diff --git a/docs/Input_Scripts.md b/docs/Input_Scripts.md index 305ac554f..1c03de749 100644 --- a/docs/Input_Scripts.md +++ b/docs/Input_Scripts.md @@ -13,10 +13,6 @@ by POET. | `n_cells` | Numeric Vector | Number of cells in each direction | | `s_cells` | Numeric Vector | Spatial resolution of grid in each direction | | `type` | String | Type of initialization, can be set to *scratch*, *phreeqc* or *rds* | -| `init_cell` | Data Frame | Containing all exactly one value per species to initialize the field. | -| `props` | String Vector | Names of all species | -| `database` | String | Path to Phreeqc database | -| `input_script` | String | Path to Phreeqc initial script | ## Diffusion parameters @@ -70,10 +66,12 @@ vecinj_index <- list( ## Chemistry parameters -| name | type | description | -|----------------|--------|-----------------------------------| -| `database` | String | Path to the Phreeqc database | -| `input_script` | String | Path the the Phreeqc input script | +| name | type | description | +|----------------|--------------|----------------------------------------------------------------------------------| +| `database` | String | Path to the Phreeqc database | +| `input_script` | String | Path the the Phreeqc input script | +| `dht_species` | Named Vector | Indicates significant digits to use for each species for DHT rounding. | +| `pht_species` | Named Vector | Indicates significant digits to use for each species for Interpolation rounding. | ## Final setup @@ -86,10 +84,3 @@ vecinj_index <- list( | `timesteps` | Numeric Vector | $\Delta t$ to use for specific iteration | | `store_result` | Boolean | Indicates if results should be stored | | `out_save` | Numeric Vector | *optional:* At which iteration the states should be stored | - -### DHT setup - -| name | type | description | -|-----------------|----------------|---------------------------------------------------------------------------------| -| `signif_vector` | Numeric Vector | Indicates significant digits to use for DHT rounding. Order of `props` vector. | -| `prop_type` | String Vector | Set type of species for rounding, can be left blank or set to *act* or *ignore* | diff --git a/ext/doctest b/ext/doctest index 8fdfd113d..ae7a13539 160000 --- a/ext/doctest +++ b/ext/doctest @@ -1 +1 @@ -Subproject commit 8fdfd113dcb4ad1a31705ff8ddb13a21a505bad8 +Subproject commit ae7a13539fb71f270b87eb2e874fbac80bc8dda2 diff --git a/ext/phreeqcrm b/ext/phreeqcrm index ba5dc40af..6ed14c353 160000 --- a/ext/phreeqcrm +++ b/ext/phreeqcrm @@ -1 +1 @@ -Subproject commit ba5dc40af119da015d36d2554ecd558ef9da1eb6 +Subproject commit 6ed14c35322a245e3a9776ef262c0ac0eba3b301 diff --git a/include/poet/DHT_Types.hpp b/include/poet/DHT_Types.hpp deleted file mode 100644 index 15eb9913e..000000000 --- a/include/poet/DHT_Types.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef DHT_TYPES_H_ -#define DHT_TYPES_H_ - -namespace poet { -enum DHT_PROP_TYPES { DHT_TYPE_DEFAULT, DHT_TYPE_CHARGE, DHT_TYPE_TOTAL }; -} - -#endif // DHT_TYPES_H_ diff --git a/include/poet/RInsidePOET.hpp b/include/poet/RInsidePOET.hpp deleted file mode 100644 index a1d2b8306..000000000 --- a/include/poet/RInsidePOET.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef RPOET_H_ -#define RPOET_H_ - -#include - -class RInsidePOET : public RInside { -public: - static RInsidePOET &getInstance() { - static RInsidePOET instance; - - return instance; - } - - RInsidePOET(RInsidePOET const &) = delete; - void operator=(RInsidePOET const &) = delete; - -private: - RInsidePOET() : RInside(){}; -}; - -#endif // RPOET_H_ diff --git a/src/Grid.cpp b/src/Base/Grid.cpp similarity index 99% rename from src/Grid.cpp rename to src/Base/Grid.cpp index 64561b5ae..e51b114ba 100644 --- a/src/Grid.cpp +++ b/src/Base/Grid.cpp @@ -18,7 +18,9 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/SimParams.hpp" +#include "Grid.hpp" +#include "SimParams.hpp" + #include #include #include @@ -26,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/include/poet/Grid.hpp b/src/Base/Grid.hpp similarity index 99% rename from include/poet/Grid.hpp rename to src/Base/Grid.hpp index 2ea2068b9..deec5c04a 100644 --- a/include/poet/Grid.hpp +++ b/src/Base/Grid.hpp @@ -21,7 +21,8 @@ #ifndef GRID_H #define GRID_H -#include "poet/SimParams.hpp" +#include "SimParams.hpp" + #include #include #include diff --git a/src/Base/Macros.hpp b/src/Base/Macros.hpp new file mode 100644 index 000000000..47beeeae3 --- /dev/null +++ b/src/Base/Macros.hpp @@ -0,0 +1,17 @@ +// Time-stamp: "Last modified 2023-08-09 14:16:04 mluebke" +#ifndef MACROS_H +#define MACROS_H + +#include +#include + +// Prepend "msg" with name of calling function +#define MSG(msg) std::cout << "CPP: " << __func__ << ": " << std::string(msg) << std::endl; + +#define MSG_NOENDL(msg) std::cout << "CPP: " << __func__ << ": " << std::string(msg); + +#define ERRMSG(msg) std::cerr << "CPP_ERROR: " << __func__ << ": " << std::string(msg) << std::endl; + +#define BOOL_PRINT(bool) std::string(bool ? "ON" : "OFF") + +#endif // MACROS_H diff --git a/src/Base/RInsidePOET.hpp b/src/Base/RInsidePOET.hpp new file mode 100644 index 000000000..6467e5760 --- /dev/null +++ b/src/Base/RInsidePOET.hpp @@ -0,0 +1,59 @@ +#ifndef RPOET_H_ +#define RPOET_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +class RInsidePOET : public RInside { +public: + static RInsidePOET &getInstance() { + static RInsidePOET instance; + + return instance; + } + + RInsidePOET(RInsidePOET const &) = delete; + void operator=(RInsidePOET const &) = delete; + + inline bool checkIfExists(const std::string &R_name, + const std::string &where) { + return Rcpp::as( + this->parseEval("'" + R_name + "' %in% names(" + where + ")")); + } + +private: + RInsidePOET() : RInside(){}; +}; + +template class RHookFunction { +public: + RHookFunction() {} + RHookFunction(RInside &R, const std::string &f_name) { + try { + this->func = Rcpp::Function(Rcpp::as(R.parseEval(f_name.c_str()))); + } catch (const std::exception &e) { + } + } + + template T operator()(Args... args) const { + if (func.has_value()) { + return (Rcpp::as(this->func.value()(args...))); + } else { + throw std::exception(); + } + } + + bool isValid() const { return this->func.has_value(); } + +private: + std::optional func; +}; + +#endif // RPOET_H_ diff --git a/src/SimParams.cpp b/src/Base/SimParams.cpp similarity index 56% rename from src/SimParams.cpp rename to src/Base/SimParams.cpp index c3ff54118..f711e796a 100644 --- a/src/SimParams.cpp +++ b/src/Base/SimParams.cpp @@ -1,8 +1,10 @@ +// Time-stamp: "Last modified 2023-08-15 12:12:36 delucia" + /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of ** Potsdam) ** -** Copyright (C) 2018-2022 Marco De Lucia, Max Luebke (GFZ Potsdam) +** Copyright (C) 2018-2023 Marco De Lucia, Max Luebke (GFZ Potsdam) ** ** POET is free software; you can redistribute it and/or modify it under the ** terms of the GNU General Public License as published by the Free Software @@ -18,15 +20,19 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/DHT_Types.hpp" +#include "SimParams.hpp" + +#include "../Chemistry/enums.hpp" + #include #include #include -#include #include #include - +#include +#include +#include #include #include #include @@ -56,14 +62,6 @@ poet::GridParams::s_GridParams(RInside &R) { (dim == 1 ? this->n_cells[0] : this->n_cells[0] * this->n_cells[1]); this->type = Rcpp::as(R.parseEval("mysetup$grid$type")); - this->init_df = - Rcpp::as(R.parseEval("mysetup$grid$init_cell")); - this->props = - Rcpp::as>(R.parseEval("mysetup$grid$props")); - this->input_script = - Rcpp::as(R.parseEval("mysetup$grid$input_script")); - this->database_path = - Rcpp::as(R.parseEval("mysetup$grid$database")); } poet::DiffusionParams::s_DiffusionParams(RInside &R) { @@ -82,21 +80,29 @@ poet::DiffusionParams::s_DiffusionParams(RInside &R) { Rcpp::as(R.parseEval("mysetup$diffusion$vecinj_index")); } -poet::ChemistryParams::s_ChemistryParams(RInside &R) { +void poet::ChemistryParams::initFromR(RInsidePOET &R) { this->database_path = Rcpp::as(R.parseEval("mysetup$chemistry$database")); this->input_script = Rcpp::as(R.parseEval("mysetup$chemistry$input_script")); - if (Rcpp::as( - R.parseEval("'dht_species' %in% names(mysetup$chemistry)"))) { - this->dht_species = Rcpp::as>( - R.parseEval("mysetup$chemistry$dht_species")); + + if (R.checkIfExists("dht_species", "mysetup$chemistry")) { + this->dht_signifs = Rcpp::as>( + R.parseEval(("mysetup$chemistry$dht_species"))); } - if (Rcpp::as( - R.parseEval("'dht_signif' %in% names(mysetup$chemistry)"))) { - this->dht_signif = Rcpp::as>( - R.parseEval("mysetup$chemistry$dht_signif")); + + if (R.checkIfExists("pht_species", "mysetup$chemistry")) { + this->pht_signifs = Rcpp::as>( + R.parseEval(("mysetup$chemistry$pht_species"))); } + this->hooks.dht_fill = + RHookFunction(R, "mysetup$chemistry$hooks$dht_fill"); + this->hooks.dht_fuzz = + RHookFunction>(R, "mysetup$chemistry$hooks$dht_fuzz"); + this->hooks.interp_pre = RHookFunction>( + R, "mysetup$chemistry$hooks$interp_pre_func"); + this->hooks.interp_post = + RHookFunction(R, "mysetup$chemistry$hooks$interp_post_func"); } SimParams::SimParams(int world_rank_, int world_size_) { @@ -104,25 +110,24 @@ SimParams::SimParams(int world_rank_, int world_size_) { this->simparams.world_size = world_size_; } -int SimParams::parseFromCmdl(char *argv[], RInside &R) { +int SimParams::parseFromCmdl(char *argv[], RInsidePOET &R) { // initialize argh object argh::parser cmdl(argv); // if user asked for help if (cmdl[{"help", "h"}]) { if (simparams.world_rank == 0) { - cout << "Todo" << endl - << "See README.md for further information." << endl; + MSG("Todo"); + MSG("See README.md for further information."); } return poet::PARSER_HELP; } // if positional arguments are missing else if (!cmdl(2)) { if (simparams.world_rank == 0) { - cerr << "ERROR. Kin needs 2 positional arguments: " << endl - << "1) the R script defining your simulation and" << endl - << "2) the directory prefix where to save results and profiling" - << endl; + ERRMSG("Kin needs 2 positional arguments: "); + ERRMSG("1) the R script defining your simulation and"); + ERRMSG("2) the directory prefix where to save results and profiling"); } return poet::PARSER_ERROR; } @@ -132,70 +137,86 @@ int SimParams::parseFromCmdl(char *argv[], RInside &R) { std::list optionsError = validateOptions(cmdl); if (!optionsError.empty()) { if (simparams.world_rank == 0) { - cerr << "Unrecognized option(s):\n" << endl; + ERRMSG("Unrecognized option(s):\n"); for (auto option : optionsError) { - cerr << option << endl; + ERRMSG(std::string(option)); } - cerr << "\nMake sure to use available options. Exiting!" << endl; + ERRMSG("Make sure to use available options. Exiting!"); } return poet::PARSER_ERROR; } simparams.print_progressbar = cmdl[{"P", "progress"}]; - /*Parse DHT arguments*/ - simparams.dht_enabled = cmdl["dht"]; + // simparams.print_progressbar = cmdl[{"P", "progress"}]; + + /* Parse DHT arguments */ + chem_params.use_dht = cmdl["dht"]; + chem_params.use_interp = cmdl["interp"]; // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n'; - if (simparams.dht_enabled) { - cmdl("dht-strategy", 0) >> simparams.dht_strategy; - // cout << "CPP: DHT strategy is " << dht_strategy << endl; + cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> chem_params.dht_size; + // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << + // endl; - cmdl("dht-signif", 5) >> simparams.dht_significant_digits; - // cout << "CPP: DHT significant digits = " << dht_significant_digits << - // endl; + cmdl("dht-snaps", 0) >> chem_params.dht_snaps; - simparams.dht_log = !(cmdl["dht-nolog"]); - // cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" - // : "OFF" ) << endl; - - cmdl("dht-size", DHT_SIZE_PER_PROCESS_MB) >> simparams.dht_size_per_process; - // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << - // endl; - - cmdl("dht-snaps", 0) >> simparams.dht_snaps; - - cmdl("dht-file") >> dht_file; - } + cmdl("dht-file") >> chem_params.dht_file; /*Parse work package size*/ cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; + cmdl("interp-size", 100) >> chem_params.pht_size; + cmdl("interp-min", 5) >> chem_params.interp_min_entries; + cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; + + /*Parse output options*/ + simparams.store_result = !cmdl["ignore-result"]; + + /*Parse work package size*/ + cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size; + + chem_params.use_interp = cmdl["interp"]; + cmdl("interp-size", 100) >> chem_params.pht_size; + cmdl("interp-min", 5) >> chem_params.interp_min_entries; + cmdl("interp-bucket-entries", 20) >> chem_params.pht_max_entries; + /*Parse output options*/ simparams.store_result = !cmdl["ignore-result"]; if (simparams.world_rank == 0) { - cout << "CPP: Complete results storage is " - << (simparams.store_result ? "ON" : "OFF") << endl; - cout << "CPP: Work Package Size: " << simparams.wp_size << endl; - cout << "CPP: DHT is " << (simparams.dht_enabled ? "ON" : "OFF") << '\n'; + MSG("Complete results storage is " + BOOL_PRINT(simparams.store_result)); + MSG("Work Package Size: " + std::to_string(simparams.wp_size)); + MSG("DHT is " + BOOL_PRINT(chem_params.use_dht)); - if (simparams.dht_enabled) { - cout << "CPP: DHT strategy is " << simparams.dht_strategy << endl; - cout << "CPP: DHT key default digits (ignored if 'signif_vector' is " - "defined) = " - << simparams.dht_significant_digits << endl; - cout << "CPP: DHT logarithm before rounding: " - << (simparams.dht_log ? "ON" : "OFF") << endl; - cout << "CPP: DHT size per process (Byte) = " - << simparams.dht_size_per_process << endl; - cout << "CPP: DHT save snapshots is " << simparams.dht_snaps << endl; - cout << "CPP: DHT load file is " << dht_file << endl; + if (chem_params.use_dht) { + MSG("DHT strategy is " + std::to_string(simparams.dht_strategy)); + // MDL: these should be outdated (?) + // MSG("DHT key default digits (ignored if 'signif_vector' is " + // "defined) = " + // << simparams.dht_significant_digits); + // MSG("DHT logarithm before rounding: " + // << (simparams.dht_log ? "ON" : "OFF")); + MSG("DHT size per process (Megabyte) = " + + std::to_string(chem_params.dht_size)); + MSG("DHT save snapshots is " + BOOL_PRINT(chem_params.dht_snaps)); + MSG("DHT load file is " + chem_params.dht_file); + } + + if (chem_params.use_interp) { + MSG("PHT interpolation enabled: " + BOOL_PRINT(chem_params.use_interp)); + MSG("PHT interp-size = " + std::to_string(chem_params.pht_size)); + MSG("PHT interp-min = " + + std::to_string(chem_params.interp_min_entries)); + MSG("PHT interp-bucket-entries = " + + std::to_string(chem_params.pht_max_entries)); } } cmdl(1) >> filesim; cmdl(2) >> out_dir; + chem_params.dht_outdir = out_dir; + /* distribute information to R runtime */ // if local_rank == 0 then master else worker R["local_rank"] = simparams.world_rank; @@ -210,12 +231,15 @@ int SimParams::parseFromCmdl(char *argv[], RInside &R) { // work package size R["work_package_size"] = simparams.wp_size; // dht enabled? - R["dht_enabled"] = simparams.dht_enabled; + R["dht_enabled"] = chem_params.use_dht; // log before rounding? R["dht_log"] = simparams.dht_log; // eval the init string, ignoring any returns R.parseEvalQ("source(filesim)"); + R.parseEvalQ("mysetup <- setup"); + + this->chem_params.initFromR(R); return poet::PARSER_OK; } diff --git a/include/poet/SimParams.hpp b/src/Base/SimParams.hpp similarity index 80% rename from include/poet/SimParams.hpp rename to src/Base/SimParams.hpp index 8d670fe15..523b5b105 100644 --- a/include/poet/SimParams.hpp +++ b/src/Base/SimParams.hpp @@ -21,20 +21,27 @@ #ifndef PARSER_H #define PARSER_H -#include -#include -#include -#include +#include "../DataStructures/DataStructures.hpp" +#include "Macros.hpp" +#include "RInsidePOET.hpp" #include "argh.hpp" // Argument handler https://github.com/adishavit/argh + +#include +#include +#include +#include +#include +#include + #include #include // BSD-licenced /** Standard DHT Size. Defaults to 1 GB (1000 MB) */ -constexpr uint32_t DHT_SIZE_PER_PROCESS_MB = 1E3; +constexpr uint32_t DHT_SIZE_PER_PROCESS_MB = 1.5E3; /** Standard work package size */ -#define WORK_PACKAGE_SIZE_DEFAULT 5 +#define WORK_PACKAGE_SIZE_DEFAULT 32 namespace poet { @@ -70,6 +77,8 @@ typedef struct { /** indicating whether the progress bar during chemistry simulation should be * printed or not */ bool print_progressbar; + + bool interp_enabled; } t_simparams; using GridParams = struct s_GridParams { @@ -101,13 +110,31 @@ using DiffusionParams = struct s_DiffusionParams { s_DiffusionParams(RInside &R); }; -using ChemistryParams = struct s_ChemistryParams { +struct ChemistryParams { std::string database_path; std::string input_script; - std::vector dht_species; - std::vector dht_signif; - s_ChemistryParams(RInside &R); + bool use_dht; + std::uint64_t dht_size; + int dht_snaps; + std::string dht_file; + std::string dht_outdir; + NamedVector dht_signifs; + + bool use_interp; + std::uint64_t pht_size; + std::uint32_t pht_max_entries; + std::uint32_t interp_min_entries; + NamedVector pht_signifs; + + struct Chem_Hook_Functions { + RHookFunction dht_fill; + RHookFunction> dht_fuzz; + RHookFunction> interp_pre; + RHookFunction interp_post; + } hooks; + + void initFromR(RInsidePOET &R); }; /** @@ -159,7 +186,7 @@ public: * @return int Returns with 0 if no error occured, otherwise value less than 0 * is returned. */ - int parseFromCmdl(char *argv[], RInside &R); + int parseFromCmdl(char *argv[], RInsidePOET &R); /** * @brief Init std::vector values @@ -193,15 +220,10 @@ public: */ auto getDHTSignifVector() const { return this->dht_signif_vector; }; - /** - * @brief Return name of DHT snapshot. - * - * Name of the DHT file which is used to initialize the DHT with a previously - * written snapshot. - * - * @return std::string Absolute paht to the DHT snapshot - */ - auto getDHTFile() const { return this->dht_file; }; + auto getPHTSignifVector() const { return this->pht_signif_vector; }; + + auto getPHTBucketSize() const { return this->pht_bucket_size; }; + auto getPHTMinEntriesNeeded() const { return this->pht_min_entries_needed; }; /** * @brief Get the filesim name @@ -223,22 +245,31 @@ public: */ auto getOutDir() const { return this->out_dir; }; + const auto &getChemParams() const { return chem_params; } + private: std::list validateOptions(argh::parser cmdl); - const std::set flaglist{"ignore-result", "dht", "dht-nolog", "P", - "progress"}; - const std::set paramlist{"work-package-size", "dht-signif", - "dht-strategy", "dht-size", - "dht-snaps", "dht-file"}; + const std::set flaglist{"ignore-result", "dht", "P", "progress", + "interp"}; + const std::set paramlist{ + "work-package-size", "dht-strategy", + "dht-size", "dht-snaps", + "dht-file", "interp-size", + "interp-min", "interp-bucket-entries"}; t_simparams simparams; std::vector dht_signif_vector; + std::vector pht_signif_vector; + + uint32_t pht_bucket_size; + uint32_t pht_min_entries_needed; - std::string dht_file; std::string filesim; std::string out_dir; + + ChemistryParams chem_params; }; } // namespace poet #endif // PARSER_H diff --git a/include/poet/argh.hpp b/src/Base/argh.hpp similarity index 100% rename from include/poet/argh.hpp rename to src/Base/argh.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e273c28f8..1494c0849 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,16 +1,27 @@ -file(GLOB_RECURSE poet_lib_SRC - CONFIGURE_DEPENDS - "*.cpp" "*.c") +add_library(poetlib + Base/Grid.cpp + Base/SimParams.cpp + Chemistry/ChemistryModule.cpp + Chemistry/MasterFunctions.cpp + Chemistry/WorkerFunctions.cpp + Chemistry/SurrogateModels/DHT_Wrapper.cpp + Chemistry/SurrogateModels/DHT.c + Chemistry/SurrogateModels/HashFunctions.cpp + Chemistry/SurrogateModels/InterpolationModule.cpp + Chemistry/SurrogateModels/ProximityHashTable.cpp + DataStructures/Field.cpp + Transport/DiffusionModule.cpp +) -find_library(MATH_LIBRARY m) -find_library(CRYPTO_LIBRARY crypto) +target_link_libraries(poetlib PUBLIC + MPI::MPI_C + ${MATH_LIBRARY} + RRuntime + PhreeqcRM + tug +) -add_library(poet_lib ${poet_lib_SRC}) -target_include_directories(poet_lib PUBLIC ${PROJECT_SOURCE_DIR}/include) -target_link_libraries(poet_lib PUBLIC - MPI::MPI_CXX ${MATH_LIBRARY} RRuntime PhreeqcRM tug) - -target_compile_definitions(poet_lib PUBLIC STRICT_R_HEADERS OMPI_SKIP_MPICXX) +target_compile_definitions(poetlib PUBLIC STRICT_R_HEADERS OMPI_SKIP_MPICXX) mark_as_advanced(PHREEQCRM_BUILD_MPI PHREEQCRM_DISABLE_OPENMP) set(PHREEQCRM_DISABLE_OPENMP ON CACHE BOOL "" FORCE) @@ -18,5 +29,21 @@ set(PHREEQCRM_DISABLE_OPENMP ON CACHE BOOL "" FORCE) option(POET_DHT_DEBUG "Build with DHT debug info" OFF) if(POET_DHT_DEBUG) - target_compile_definitions(poet_lib PRIVATE DHT_STATISTICS) + target_compile_definitions(poetlib PRIVATE DHT_STATISTICS) endif() + +option(POET_PHT_ADDITIONAL_INFO "Enables additional information in the PHT" OFF) + +if (POET_PHT_ADDITIONAL_INFO) + target_compile_definitions(poetlib PRIVATE POET_PHT_ADD) +endif() + +file(READ "${PROJECT_SOURCE_DIR}/R_lib/kin_r_library.R" R_KIN_LIB ) + +configure_file(poet.hpp.in poet.hpp @ONLY) + +add_executable(poet poet.cpp) +target_link_libraries(poet PRIVATE poetlib MPI::MPI_C RRuntime) +target_include_directories(poet PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") + +install(TARGETS poet DESTINATION bin) diff --git a/src/Chemistry/ChemistryModule.cpp b/src/Chemistry/ChemistryModule.cpp new file mode 100644 index 000000000..46531f2c6 --- /dev/null +++ b/src/Chemistry/ChemistryModule.cpp @@ -0,0 +1,532 @@ +#include "ChemistryModule.hpp" + +#include "SurrogateModels/DHT_Wrapper.hpp" +#include "SurrogateModels/Interpolation.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr uint32_t MB_FACTOR = 1E6; + +std::vector +inverseDistanceWeighting(const std::vector &to_calc, + const std::vector &from, + const std::vector> &input, + const std::vector> &output) { + std::vector results = from; + + const std::uint32_t buffer_size = input.size() + 1; + double buffer[buffer_size]; + double from_rescaled; + + const std::uint32_t data_set_n = input.size(); + double rescaled[to_calc.size()][data_set_n + 1]; + double weights[data_set_n]; + + // rescaling over all key elements + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + const auto output_comp_i = to_calc[key_comp_i]; + + // rescale input between 0 and 1 + for (int point_i = 0; point_i < data_set_n; point_i++) { + rescaled[key_comp_i][point_i] = input[point_i][key_comp_i]; + } + + rescaled[key_comp_i][data_set_n] = from[output_comp_i]; + + const double min = *std::min_element(rescaled[key_comp_i], + rescaled[key_comp_i] + data_set_n + 1); + const double max = *std::max_element(rescaled[key_comp_i], + rescaled[key_comp_i] + data_set_n + 1); + + for (int point_i = 0; point_i < data_set_n; point_i++) { + rescaled[key_comp_i][point_i] = + ((max - min) != 0 + ? (rescaled[key_comp_i][point_i] - min) / (max - min) + : 0); + } + rescaled[key_comp_i][data_set_n] = + ((max - min) != 0 ? (from[output_comp_i] - min) / (max - min) : 0); + } + + // calculate distances for each data set + double inv_sum = 0; + for (int point_i = 0; point_i < data_set_n; point_i++) { + double distance = 0; + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + distance += std::pow( + rescaled[key_comp_i][point_i] - rescaled[key_comp_i][data_set_n], 2); + } + weights[point_i] = 1 / std::sqrt(distance); + assert(!std::isnan(weights[point_i])); + inv_sum += weights[point_i]; + } + + assert(!std::isnan(inv_sum)); + + // actual interpolation + // bool has_h = false; + // bool has_o = false; + + for (int key_comp_i = 0; key_comp_i < to_calc.size(); key_comp_i++) { + const auto output_comp_i = to_calc[key_comp_i]; + double key_delta = 0; + + // if (interp_i == 0) { + // has_h = true; + // } + + // if (interp_i == 1) { + // has_o = true; + // } + + for (int j = 0; j < data_set_n; j++) { + key_delta += weights[j] * output[j][output_comp_i]; + } + + key_delta /= inv_sum; + + results[output_comp_i] = from[output_comp_i] + key_delta; + } + + // if (!has_h) { + // double new_val = 0; + // for (int j = 0; j < data_set_n; j++) { + // new_val += weights[j] * output[j][0]; + // } + // results[0] = new_val / inv_sum; + // } + + // if (!has_h) { + // double new_val = 0; + // for (int j = 0; j < data_set_n; j++) { + // new_val += weights[j] * output[j][1]; + // } + // results[1] = new_val / inv_sum; + // } + + // for (std::uint32_t i = 0; i < to_calc.size(); i++) { + // const std::uint32_t interp_i = to_calc[i]; + + // // rescale input between 0 and 1 + // for (int j = 0; j < input.size(); j++) { + // buffer[j] = input[j].at(i); + // } + + // buffer[buffer_size - 1] = from[interp_i]; + + // const double min = *std::min_element(buffer, buffer + buffer_size); + // const double max = *std::max_element(buffer, buffer + buffer_size); + + // for (int j = 0; j < input.size(); j++) { + // buffer[j] = ((max - min) != 0 ? (buffer[j] - min) / (max - min) : 1); + // } + // from_rescaled = + // ((max - min) != 0 ? (from[interp_i] - min) / (max - min) : 0); + + // double inv_sum = 0; + + // // calculate distances for each point + // for (int i = 0; i < input.size(); i++) { + // const double distance = std::pow(buffer[i] - from_rescaled, 2); + + // buffer[i] = distance > 0 ? (1 / std::sqrt(distance)) : 0; + // inv_sum += buffer[i]; + // } + // // calculate new values + // double new_val = 0; + // for (int i = 0; i < output.size(); i++) { + // new_val += buffer[i] * output[i][interp_i]; + // } + // results[interp_i] = new_val / inv_sum; + // if (std::isnan(results[interp_i])) { + // std::cout << "nan with new_val = " << output[0][i] << std::endl; + // } + // } + + return results; +} + +poet::ChemistryModule::ChemistryModule(uint32_t nxyz, uint32_t wp_size, + std::uint32_t maxiter, + const ChemistryParams &chem_param, + MPI_Comm communicator) + : PhreeqcRM(nxyz, 1), group_comm(communicator), wp_size(wp_size), + params(chem_param) { + + MPI_Comm_size(communicator, &this->comm_size); + MPI_Comm_rank(communicator, &this->comm_rank); + + this->is_sequential = (this->comm_size == 1); + this->is_master = (this->comm_rank == 0); + + if (!is_sequential && is_master) { + MPI_Bcast(&wp_size, 1, MPI_UINT32_T, 0, this->group_comm); + MPI_Bcast(&maxiter, 1, MPI_UINT32_T, 0, this->group_comm); + } + + this->file_pad = std::ceil(std::log10(maxiter + 1)); +} + +poet::ChemistryModule::~ChemistryModule() { + if (dht) { + delete dht; + } +} + +poet::ChemistryModule +poet::ChemistryModule::createWorker(MPI_Comm communicator, + const ChemistryParams &chem_param) { + uint32_t wp_size; + MPI_Bcast(&wp_size, 1, MPI_UINT32_T, 0, communicator); + + std::uint32_t maxiter; + MPI_Bcast(&maxiter, 1, MPI_UINT32_T, 0, communicator); + + return ChemistryModule(wp_size, wp_size, maxiter, chem_param, communicator); +} + +void poet::ChemistryModule::RunInitFile(const std::string &input_script_path) { + if (this->is_master) { + int f_type = CHEM_INIT; + PropagateFunctionType(f_type); + + int count = input_script_path.size(); + ChemBCast(&count, 1, MPI_INT); + ChemBCast(const_cast(input_script_path.data()), count, MPI_CHAR); + } + + this->RunFile(true, true, false, input_script_path); + this->RunString(true, false, false, "DELETE; -all; PRINT; -warnings 0;"); + + this->FindComponents(); + + PhreeqcRM::initializePOET(this->speciesPerModule, this->prop_names); + this->prop_count = prop_names.size(); + + char exchange = (speciesPerModule[1] == 0 ? -1 : 1); + char kinetics = (speciesPerModule[2] == 0 ? -1 : 1); + char equilibrium = (speciesPerModule[3] == 0 ? -1 : 1); + char surface = (speciesPerModule[4] == 0 ? -1 : 1); + + std::vector ic1; + ic1.resize(this->nxyz * 7, -1); + // TODO: hardcoded reaction modules + for (int i = 0; i < nxyz; i++) { + ic1[i] = 1; // Solution 1 + ic1[nxyz + i] = equilibrium; // Equilibrium 1 + ic1[2 * nxyz + i] = exchange; // Exchange none + ic1[3 * nxyz + i] = surface; // Surface none + ic1[4 * nxyz + i] = -1; // Gas phase none + ic1[5 * nxyz + i] = -1; // Solid solutions none + ic1[6 * nxyz + i] = kinetics; // Kinetics 1 + } + + this->InitialPhreeqc2Module(ic1); +} + +void poet::ChemistryModule::initializeField(const Field &trans_field) { + + if (is_master) { + int f_type = CHEM_INIT_SPECIES; + PropagateFunctionType(f_type); + } + + std::vector essentials_backup{ + prop_names.begin() + speciesPerModule[0], prop_names.end()}; + + std::vector new_solution_names{ + this->prop_names.begin(), this->prop_names.begin() + speciesPerModule[0]}; + + if (is_master) { + for (auto &prop : trans_field.GetProps()) { + if (std::find(new_solution_names.begin(), new_solution_names.end(), + prop) == new_solution_names.end()) { + int size = prop.size(); + ChemBCast(&size, 1, MPI_INT); + ChemBCast(prop.data(), prop.size(), MPI_CHAR); + new_solution_names.push_back(prop); + } + } + int end = 0; + ChemBCast(&end, 1, MPI_INT); + } else { + constexpr int MAXSIZE = 128; + MPI_Status status; + int recv_size; + char recv_buffer[MAXSIZE]; + while (1) { + ChemBCast(&recv_size, 1, MPI_INT); + if (recv_size == 0) { + break; + } + ChemBCast(recv_buffer, recv_size, MPI_CHAR); + recv_buffer[recv_size] = '\0'; + new_solution_names.push_back(std::string(recv_buffer)); + } + } + + // now sort the new values + std::sort(new_solution_names.begin() + 3, new_solution_names.end()); + this->SetPOETSolutionNames(new_solution_names); + this->speciesPerModule[0] = new_solution_names.size(); + + // and append other processes than solutions + std::vector new_prop_names = new_solution_names; + new_prop_names.insert(new_prop_names.end(), essentials_backup.begin(), + essentials_backup.end()); + + std::vector old_prop_names{this->prop_names}; + this->prop_names = std::move(new_prop_names); + this->prop_count = prop_names.size(); + + if (is_master) { + this->n_cells = trans_field.GetRequestedVecSize(); + + std::vector> phreeqc_dump(this->nxyz); + this->getDumpedField(phreeqc_dump); + + if (is_sequential) { + std::vector init_vec; + for (std::size_t i = 0; i < n_cells; i++) { + init_vec.insert(init_vec.end(), phreeqc_dump[i].begin(), + phreeqc_dump[i].end()); + } + + const auto tmp_buffer{init_vec}; + this->unshuffleField(tmp_buffer, n_cells, prop_count, 1, init_vec); + this->chem_field = Field(n_cells, init_vec, prop_names); + return; + } + + std::vector &phreeqc_init = phreeqc_dump[0]; + std::vector> initial_values; + + for (const auto &vec : trans_field.As2DVector()) { + initial_values.push_back(vec); + } + + this->base_totals = {initial_values.at(0).at(0), + initial_values.at(1).at(0)}; + ChemBCast(base_totals.data(), 2, MPI_DOUBLE); + + for (int i = speciesPerModule[0]; i < phreeqc_init.size(); i++) { + std::vector init(n_cells, phreeqc_init[i]); + initial_values.push_back(std::move(init)); + } + + this->chem_field = Field(n_cells, initial_values, prop_names); + } else { + ChemBCast(base_totals.data(), 2, MPI_DOUBLE); + } + + if (this->params.use_dht || this->params.use_interp) { + initializeDHT(this->params.dht_size, this->params.dht_signifs); + setDHTSnapshots(this->params.dht_snaps, this->params.dht_outdir); + setDHTReadFile(this->params.dht_file); + + this->dht_enabled = this->params.use_dht; + + if (this->params.use_interp) { + initializeInterp(this->params.pht_max_entries, this->params.pht_size, + this->params.interp_min_entries, + this->params.pht_signifs); + this->interp_enabled = this->params.use_interp; + } + } +} + +void poet::ChemistryModule::initializeDHT( + uint32_t size_mb, const NamedVector &key_species) { + constexpr uint32_t MB_FACTOR = 1E6; + + this->dht_enabled = true; + + MPI_Comm dht_comm; + + if (this->is_master) { + MPI_Comm_split(this->group_comm, MPI_UNDEFINED, this->comm_rank, &dht_comm); + return; + } + + if (!this->is_master) { + + MPI_Comm_split(this->group_comm, 1, this->comm_rank, &dht_comm); + + auto map_copy = key_species; + + if (key_species.empty()) { + std::vector default_signif( + this->prop_names.size(), DHT_Wrapper::DHT_KEY_SIGNIF_DEFAULT); + map_copy = NamedVector(this->prop_names, default_signif); + } + + auto key_indices = parseDHTSpeciesVec(key_species, this->prop_names); + + if (this->dht) { + delete this->dht; + } + + const std::uint64_t dht_size = size_mb * MB_FACTOR; + + this->dht = new DHT_Wrapper(dht_comm, dht_size, map_copy, key_indices, + this->prop_names, params.hooks, + this->prop_count, params.use_interp); + this->dht->setBaseTotals(base_totals.at(0), base_totals.at(1)); + } +} + +inline std::vector poet::ChemistryModule::parseDHTSpeciesVec( + const NamedVector &key_species, + const std::vector &to_compare) const { + std::vector species_indices; + species_indices.reserve(key_species.size()); + + for (const auto &species : key_species.getNames()) { + auto it = std::find(to_compare.begin(), to_compare.end(), species); + if (it == to_compare.end()) { + species_indices.push_back(DHT_Wrapper::DHT_KEY_INPUT_CUSTOM); + continue; + } + const std::uint32_t index = it - to_compare.begin(); + species_indices.push_back(index); + } + + return species_indices; +} + +void poet::ChemistryModule::BCastStringVec(std::vector &io) { + + if (this->is_master) { + int vec_size = io.size(); + ChemBCast(&vec_size, 1, MPI_INT); + + for (const auto &value : io) { + int buf_size = value.size() + 1; + ChemBCast(&buf_size, 1, MPI_INT); + ChemBCast(const_cast(value.c_str()), buf_size, MPI_CHAR); + } + } else { + int vec_size; + ChemBCast(&vec_size, 1, MPI_INT); + + io.resize(vec_size); + + for (int i = 0; i < vec_size; i++) { + int buf_size; + ChemBCast(&buf_size, 1, MPI_INT); + char buf[buf_size]; + ChemBCast(buf, buf_size, MPI_CHAR); + io[i] = std::string{buf}; + } + } +} + +void poet::ChemistryModule::setDHTSnapshots(int type, + const std::string &out_dir) { + if (this->is_master) { + return; + } + + this->dht_file_out_dir = out_dir; + this->dht_snaps_type = type; +} + +void poet::ChemistryModule::setDHTReadFile(const std::string &input_file) { + if (this->is_master) { + return; + } + + if (!input_file.empty()) { + WorkerReadDHTDump(input_file); + } +} + +void poet::ChemistryModule::initializeInterp( + std::uint32_t bucket_size, std::uint32_t size_mb, std::uint32_t min_entries, + const NamedVector &key_species) { + + if (!this->is_master) { + + constexpr uint32_t MB_FACTOR = 1E6; + + assert(this->dht); + + this->interp_enabled = true; + + auto map_copy = key_species; + + if (key_species.empty()) { + map_copy = this->dht->getKeySpecies(); + for (std::size_t i = 0; i < map_copy.size(); i++) { + const std::uint32_t signif = + map_copy[i] - (map_copy[i] > InterpolationModule::COARSE_DIFF + ? InterpolationModule::COARSE_DIFF + : 0); + map_copy[i] = signif; + } + } + + auto key_indices = + parseDHTSpeciesVec(map_copy, dht->getKeySpecies().getNames()); + + if (this->interp) { + this->interp.reset(); + } + + const uint64_t pht_size = size_mb * MB_FACTOR; + + interp = std::make_unique( + bucket_size, pht_size, min_entries, *(this->dht), map_copy, key_indices, + this->prop_names, this->params.hooks); + + interp->setInterpolationFunction(inverseDistanceWeighting); + } +} + +std::vector +poet::ChemistryModule::shuffleField(const std::vector &in_field, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count) { + std::vector out_buffer(in_field.size()); + uint32_t write_i = 0; + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < prop_count; k++) { + out_buffer[(write_i * prop_count) + k] = + in_field[(k * size_per_prop) + j]; + } + write_i++; + } + } + return out_buffer; +} + +void poet::ChemistryModule::unshuffleField(const std::vector &in_buffer, + uint32_t size_per_prop, + uint32_t prop_count, + uint32_t wp_count, + std::vector &out_field) { + uint32_t read_i = 0; + + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < prop_count; k++) { + out_field[(k * size_per_prop) + j] = + in_buffer[(read_i * prop_count) + k]; + } + read_i++; + } + } +} diff --git a/include/poet/ChemistryModule.hpp b/src/Chemistry/ChemistryModule.hpp similarity index 78% rename from include/poet/ChemistryModule.hpp rename to src/Chemistry/ChemistryModule.hpp index fbb932d0f..d343ffa31 100644 --- a/include/poet/ChemistryModule.hpp +++ b/src/Chemistry/ChemistryModule.hpp @@ -1,20 +1,22 @@ -// Time-stamp: "Last modified 2023-07-21 17:20:10 mluebke" #ifndef CHEMISTRYMODULE_H_ #define CHEMISTRYMODULE_H_ -#include "Field.hpp" -#include "IrmResult.h" -#include "PhreeqcRM.h" -#include "poet/DHT_Wrapper.hpp" +#include "../Base/SimParams.hpp" +#include "../DataStructures/DataStructures.hpp" +#include "SurrogateModels/DHT_Wrapper.hpp" +#include "SurrogateModels/Interpolation.hpp" + +#include +#include #include #include #include #include +#include #include #include -#include #include namespace poet { @@ -39,7 +41,8 @@ public: * \param wp_size Count of grid cells to fill each work package at maximum. * \param communicator MPI communicator to distribute work in. */ - ChemistryModule(uint32_t nxyz, uint32_t wp_size, MPI_Comm communicator); + ChemistryModule(uint32_t nxyz, uint32_t wp_size, std::uint32_t maxiter, + const ChemistryParams &chem_param, MPI_Comm communicator); /** * Deconstructor, which frees DHT data structure if used. @@ -86,7 +89,8 @@ public: * * \returns A worker instance with fixed work package size. */ - static ChemistryModule createWorker(MPI_Comm communicator); + static ChemistryModule createWorker(MPI_Comm communicator, + const ChemistryParams &chem_param); /** * Default work package size. @@ -109,9 +113,9 @@ public: * Enumerating DHT file options */ enum { - DHT_FILES_DISABLED = 0, //!< disabled file output - DHT_FILES_SIMEND, //!< only output of snapshot after simulation - DHT_FILES_ITEREND //!< output snapshots after each iteration + DHT_SNAPS_DISABLED = 0, //!< disabled file output + DHT_SNAPS_SIMEND, //!< only output of snapshot after simulation + DHT_SNAPS_ITEREND //!< output snapshots after each iteration }; /** @@ -135,41 +139,6 @@ public: */ void MasterLoopBreak(); - /** - * **Master only** Enables DHT usage for the workers. - * - * If called multiple times enabling the DHT, the current state of DHT will be - * overwritten with a new instance of the DHT. - * - * \param enable Enables or disables the usage of the DHT. - * \param size_mb Size in megabyte to allocate for the DHT if enabled. - * \param key_species Name of species to be used for key creation. - */ - void SetDHTEnabled(bool enable, uint32_t size_mb, - const std::vector &key_species); - /** - * **Master only** Set DHT snapshots to specific mode. - * - * \param type DHT snapshot mode. - * \param out_dir Path to output DHT snapshots. - */ - void SetDHTSnaps(int type, const std::string &out_dir); - /** - * **Master only** Set the vector with significant digits to round before - * inserting into DHT. - * - * \param signif_vec Vector defining significant digit for each species. Order - * is defined by prop_type vector (ChemistryModule::GetPropNames). - */ - void SetDHTSignifVector(std::vector &signif_vec); - - /** - * **Master only** Load the state of the DHT from given file. - * - * \param input_file File to load data from. - */ - void ReadDHTFile(const std::string &input_file); - /** * **Master only** Return count of grid cells. */ @@ -231,13 +200,6 @@ public: */ std::vector GetWorkerDHTHits() const; - /** - * **Master only** Collect and return DHT misses of all workers. - * - * \return Vector of all count of DHT misses. - */ - std::vector GetWorkerDHTMiss() const; - /** * **Master only** Collect and return DHT evictions of all workers. * @@ -257,9 +219,29 @@ public: * * \param enabled True if print progressbar, false if not. */ - void setProgressBarPrintout(bool enabled); + void setProgressBarPrintout(bool enabled) { + this->print_progessbar = enabled; + }; + + std::vector GetWorkerInterpolationCalls() const; + + std::vector GetWorkerInterpolationWriteTimings() const; + std::vector GetWorkerInterpolationReadTimings() const; + std::vector GetWorkerInterpolationGatherTimings() const; + std::vector GetWorkerInterpolationFunctionCallTimings() const; + + std::vector GetWorkerPHTCacheHits() const; protected: + void initializeDHT(uint32_t size_mb, + const NamedVector &key_species); + void setDHTSnapshots(int type, const std::string &out_dir); + void setDHTReadFile(const std::string &input_file); + + void initializeInterp(std::uint32_t bucket_size, std::uint32_t size_mb, + std::uint32_t min_entries, + const NamedVector &key_species); + enum { CHEM_INIT, CHEM_WP_SIZE, @@ -268,9 +250,11 @@ protected: CHEM_DHT_SIGNIF_VEC, CHEM_DHT_SNAPS, CHEM_DHT_READ_FILE, + CHEM_IP_ENABLE, + CHEM_IP_MIN_ENTRIES, + CHEM_IP_SIGNIF_VEC, CHEM_WORK_LOOP, CHEM_PERF, - CHEM_PROGRESSBAR, CHEM_BREAK_MAIN_LOOP }; @@ -281,11 +265,20 @@ protected: WORKER_DHT_GET, WORKER_DHT_FILL, WORKER_IDLE, + WORKER_IP_WRITE, + WORKER_IP_READ, + WORKER_IP_GATHER, + WORKER_IP_FC, WORKER_DHT_HITS, - WORKER_DHT_MISS, - WORKER_DHT_EVICTIONS + WORKER_DHT_EVICTIONS, + WORKER_PHT_CACHE_HITS, + WORKER_IP_CALLS }; + std::vector interp_calls; + std::vector dht_hits; + std::vector dht_evictions; + struct worker_s { double phreeqc_t = 0.; double dht_get = 0.; @@ -327,9 +320,8 @@ protected: void WorkerPerfToMaster(int type, const struct worker_s &timings); void WorkerMetricsToMaster(int type); - IRM_RESULT WorkerRunWorkPackage(std::vector &vecWP, - std::vector &vecMapping, - double dSimTime, double dTimestep); + IRM_RESULT WorkerRunWorkPackage(WorkPackage &work_package, double dSimTime, + double dTimestep); std::vector CalculateWPSizesVector(uint32_t n_cells, uint32_t wp_size) const; @@ -340,8 +332,11 @@ protected: void unshuffleField(const std::vector &in_buffer, uint32_t size_per_prop, uint32_t prop_count, uint32_t wp_count, std::vector &out_field); - std::vector - parseDHTSpeciesVec(const std::vector &species_vec) const; + std::vector + parseDHTSpeciesVec(const NamedVector &key_species, + const std::vector &to_compare) const; + + void BCastStringVec(std::vector &io); int comm_size, comm_rank; MPI_Comm group_comm; @@ -351,11 +346,14 @@ protected: uint32_t wp_size{CHEM_DEFAULT_WP_SIZE}; bool dht_enabled{false}; - int dht_snaps_type{DHT_FILES_DISABLED}; + int dht_snaps_type{DHT_SNAPS_DISABLED}; std::string dht_file_out_dir; poet::DHT_Wrapper *dht = nullptr; + bool interp_enabled{false}; + std::unique_ptr interp; + static constexpr uint32_t BUFFER_OFFSET = 5; inline void ChemBCast(void *buf, int count, MPI_Datatype datatype) const { @@ -374,16 +372,20 @@ protected: bool print_progessbar{false}; - double chem_t = 0.; + std::uint32_t file_pad; + + double chem_t{0.}; uint32_t n_cells = 0; uint32_t prop_count = 0; std::vector prop_names; - Field chem_field{0}; + Field chem_field; static constexpr int MODULE_COUNT = 5; + const ChemistryParams ¶ms; + std::array speciesPerModule{}; }; } // namespace poet diff --git a/src/ChemistryModule/MasterFunctions.cpp b/src/Chemistry/MasterFunctions.cpp similarity index 68% rename from src/ChemistryModule/MasterFunctions.cpp rename to src/Chemistry/MasterFunctions.cpp index 316b64cab..543cd21a8 100644 --- a/src/ChemistryModule/MasterFunctions.cpp +++ b/src/Chemistry/MasterFunctions.cpp @@ -1,7 +1,9 @@ -#include "PhreeqcRM.h" -#include "poet/ChemistryModule.hpp" +#include "ChemistryModule.hpp" + +#include #include +#include #include #include #include @@ -62,19 +64,136 @@ std::vector poet::ChemistryModule::GetWorkerIdleTimings() const { std::vector poet::ChemistryModule::GetWorkerDHTHits() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_HITS); -} - -std::vector poet::ChemistryModule::GetWorkerDHTMiss() const { - int type = CHEM_PERF; + type = WORKER_DHT_HITS; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_MISS); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_DHT_HITS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_DHT_HITS, + this->group_comm, NULL); + + return ret; } std::vector poet::ChemistryModule::GetWorkerDHTEvictions() const { int type = CHEM_PERF; MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); - return MasterGatherWorkerMetrics(WORKER_DHT_EVICTIONS); + type = WORKER_DHT_EVICTIONS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_DHT_EVICTIONS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, + WORKER_DHT_EVICTIONS, this->group_comm, NULL); + + return ret; +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationWriteTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_WRITE); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationReadTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_READ); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationGatherTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_GATHER); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationFunctionCallTimings() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + return MasterGatherWorkerTimings(WORKER_IP_FC); +} + +std::vector +poet::ChemistryModule::GetWorkerInterpolationCalls() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + type = WORKER_IP_CALLS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, WORKER_IP_CALLS, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, WORKER_IP_CALLS, + this->group_comm, NULL); + + return ret; +} + +std::vector poet::ChemistryModule::GetWorkerPHTCacheHits() const { + int type = CHEM_PERF; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + type = WORKER_PHT_CACHE_HITS; + MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm); + + MPI_Status probe; + MPI_Probe(MPI_ANY_SOURCE, type, this->group_comm, &probe); + int count; + MPI_Get_count(&probe, MPI_UINT32_T, &count); + + std::vector ret(count); + MPI_Recv(ret.data(), count, MPI_UINT32_T, probe.MPI_SOURCE, type, + this->group_comm, NULL); + + return ret; +} + +inline std::vector shuffleField(const std::vector &in_field, + uint32_t size_per_prop, + uint32_t prop_count, + uint32_t wp_count) { + std::vector out_buffer(in_field.size()); + uint32_t write_i = 0; + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < prop_count; k++) { + out_buffer[(write_i * prop_count) + k] = + in_field[(k * size_per_prop) + j]; + } + write_i++; + } + } + return out_buffer; +} + +inline void unshuffleField(const std::vector &in_buffer, + uint32_t size_per_prop, uint32_t prop_count, + uint32_t wp_count, std::vector &out_field) { + uint32_t read_i = 0; + + for (uint32_t i = 0; i < wp_count; i++) { + for (uint32_t j = i; j < size_per_prop; j += wp_count) { + for (uint32_t k = 0; k < prop_count; k++) { + out_field[(k * size_per_prop) + j] = + in_buffer[(read_i * prop_count) + k]; + } + read_i++; + } + } } inline void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70) { @@ -190,28 +309,45 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list, } void poet::ChemistryModule::RunCells() { + double start_t{MPI_Wtime()}; if (this->is_sequential) { MasterRunSequential(); return; } MasterRunParallel(); + double end_t{MPI_Wtime()}; + this->chem_t += end_t - start_t; } void poet::ChemistryModule::MasterRunSequential() { std::vector shuffled_field = shuffleField(chem_field.AsVector(), n_cells, prop_count, 1); - this->setDumpedField(shuffled_field); + + std::vector> input; + for (std::size_t i = 0; i < n_cells; i++) { + input.push_back( + std::vector(shuffled_field.begin() + (i * prop_count), + shuffled_field.begin() + ((i + 1) * prop_count))); + } + + this->setDumpedField(input); PhreeqcRM::RunCells(); - this->getDumpedField(shuffled_field); + this->getDumpedField(input); + + shuffled_field.clear(); + for (std::size_t i = 0; i < n_cells; i++) { + shuffled_field.insert(shuffled_field.end(), input[i].begin(), + input[i].end()); + } + std::vector out_vec{shuffled_field}; unshuffleField(shuffled_field, n_cells, prop_count, 1, out_vec); - chem_field.SetFromVector(out_vec); + chem_field = out_vec; } void poet::ChemistryModule::MasterRunParallel() { /* declare most of the needed variables here */ - double chem_a, chem_b; double seq_a, seq_b, seq_c, seq_d; double worker_chemistry_a, worker_chemistry_b; double sim_e_chemistry, sim_f_chemistry; @@ -227,9 +363,6 @@ void poet::ChemistryModule::MasterRunParallel() { double dt = this->PhreeqcRM::GetTimeStep(); static uint32_t iteration = 0; - /* start time measurement of whole chemistry simulation */ - chem_a = MPI_Wtime(); - /* start time measurement of sequential part */ seq_a = MPI_Wtime(); @@ -291,7 +424,7 @@ void poet::ChemistryModule::MasterRunParallel() { std::vector out_vec{mpi_buffer}; unshuffleField(mpi_buffer, this->n_cells, this->prop_count, wp_sizes_vector.size(), out_vec); - chem_field.SetFromVector(out_vec); + chem_field = out_vec; /* do master stuff */ @@ -308,8 +441,6 @@ void poet::ChemistryModule::MasterRunParallel() { for (int i = 1; i < this->comm_size; i++) { MPI_Send(NULL, 0, MPI_DOUBLE, i, LOOP_END, this->group_comm); } - chem_b = MPI_Wtime(); - this->chem_t += chem_b - chem_a; this->simtime += dt; iteration++; @@ -324,7 +455,8 @@ std::vector poet::ChemistryModule::CalculateWPSizesVector(uint32_t n_cells, uint32_t wp_size) const { bool mod_pkgs = (n_cells % wp_size) != 0; - uint32_t n_packages = (uint32_t)(n_cells / wp_size) + mod_pkgs; + uint32_t n_packages = + (uint32_t)(n_cells / wp_size) + static_cast(mod_pkgs); std::vector wp_sizes_vector(n_packages, 0); @@ -334,12 +466,3 @@ poet::ChemistryModule::CalculateWPSizesVector(uint32_t n_cells, return wp_sizes_vector; } - -void poet::ChemistryModule::setProgressBarPrintout(bool enabled) { - if (is_master) { - int type = CHEM_PROGRESSBAR; - ChemBCast(&type, 1, MPI_INT); - ChemBCast(&enabled, 1, MPI_CXX_BOOL); - } - this->print_progessbar = enabled; -} diff --git a/src/ChemistryModule/DHT.c b/src/Chemistry/SurrogateModels/DHT.c similarity index 68% rename from src/ChemistryModule/DHT.c rename to src/Chemistry/SurrogateModels/DHT.c index 485f50e18..b0d5b3caa 100644 --- a/src/ChemistryModule/DHT.c +++ b/src/Chemistry/SurrogateModels/DHT.c @@ -1,3 +1,4 @@ +/// Time-stamp: "Last modified 2023-08-10 11:50:46 mluebke" /* ** Copyright (C) 2017-2021 Max Luebke (University of Potsdam) ** @@ -15,10 +16,13 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include +#include "DHT.h" + +#include #include #include +#include #include #include #include @@ -32,8 +36,8 @@ static void determine_dest(uint64_t hash, int comm_size, /** how many bytes do we need for one index? */ int index_size = sizeof(double) - (index_count - 1); for (int i = 0; i < index_count; i++) { - tmp_index = 0; - memcpy(&tmp_index, (char *)&hash + i, index_size); + tmp_index = (hash >> (i * 8)) & ((1ULL << (index_size * 8)) - 1); + /* memcpy(&tmp_index, (char *)&hash + i, index_size); */ index[i] = (uint64_t)(tmp_index % table_size); } *dest_rank = (unsigned int)(hash % comm_size); @@ -52,7 +56,8 @@ static int read_flag(char flag_byte) { } DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, - unsigned int key_size, uint64_t (*hash_func)(int, const void *)) { + unsigned int key_size, + uint64_t (*hash_func)(int, const void *)) { DHT *object; MPI_Win window; void *mem_alloc; @@ -61,17 +66,20 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, // calculate how much bytes for the index are needed to address count of // buckets per process index_bytes = (int)ceil(log2(size)); - if (index_bytes % 8 != 0) index_bytes = index_bytes + (8 - (index_bytes % 8)); + if (index_bytes % 8 != 0) + index_bytes = index_bytes + (8 - (index_bytes % 8)); // allocate memory for dht-object object = (DHT *)malloc(sizeof(DHT)); - if (object == NULL) return NULL; + if (object == NULL) + return NULL; // every memory allocation has 1 additional byte for flags etc. if (MPI_Alloc_mem(size * (1 + data_size + key_size), MPI_INFO_NULL, &mem_alloc) != 0) return NULL; - if (MPI_Comm_size(comm, &comm_size) != 0) return NULL; + if (MPI_Comm_size(comm, &comm_size) != 0) + return NULL; // since MPI_Alloc_mem doesn't provide memory allocation with the memory set // to zero, we're doing this here @@ -104,7 +112,8 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, DHT_stats *stats; stats = (DHT_stats *)malloc(sizeof(DHT_stats)); - if (stats == NULL) return NULL; + if (stats == NULL) + return NULL; object->stats = stats; object->stats->writes_local = (int *)calloc(comm_size, sizeof(int)); @@ -118,7 +127,106 @@ DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, return object; } -int DHT_write(DHT *table, void *send_key, void *send_data) { +void DHT_set_accumulate_callback(DHT *table, + int (*callback_func)(int, void *, int, + void *)) { + table->accumulate_callback = callback_func; +} + +int DHT_write_accumulate(DHT *table, const void *send_key, int data_size, + void *send_data, uint32_t *proc, uint32_t *index, + int *callback_ret) { + unsigned int dest_rank, i; + int result = DHT_SUCCESS; + +#ifdef DHT_STATISTICS + table->stats->w_access++; +#endif + + // determine destination rank and index by hash of key + determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, + table->table_size, &dest_rank, table->index, + table->index_count); + + // concatenating key with data to write entry to DHT + set_flag((char *)table->send_entry); + memcpy((char *)table->send_entry + 1, (char *)send_key, table->key_size); + /* memcpy((char *)table->send_entry + table->key_size + 1, (char *)send_data, + */ + /* table->data_size); */ + + // locking window of target rank with exclusive lock + if (MPI_Win_lock(MPI_LOCK_EXCLUSIVE, dest_rank, 0, table->window) != 0) + return DHT_MPI_ERROR; + for (i = 0; i < table->index_count; i++) { + if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, + MPI_BYTE, dest_rank, table->index[i], + 1 + table->data_size + table->key_size, MPI_BYTE, + table->window) != 0) + return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + // increment eviction counter if receiving key doesn't match sending key + // entry has write flag and last index is reached. + if (read_flag(*(char *)table->recv_entry)) { + if (memcmp(send_key, (char *)table->recv_entry + 1, table->key_size) != + 0) { + if (i == (table->index_count) - 1) { + table->evictions += 1; +#ifdef DHT_STATISTICS + table->stats->evictions += 1; +#endif + result = DHT_WRITE_SUCCESS_WITH_EVICTION; + break; + } + } else + break; + } else { +#ifdef DHT_STATISTICS + table->stats->writes_local[dest_rank]++; +#endif + break; + } + } + + if (result == DHT_WRITE_SUCCESS_WITH_EVICTION) { + memset((char *)table->send_entry + 1 + table->key_size, '\0', + table->data_size); + } else { + memcpy((char *)table->send_entry + 1 + table->key_size, + (char *)table->recv_entry + 1 + table->key_size, table->data_size); + } + + *callback_ret = table->accumulate_callback( + data_size, (char *)send_data, table->data_size, + (char *)table->send_entry + 1 + table->key_size); + + // put data to DHT (with last selected index by value i) + if (*callback_ret == 0) { + if (MPI_Put(table->send_entry, 1 + table->data_size + table->key_size, + MPI_BYTE, dest_rank, table->index[i], + 1 + table->data_size + table->key_size, MPI_BYTE, + table->window) != 0) + return DHT_MPI_ERROR; + } + // unlock window of target rank + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + if (proc) { + *proc = dest_rank; + } + + if (index) { + *index = table->index[i]; + } + + return result; +} + +int DHT_write(DHT *table, void *send_key, void *send_data, uint32_t *proc, + uint32_t *index) { unsigned int dest_rank, i; int result = DHT_SUCCESS; @@ -146,7 +254,8 @@ int DHT_write(DHT *table, void *send_key, void *send_data) { 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR; - if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // increment eviction counter if receiving key doesn't match sending key // entry has write flag and last index is reached. @@ -178,12 +287,21 @@ int DHT_write(DHT *table, void *send_key, void *send_data) { table->window) != 0) return DHT_MPI_ERROR; // unlock window of target rank - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; + + if (proc) { + *proc = dest_rank; + } + + if (index) { + *index = table->index[i]; + } return result; } -int DHT_read(DHT *table, void *send_key, void *destination) { +int DHT_read(DHT *table, const void *send_key, void *destination) { unsigned int dest_rank, i; #ifdef DHT_STATISTICS @@ -205,7 +323,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR; - if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_flush(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // increment read error counter if write flag isn't set ... if ((read_flag(*(char *)table->recv_entry)) == 0) { @@ -214,7 +333,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { table->stats->read_misses += 1; #endif // unlock window and return - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; return DHT_READ_MISS; } @@ -227,7 +347,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { table->stats->read_misses += 1; #endif // unlock window an return - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; return DHT_READ_MISS; } } else @@ -235,7 +356,8 @@ int DHT_read(DHT *table, void *send_key, void *destination) { } // unlock window of target rank - if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR; + if (MPI_Win_unlock(dest_rank, table->window) != 0) + return DHT_MPI_ERROR; // if matching key was found copy data into memory of passed pointer memcpy((char *)destination, (char *)table->recv_entry + table->key_size + 1, @@ -244,6 +366,34 @@ int DHT_read(DHT *table, void *send_key, void *destination) { return DHT_SUCCESS; } +int DHT_read_location(DHT *table, uint32_t proc, uint32_t index, + void *destination) { + const uint32_t bucket_size = table->data_size + table->key_size + 1; + +#ifdef DHT_STATISTICS + table->stats->r_access++; +#endif + + // locking window of target rank with shared lock + if (MPI_Win_lock(MPI_LOCK_SHARED, proc, 0, table->window) != 0) + return DHT_MPI_ERROR; + // receive data + if (MPI_Get(table->recv_entry, bucket_size, MPI_BYTE, proc, index, + bucket_size, MPI_BYTE, table->window) != 0) { + return DHT_MPI_ERROR; + } + + // unlock window of target rank + if (MPI_Win_unlock(proc, table->window) != 0) + return DHT_MPI_ERROR; + + // if matching key was found copy data into memory of passed pointer + memcpy((char *)destination, (char *)table->recv_entry + 1 + table->key_size, + table->data_size); + + return DHT_SUCCESS; +} + int DHT_to_file(DHT *table, const char *filename) { // open file MPI_File file; @@ -257,17 +407,15 @@ int DHT_to_file(DHT *table, const char *filename) { // write header (key_size and data_size) if (rank == 0) { - if (MPI_File_write(file, &table->key_size, 1, MPI_INT, MPI_STATUS_IGNORE) != - 0) + if (MPI_File_write_shared(file, &table->key_size, 1, MPI_INT, + MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR; - if (MPI_File_write(file, &table->data_size, 1, MPI_INT, - MPI_STATUS_IGNORE) != 0) + if (MPI_File_write_shared(file, &table->data_size, 1, MPI_INT, + MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR; } - // seek file pointer behind header for all processes - if (MPI_File_seek_shared(file, DHT_FILEHEADER_SIZE, MPI_SEEK_SET) != 0) - return DHT_FILE_IO_ERROR; + MPI_Barrier(table->communicator); char *ptr; int bucket_size = table->key_size + table->data_size + 1; @@ -283,8 +431,12 @@ int DHT_to_file(DHT *table, const char *filename) { return DHT_FILE_WRITE_ERROR; } } + + MPI_Barrier(table->communicator); + // close file - if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_close(&file) != 0) + return DHT_FILE_IO_ERROR; return DHT_SUCCESS; } @@ -303,7 +455,8 @@ int DHT_from_file(DHT *table, const char *filename) { return DHT_FILE_IO_ERROR; // get file size - if (MPI_File_get_size(file, &f_size) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_get_size(file, &f_size) != 0) + return DHT_FILE_IO_ERROR; MPI_Comm_rank(table->communicator, &rank); @@ -322,8 +475,10 @@ int DHT_from_file(DHT *table, const char *filename) { return DHT_FILE_READ_ERROR; // compare if written header data and key size matches current sizes - if (*(int *)buffer != table->key_size) return DHT_WRONG_FILE; - if (*(int *)(buffer + 4) != table->data_size) return DHT_WRONG_FILE; + if (*(int *)buffer != table->key_size) + return DHT_WRONG_FILE; + if (*(int *)(buffer + 4) != table->data_size) + return DHT_WRONG_FILE; // set offset for each process offset = bucket_size * table->comm_size; @@ -348,14 +503,16 @@ int DHT_from_file(DHT *table, const char *filename) { // extract key and data and write to DHT key = buffer; data = (buffer + table->key_size); - if (DHT_write(table, key, data) == DHT_MPI_ERROR) return DHT_MPI_ERROR; + if (DHT_write(table, key, data, NULL, NULL) == DHT_MPI_ERROR) + return DHT_MPI_ERROR; // increment current position cur_pos += offset; } free(buffer); - if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR; + if (MPI_File_close(&file) != 0) + return DHT_FILE_IO_ERROR; return DHT_SUCCESS; } @@ -377,8 +534,10 @@ int DHT_free(DHT *table, int *eviction_counter, int *readerror_counter) { return DHT_MPI_ERROR; *readerror_counter = buf; } - if (MPI_Win_free(&(table->window)) != 0) return DHT_MPI_ERROR; - if (MPI_Free_mem(table->mem_alloc) != 0) return DHT_MPI_ERROR; + if (MPI_Win_free(&(table->window)) != 0) + return DHT_MPI_ERROR; + if (MPI_Free_mem(table->mem_alloc) != 0) + return DHT_MPI_ERROR; free(table->recv_entry); free(table->send_entry); free(table->index); @@ -407,7 +566,8 @@ int DHT_print_statistics(DHT *table) { #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" // obtaining all values from all processes in the communicator - if (rank == 0) read_misses = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + read_misses = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->read_misses, 1, MPI_INT, read_misses, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -416,7 +576,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->read_misses = 0; - if (rank == 0) evictions = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + evictions = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->evictions, 1, MPI_INT, evictions, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -425,7 +586,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->evictions = 0; - if (rank == 0) w_access = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + w_access = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->w_access, 1, MPI_INT, w_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -434,7 +596,8 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->w_access = 0; - if (rank == 0) r_access = (int *)malloc(table->comm_size * sizeof(int)); + if (rank == 0) + r_access = (int *)malloc(table->comm_size * sizeof(int)); if (MPI_Gather(&table->stats->r_access, 1, MPI_INT, r_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR; @@ -443,13 +606,14 @@ int DHT_print_statistics(DHT *table) { return DHT_MPI_ERROR; table->stats->r_access = 0; - if (rank == 0) written_buckets = (int *)calloc(table->comm_size, sizeof(int)); + if (rank == 0) + written_buckets = (int *)calloc(table->comm_size, sizeof(int)); if (MPI_Reduce(table->stats->writes_local, written_buckets, table->comm_size, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR; - if (rank == 0) { // only process with rank 0 will print out results as a - // table + if (rank == 0) { // only process with rank 0 will print out results as a + // table int sum_written_buckets = 0; for (int i = 0; i < table->comm_size; i++) { diff --git a/include/poet/DHT.h b/src/Chemistry/SurrogateModels/DHT.h similarity index 88% rename from include/poet/DHT.h rename to src/Chemistry/SurrogateModels/DHT.h index e8f8399f8..ba0022bff 100644 --- a/include/poet/DHT.h +++ b/src/Chemistry/SurrogateModels/DHT.h @@ -67,7 +67,7 @@ */ typedef struct { /** Count of writes to specific process this process did. */ - int* writes_local; + int *writes_local; /** Writes after last call of DHT_print_statistics. */ int old_writes; /** How many read misses occur? */ @@ -100,27 +100,36 @@ typedef struct { /** Size of the MPI communicator respectively all participating processes. */ int comm_size; /** Pointer to a hashfunction. */ - uint64_t (*hash_func)(int, const void*); + uint64_t (*hash_func)(int, const void *); /** Pre-allocated memory where a bucket can be received. */ - void* recv_entry; + void *recv_entry; /** Pre-allocated memory where a bucket to send can be stored. */ - void* send_entry; + void *send_entry; /** Allocated memory on which the MPI window was created. */ - void* mem_alloc; + void *mem_alloc; /** Count of read misses over all time. */ int read_misses; /** Count of evictions over all time. */ int evictions; /** Array of indeces where a bucket can be stored. */ - uint64_t* index; + uint64_t *index; /** Count of possible indeces. */ unsigned int index_count; + + int (*accumulate_callback)(int, void *, int, void *); #ifdef DHT_STATISTICS /** Detailed statistics of the usage of the DHT. */ - DHT_stats* stats; + DHT_stats *stats; #endif } DHT; +extern void DHT_set_accumulate_callback(DHT *table, + int (*callback_func)(int, void *, int, + void *)); + +extern int DHT_write_accumulate(DHT *table, const void *key, int send_size, + void *data, uint32_t *proc, uint32_t *index, int *callback_ret); + /** * @brief Create a DHT. * @@ -141,9 +150,9 @@ typedef struct { * @return DHT* The returned value is the \a DHT-object which serves as a handle * for all DHT operations. If an error occured NULL is returned. */ -extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process, +extern DHT *DHT_create(MPI_Comm comm, uint64_t size_per_process, unsigned int data_size, unsigned int key_size, - uint64_t (*hash_func)(int, const void*)); + uint64_t (*hash_func)(int, const void *)); /** * @brief Write data into DHT. @@ -161,10 +170,14 @@ extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process, * @param table Pointer to the \a DHT-object. * @param key Pointer to the key. * @param data Pointer to the data. + * @param proc If not NULL, returns the process number written to. + * @param index If not NULL, returns the index of the bucket where the data was + * written to. * @return int Returns either DHT_SUCCESS on success or correspondending error * value on eviction or error. */ -extern int DHT_write(DHT* table, void* key, void* data); +extern int DHT_write(DHT *table, void *key, void *data, uint32_t *proc, + uint32_t *index); /** * @brief Read data from DHT. @@ -187,8 +200,10 @@ extern int DHT_write(DHT* table, void* key, void* data); * @return int Returns either DHT_SUCCESS on success or correspondending error * value on read miss or error. */ -extern int DHT_read(DHT* table, void* key, void* destination); +extern int DHT_read(DHT *table, const void *key, void *destination); +extern int DHT_read_location(DHT *table, uint32_t proc, uint32_t index, + void *destination); /** * @brief Write current state of DHT to file. * @@ -203,7 +218,7 @@ extern int DHT_read(DHT* table, void* key, void* destination); * @return int Returns DHT_SUCCESS on succes, DHT_FILE_IO_ERROR if file can't be * opened/closed or DHT_WRITE_ERROR if file is not writable. */ -extern int DHT_to_file(DHT* table, const char* filename); +extern int DHT_to_file(DHT *table, const char *filename); /** * @brief Read state of DHT from file. @@ -223,7 +238,7 @@ extern int DHT_to_file(DHT* table, const char* filename); * file doesn't match expectation. This is possible if the data size or key size * is different. */ -extern int DHT_from_file(DHT* table, const char* filename); +extern int DHT_from_file(DHT *table, const char *filename); /** * @brief Free ressources of DHT. @@ -241,7 +256,7 @@ extern int DHT_from_file(DHT* table, const char* filename); * @return int Returns either DHT_SUCCESS on success or DHT_MPI_ERROR on * internal MPI error. */ -extern int DHT_free(DHT* table, int* eviction_counter, int* readerror_counter); +extern int DHT_free(DHT *table, int *eviction_counter, int *readerror_counter); /** * @brief Prints a table with statistics about current use of DHT. @@ -267,7 +282,7 @@ extern int DHT_free(DHT* table, int* eviction_counter, int* readerror_counter); * @return int Returns DHT_SUCCESS on success or DHT_MPI_ERROR on internal MPI * error. */ -extern int DHT_print_statistics(DHT* table); +extern int DHT_print_statistics(DHT *table); /** * @brief Determine destination rank and index. @@ -286,8 +301,8 @@ extern int DHT_print_statistics(DHT* table); * @param index_count Count of possible indeces. */ static void determine_dest(uint64_t hash, int comm_size, - unsigned int table_size, unsigned int* dest_rank, - uint64_t* index, unsigned int index_count); + unsigned int table_size, unsigned int *dest_rank, + uint64_t *index, unsigned int index_count); /** * @brief Set the occupied flag. @@ -296,7 +311,7 @@ static void determine_dest(uint64_t hash, int comm_size, * * @param flag_byte First byte of a bucket. */ -static void set_flag(char* flag_byte); +static void set_flag(char *flag_byte); /** * @brief Get the occupied flag. diff --git a/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp b/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp new file mode 100644 index 000000000..69491b2c2 --- /dev/null +++ b/src/Chemistry/SurrogateModels/DHT_Wrapper.cpp @@ -0,0 +1,335 @@ +// Time-stamp: "Last modified 2023-11-01 10:54:45 mluebke" + +/* +** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of +** Potsdam) +** +** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam) +** +** POET is free software; you can redistribute it and/or modify it under the +** terms of the GNU General Public License as published by the Free Software +** Foundation; either version 2 of the License, or (at your option) any later +** version. +** +** POET is distributed in the hope that it will be useful, but WITHOUT ANY +** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +** A PARTICULAR PURPOSE. See the GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along with +** this program; if not, write to the Free Software Foundation, Inc., 51 +** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + +#include "DHT_Wrapper.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace poet { + +DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, + const NamedVector &key_species, + const std::vector &key_indices, + const std::vector &_output_names, + const ChemistryParams::Chem_Hook_Functions &_hooks, + uint32_t data_count, bool _with_interp) + : key_count(key_indices.size()), data_count(data_count), + input_key_elements(key_indices), communicator(dht_comm), + key_species(key_species), output_names(_output_names), hooks(_hooks), + with_interp(_with_interp) { + // initialize DHT object + // key size = count of key elements + timestep + uint32_t key_size = (key_count + 1) * sizeof(Lookup_Keyelement); + uint32_t data_size = + (data_count + (with_interp ? input_key_elements.size() : 0)) * + sizeof(double); + uint32_t buckets_per_process = + static_cast(dht_size / (data_size + key_size)); + dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, + &poet::Murmur2_64A); + + dht_signif_vector = key_species.getValues(); + + // this->dht_signif_vector.resize(key_size, DHT_KEY_SIGNIF_DEFAULT); + + this->dht_prop_type_vector.resize(key_count, DHT_TYPE_DEFAULT); + + const auto key_names = key_species.getNames(); + + auto tot_h = std::find(key_names.begin(), key_names.end(), "H"); + if (tot_h != key_names.end()) { + this->dht_prop_type_vector[tot_h - key_names.begin()] = DHT_TYPE_TOTAL; + } + auto tot_o = std::find(key_names.begin(), key_names.end(), "O"); + if (tot_o != key_names.end()) { + this->dht_prop_type_vector[tot_o - key_names.begin()] = DHT_TYPE_TOTAL; + } + auto charge = std::find(key_names.begin(), key_names.end(), "Charge"); + if (charge != key_names.end()) { + this->dht_prop_type_vector[charge - key_names.begin()] = DHT_TYPE_CHARGE; + } +} + +DHT_Wrapper::~DHT_Wrapper() { + // free DHT + DHT_free(dht_object, NULL, NULL); +} +auto DHT_Wrapper::checkDHT(WorkPackage &work_package) + -> const DHT_ResultObject & { + + const auto length = work_package.size; + + std::vector bucket_writer( + this->data_count + (with_interp ? input_key_elements.size() : 0)); + + // loop over every grid cell contained in work package + for (int i = 0; i < length; i++) { + // point to current grid cell + auto &key_vector = dht_results.keys[i]; + + // overwrite input with data from DHT, IF value is found in DHT + int res = + DHT_read(this->dht_object, key_vector.data(), bucket_writer.data()); + + switch (res) { + case DHT_SUCCESS: + work_package.output[i] = + (with_interp + ? inputAndRatesToOutput(bucket_writer, work_package.input[i]) + : bucket_writer); + work_package.mapping[i] = CHEM_DHT; + this->dht_hits++; + break; + case DHT_READ_MISS: + break; + } + } + + return dht_results; +} + +void DHT_Wrapper::fillDHT(const WorkPackage &work_package) { + + const auto length = work_package.size; + + // loop over every grid cell contained in work package + dht_results.locations.resize(length); + dht_results.filledDHT = std::vector(length, false); + for (int i = 0; i < length; i++) { + // If true grid cell was simulated, needs to be inserted into dht + if (work_package.mapping[i] == CHEM_PQC) { + + // check if calcite or dolomite is absent and present, resp.n and vice + // versa in input/output. If this is the case -> Do not write to DHT! + // HACK: hardcoded, should be fixed! + if (hooks.dht_fill.isValid()) { + NamedVector old_values(output_names, work_package.input[i]); + NamedVector new_values(output_names, work_package.output[i]); + + if (hooks.dht_fill(old_values, new_values)) { + continue; + } + } + + uint32_t proc, index; + auto &key = dht_results.keys[i]; + const auto data = + (with_interp ? outputToInputAndRates(work_package.input[i], + work_package.output[i]) + : work_package.output[i]); + // void *data = (void *)&(work_package[i * this->data_count]); + // fuzz data (round, logarithm etc.) + + // insert simulated data with fuzzed key into DHT + int res = DHT_write(this->dht_object, key.data(), + const_cast(data.data()), &proc, &index); + + dht_results.locations[i] = {proc, index}; + + // if data was successfully written ... + if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) { + dht_evictions++; + } + + dht_results.filledDHT[i] = true; + } + } +} + +inline std::vector +DHT_Wrapper::outputToInputAndRates(const std::vector &old_results, + const std::vector &new_results) { + const int prefix_size = this->input_key_elements.size(); + + std::vector output(prefix_size + this->data_count); + std::copy(new_results.begin(), new_results.end(), + output.begin() + prefix_size); + + for (int i = 0; i < prefix_size; i++) { + const int data_elem_i = input_key_elements[i]; + output[i] = old_results[data_elem_i]; + output[prefix_size + data_elem_i] -= old_results[data_elem_i]; + } + + return output; +} + +inline std::vector +DHT_Wrapper::inputAndRatesToOutput(const std::vector &dht_data, + const std::vector &input_values) { + const int prefix_size = this->input_key_elements.size(); + + std::vector output(input_values); + + for (int i = 0; i < prefix_size; i++) { + const int data_elem_i = input_key_elements[i]; + output[data_elem_i] += dht_data[i]; + } + + return output; +} + +inline std::vector +DHT_Wrapper::outputToRates(const std::vector &old_results, + const std::vector &new_results) { + std::vector output(new_results); + + for (const auto &data_elem_i : input_key_elements) { + output[data_elem_i] -= old_results[data_elem_i]; + } + + return output; +} + +inline std::vector +DHT_Wrapper::ratesToOutput(const std::vector &dht_data, + const std::vector &input_values) { + std::vector output(input_values); + + for (const auto &data_elem_i : input_key_elements) { + output[data_elem_i] += dht_data[data_elem_i]; + } + + return output; +} + +// void DHT_Wrapper::resultsToWP(std::vector &work_package) { +// for (int i = 0; i < dht_results.length; i++) { +// if (!dht_results.needPhreeqc[i]) { +// std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), +// work_package.begin() + (data_count * i)); +// } +// } +// } + +int DHT_Wrapper::tableToFile(const char *filename) { + int res = DHT_to_file(dht_object, filename); + return res; +} + +int DHT_Wrapper::fileToTable(const char *filename) { + int res = DHT_from_file(dht_object, filename); + if (res != DHT_SUCCESS) + return res; + +#ifdef DHT_STATISTICS + DHT_print_statistics(dht_object); +#endif + + return DHT_SUCCESS; +} + +void DHT_Wrapper::printStatistics() { + int res; + + res = DHT_print_statistics(dht_object); + + if (res != DHT_SUCCESS) { + // MPI ERROR ... WHAT TO DO NOW? + // RUNNING CIRCLES WHILE SCREAMING + } +} + +LookupKey DHT_Wrapper::fuzzForDHT_R(const std::vector &cell, + double dt) { + const auto c_zero_val = std::pow(10, AQUEOUS_EXP); + + NamedVector input_nv(this->output_names, cell); + + const std::vector eval_vec = hooks.dht_fuzz(input_nv); + assert(eval_vec.size() == this->key_count); + LookupKey vecFuzz(this->key_count + 1, {.0}); + + DHT_Rounder rounder; + + int totals_i = 0; + // introduce fuzzing to allow more hits in DHT + // loop over every variable of grid cell + for (std::uint32_t i = 0; i < eval_vec.size(); i++) { + double curr_key = eval_vec[i]; + if (curr_key != 0) { + if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { + curr_key -= base_totals[totals_i++]; + } + vecFuzz[i] = + rounder.round(curr_key, dht_signif_vector[i], + this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); + } + } + // add timestep to the end of the key as double value + vecFuzz[this->key_count].fp_element = dt; + + return vecFuzz; +} + +LookupKey DHT_Wrapper::fuzzForDHT(const std::vector &cell, double dt) { + const auto c_zero_val = std::pow(10, AQUEOUS_EXP); + + LookupKey vecFuzz(this->key_count + 1, {.0}); + DHT_Rounder rounder; + + int totals_i = 0; + // introduce fuzzing to allow more hits in DHT + // loop over every variable of grid cell + for (std::uint32_t i = 0; i < input_key_elements.size(); i++) { + if (input_key_elements[i] == DHT_KEY_INPUT_CUSTOM) { + continue; + } + double curr_key = cell[input_key_elements[i]]; + if (curr_key != 0) { + if (curr_key < c_zero_val && + this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) { + continue; + } + if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { + curr_key -= base_totals[totals_i++]; + } + vecFuzz[i] = + rounder.round(curr_key, dht_signif_vector[i], + this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL); + } + } + // add timestep to the end of the key as double value + vecFuzz[this->key_count].fp_element = dt; + + return vecFuzz; +} + +void poet::DHT_Wrapper::SetSignifVector(std::vector signif_vec) { + if (signif_vec.size() != this->key_count) { + throw std::runtime_error( + "Significant vector size mismatches count of key elements."); + } + + this->dht_signif_vector = signif_vec; +} +} // namespace poet diff --git a/include/poet/DHT_Wrapper.hpp b/src/Chemistry/SurrogateModels/DHT_Wrapper.hpp similarity index 61% rename from include/poet/DHT_Wrapper.hpp rename to src/Chemistry/SurrogateModels/DHT_Wrapper.hpp index b7cf01ba6..791be2d1b 100644 --- a/include/poet/DHT_Wrapper.hpp +++ b/src/Chemistry/SurrogateModels/DHT_Wrapper.hpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-04-24 16:23:42 mluebke" +// Time-stamp: "Last modified 2023-09-08 14:43:02 mluebke" /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of @@ -23,10 +23,20 @@ #ifndef DHT_WRAPPER_H #define DHT_WRAPPER_H -#include "poet/DHT_Types.hpp" +#include "../../Base/RInsidePOET.hpp" +#include "../../Base/SimParams.hpp" +#include "../../DataStructures/DataStructures.hpp" +#include "../enums.hpp" +#include "HashFunctions.hpp" +#include "LookupKey.hpp" +#include "Rounding.hpp" + #include #include +#include #include +#include +#include #include extern "C" { @@ -37,35 +47,7 @@ extern "C" { namespace poet { -struct DHT_SCNotation { - std::int8_t exp : 8; - std::int64_t significant : 56; -}; - -union DHT_Keyelement { - double fp_elemet; - DHT_SCNotation sc_notation; -}; - -using DHT_ResultObject = struct DHTResobj { - uint32_t length; - std::vector> keys; - std::vector> results; - std::vector needPhreeqc; -}; - -/** - * @brief Return user-defined md5sum - * - * This function will calculate a hashsum with the help of md5sum. Therefore the - * md5sum for a given key is calculated and divided into two 64-bit parts. These - * will be XORed and returned as the hash. - * - * @param key_size Size of key in bytes - * @param key Pointer to key - * @return uint64_t Hashsum as an unsigned 64-bit integer - */ -static uint64_t get_md5(int key_size, void *key); +using DHT_Location = std::pair; /** * @brief C++-Wrapper around DHT implementation @@ -76,6 +58,17 @@ static uint64_t get_md5(int key_size, void *key); */ class DHT_Wrapper { public: + using DHT_ResultObject = struct DHTResobj { + std::vector keys; + std::vector locations; + std::vector filledDHT; + }; + + static constexpr std::int32_t DHT_KEY_INPUT_CUSTOM = + std::numeric_limits::min(); + + static constexpr int DHT_KEY_SIGNIF_DEFAULT = 5; + /** * @brief Construct a new dht wrapper object * @@ -91,9 +84,12 @@ public: * for key creation. * @param data_count Count of data entries */ - DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size, - const std::vector &key_indices, - uint32_t data_count); + DHT_Wrapper(MPI_Comm dht_comm, std::uint64_t dht_size, + const NamedVector &key_species, + const std::vector &key_indices, + const std::vector &output_names, + const ChemistryParams::Chem_Hook_Functions &hooks, + uint32_t data_count, bool with_interp); /** * @brief Destroy the dht wrapper object * @@ -105,6 +101,9 @@ public: */ ~DHT_Wrapper(); + DHT_Wrapper &operator=(const DHT_Wrapper &) = delete; + DHT_Wrapper(const DHT_Wrapper &) = delete; + /** * @brief Check if values of workpackage are stored in DHT * @@ -121,9 +120,7 @@ public: * @param[in,out] work_package Pointer to current work package * @param dt Current timestep of simulation */ - auto checkDHT(int length, double dt, const std::vector &work_package, - std::vector &curr_mapping) - -> const poet::DHT_ResultObject &; + auto checkDHT(WorkPackage &work_package) -> const DHT_ResultObject &; /** * @brief Write simulated values into DHT @@ -141,7 +138,7 @@ public: * outputs of the PHREEQC simulation * @param dt Current timestep of simulation */ - void fillDHT(int length, const std::vector &work_package); + void fillDHT(const WorkPackage &work_package); void resultsToWP(std::vector &work_package); @@ -183,13 +180,6 @@ public: */ auto getHits() { return this->dht_hits; }; - /** - * @brief Get the Misses object - * - * @return uint64_t Count of read misses - */ - auto getMisses() { return this->dht_miss; }; - /** * @brief Get the Evictions object * @@ -197,10 +187,42 @@ public: */ auto getEvictions() { return this->dht_evictions; }; + void resetCounter() { + this->dht_hits = 0; + this->dht_evictions = 0; + } + void SetSignifVector(std::vector signif_vec); - void setBaseTotals(const std::array &bt) { - this->base_totals = bt; + auto getDataCount() { return this->data_count; } + auto getCommunicator() { return this->communicator; } + DHT *getDHT() { return this->dht_object; }; + + DHT_ResultObject &getDHTResults() { return this->dht_results; } + + const auto &getKeyElements() const { return this->input_key_elements; } + const auto &getKeySpecies() const { return this->key_species; } + + void setBaseTotals(double tot_h, double tot_o) { + this->base_totals = {tot_h, tot_o}; + } + + std::uint32_t getInputCount() const { + return this->input_key_elements.size(); + } + + std::uint32_t getOutputCount() const { return this->data_count; } + + inline void prepareKeys(const std::vector> &input_values, + double dt) { + dht_results.keys.resize(input_values.size()); + for (std::size_t i = 0; i < input_values.size(); i++) { + if (this->hooks.dht_fuzz.isValid()) { + dht_results.keys[i] = fuzzForDHT_R(input_values[i], dt); + } else { + dht_results.keys[i] = fuzzForDHT(input_values[i], dt); + } + } } private: @@ -208,20 +230,38 @@ private: uint32_t data_count; DHT *dht_object; + MPI_Comm communicator; - std::vector fuzzForDHT(int var_count, void *key, double dt); + LookupKey fuzzForDHT(const std::vector &cell, double dt); + LookupKey fuzzForDHT_R(const std::vector &cell, double dt); + + std::vector + outputToInputAndRates(const std::vector &old_results, + const std::vector &new_results); + + std::vector + inputAndRatesToOutput(const std::vector &dht_data, + const std::vector &input_values); + + std::vector outputToRates(const std::vector &old_results, + const std::vector &new_results); + + std::vector ratesToOutput(const std::vector &dht_data, + const std::vector &input_values); uint32_t dht_hits = 0; - uint32_t dht_miss = 0; uint32_t dht_evictions = 0; - std::vector dht_signif_vector; - std::vector dht_prop_type_vector; - std::vector input_key_elements; + NamedVector key_species; - static constexpr int DHT_KEY_SIGNIF_DEFAULT = 5; - static constexpr int DHT_KEY_SIGNIF_TOTALS = 10; - static constexpr int DHT_KEY_SIGNIF_CHARGE = 3; + std::vector dht_signif_vector; + std::vector dht_prop_type_vector; + std::vector input_key_elements; + + const std::vector &output_names; + + const ChemistryParams::Chem_Hook_Functions &hooks; + const bool with_interp; DHT_ResultObject dht_results; diff --git a/src/ChemistryModule/HashFunctions.cpp b/src/Chemistry/SurrogateModels/HashFunctions.cpp similarity index 96% rename from src/ChemistryModule/HashFunctions.cpp rename to src/Chemistry/SurrogateModels/HashFunctions.cpp index b2739aea6..d93702990 100644 --- a/src/ChemistryModule/HashFunctions.cpp +++ b/src/Chemistry/SurrogateModels/HashFunctions.cpp @@ -1,4 +1,4 @@ -// Time-stamp: "Last modified 2023-04-24 16:56:18 mluebke" +// Time-stamp: "Last modified 2023-04-24 23:20:55 mluebke" /* **----------------------------------------------------------------------------- ** MurmurHash2 was written by Austin Appleby, and is placed in the public @@ -24,7 +24,7 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/HashFunctions.hpp" +#include "HashFunctions.hpp" #if defined(_MSC_VER) diff --git a/include/poet/HashFunctions.hpp b/src/Chemistry/SurrogateModels/HashFunctions.hpp similarity index 100% rename from include/poet/HashFunctions.hpp rename to src/Chemistry/SurrogateModels/HashFunctions.hpp diff --git a/src/Chemistry/SurrogateModels/Interpolation.hpp b/src/Chemistry/SurrogateModels/Interpolation.hpp new file mode 100644 index 000000000..15361f4bf --- /dev/null +++ b/src/Chemistry/SurrogateModels/Interpolation.hpp @@ -0,0 +1,270 @@ +// Time-stamp: "Last modified 2023-08-16 16:49:31 mluebke" + +#ifndef INTERPOLATION_H_ +#define INTERPOLATION_H_ + +#include "../../Base/SimParams.hpp" +#include "../../DataStructures/DataStructures.hpp" +#include "DHT_Wrapper.hpp" +#include "LookupKey.hpp" +#include "Rounding.hpp" + +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include "DHT.h" +} + +#include +#include +#include +#include + +namespace poet { + +class ProximityHashTable { +public: + using bucket_indicator = std::uint64_t; + + ProximityHashTable(uint32_t key_size, uint32_t data_size, + uint32_t entry_count, uint32_t size_per_process, + MPI_Comm communicator); + ~ProximityHashTable(); + + // delete assign and copy operator + ProximityHashTable &operator=(const ProximityHashTable *) = delete; + ProximityHashTable(const ProximityHashTable &) = delete; + + struct PHT_Result { + std::uint32_t size; + std::vector> in_values; + std::vector> out_values; + + std::uint32_t getMemSize() const { + std::uint32_t sum{0}; + for (const auto &results : out_values) { + sum += results.size() * sizeof(double); + } + return sum; + } + }; + + void setSourceDHT(DHT *src) { + this->source_dht = src; + this->dht_key_count = src->key_size / sizeof(Lookup_Keyelement); + this->dht_data_count = src->data_size / sizeof(double); + this->dht_buffer.resize(src->data_size + src->key_size); + } + + void writeLocationToPHT(LookupKey key, DHT_Location location); + + const PHT_Result &query(const LookupKey &key, + std::uint32_t min_entries_needed, + std::uint32_t input_count, + std::uint32_t output_count); + + std::uint64_t getLocations(const LookupKey &key); + + void getEntriesFromLocation(const PHT_Result &locations, + const std::vector &signif); + + void writeStats() { DHT_print_statistics(this->prox_ht); } + + DHT *getDHTObject() { return this->prox_ht; } + + auto getPHTWriteTime() const -> double { return this->pht_write_t; }; + auto getPHTReadTime() const -> double { return this->pht_read_t; }; + auto getDHTGatherTime() const -> double { return this->pht_gather_dht_t; }; + + auto getLocalCacheHits() const -> std::vector { + return this->all_cache_hits; + } + + void storeAndResetCounter() { + all_cache_hits.push_back(cache_hits); + cache_hits = 0; + } + +#if POET_PHT_ADD + void incrementReadCounter(const LookupKey &key); +#endif + +private: + enum { INTERP_CB_OK, INTERP_CB_FULL, INTERP_CB_ALREADY_IN }; + + static int PHT_callback_function(int in_data_size, void *in_data, + int out_data_size, void *out_data); + + static std::vector convertKeysFromDHT(Lookup_Keyelement *keys_in, + std::uint32_t key_size); + + static bool similarityCheck(const LookupKey &fine, const LookupKey &coarse, + const std::vector &signif); + + char *bucket_store; + + class Cache + : private std::unordered_map { + public: + void operator()(const LookupKey &key, const PHT_Result val); + + std::pair operator[](const LookupKey &key); + void flush() { this->clear(); } + + protected: + private: + static constexpr std::int64_t MAX_CACHE_SIZE = 100E6; + + std::int64_t free_mem{MAX_CACHE_SIZE}; + + std::list lru_queue; + using lru_iterator = typename std::list::iterator; + + std::unordered_map keyfinder; + }; + + Cache localCache; + DHT *prox_ht; + + std::uint32_t dht_evictions = 0; + + DHT *source_dht = nullptr; + + PHT_Result lookup_results; + std::vector dht_buffer; + + std::uint32_t dht_key_count; + std::uint32_t dht_data_count; + + MPI_Comm communicator; + + double pht_write_t = 0.; + double pht_read_t = 0.; + double pht_gather_dht_t = 0.; + + std::uint32_t cache_hits{0}; + std::vector all_cache_hits{}; +}; + +class InterpolationModule { +public: + using InterpFunction = std::vector (*)( + const std::vector &, const std::vector &, + const std::vector> &, + const std::vector> &); + + InterpolationModule(std::uint32_t entries_per_bucket, + std::uint64_t size_per_process, + std::uint32_t min_entries_needed, DHT_Wrapper &dht, + const NamedVector &interp_key_signifs, + const std::vector &dht_key_indices, + const std::vector &out_names, + const ChemistryParams::Chem_Hook_Functions &hooks); + + enum result_status { RES_OK, INSUFFICIENT_DATA, NOT_NEEDED }; + + struct InterpolationResult { + std::vector> results; + std::vector status; + + void ResultsToWP(std::vector &currWP); + }; + + void setInterpolationFunction(InterpFunction func) { + this->f_interpolate = func; + } + + void setMinEntriesNeeded(std::uint32_t entries) { + this->min_entries_needed = entries; + } + + auto getMinEntriesNeeded() { return this->min_entries_needed; } + + void writePairs(); + + void tryInterpolation(WorkPackage &work_package); + + void resultsToWP(std::vector &work_package) const; + + auto getPHTWriteTime() const { return pht->getPHTWriteTime(); }; + auto getPHTReadTime() const { return pht->getPHTReadTime(); }; + auto getDHTGatherTime() const { return pht->getDHTGatherTime(); }; + auto getInterpolationTime() const { return this->interp_t; }; + + auto getInterpolationCount() const -> std::uint32_t { + return this->interpolations; + } + + auto getPHTLocalCacheHits() const -> std::vector { + return this->pht->getLocalCacheHits(); + } + + void resetCounter() { + this->interpolations = 0; + this->pht->storeAndResetCounter(); + } + + void writePHTStats() { this->pht->writeStats(); } + void dumpPHTState(const std::string &filename) { + DHT_to_file(this->pht->getDHTObject(), filename.c_str()); + } + + static constexpr std::uint32_t COARSE_DIFF = 2; + static constexpr std::uint32_t COARSE_SIGNIF_DEFAULT = + DHT_Wrapper::DHT_KEY_SIGNIF_DEFAULT - COARSE_DIFF; + +private: + void initPHT(std::uint32_t key_count, std::uint32_t entries_per_bucket, + std::uint32_t size_per_process, MPI_Comm communicator); + + static std::vector dummy(const std::vector &, + const std::vector &, + const std::vector> &, + const std::vector> &) { + return {}; + } + + double interp_t = 0.; + + std::uint32_t interpolations{0}; + + InterpFunction f_interpolate = dummy; + + std::uint32_t min_entries_needed = 5; + + std::unique_ptr pht; + + DHT_Wrapper &dht_instance; + + NamedVector key_signifs; + std::vector key_indices; + + InterpolationResult interp_result; + PHT_Rounder rounder; + + LookupKey roundKey(const LookupKey &in_key) { + LookupKey out_key; + + for (std::uint32_t i = 0; i < key_indices.size(); i++) { + out_key.push_back(rounder.round(in_key[key_indices[i]], key_signifs[i])); + } + + // timestep + out_key.push_back(in_key.back()); + + return out_key; + } + + const ChemistryParams::Chem_Hook_Functions &hooks; + const std::vector &out_names; + const std::vector dht_names; +}; +} // namespace poet + +#endif // INTERPOLATION_H_ diff --git a/src/Chemistry/SurrogateModels/InterpolationModule.cpp b/src/Chemistry/SurrogateModels/InterpolationModule.cpp new file mode 100644 index 000000000..26bfd4ab0 --- /dev/null +++ b/src/Chemistry/SurrogateModels/InterpolationModule.cpp @@ -0,0 +1,153 @@ +// Time-stamp: "Last modified 2023-08-16 17:02:31 mluebke" +#include "Interpolation.hpp" + +#include "../../DataStructures/DataStructures.hpp" +#include "DHT_Wrapper.hpp" +#include "HashFunctions.hpp" +#include "LookupKey.hpp" +#include "Rounding.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include "DHT.h" +} + +namespace poet { + +InterpolationModule::InterpolationModule( + std::uint32_t entries_per_bucket, std::uint64_t size_per_process, + std::uint32_t min_entries_needed, DHT_Wrapper &dht, + const NamedVector &interp_key_signifs, + const std::vector &dht_key_indices, + const std::vector &_out_names, + const ChemistryParams::Chem_Hook_Functions &_hooks) + : dht_instance(dht), key_signifs(interp_key_signifs), + key_indices(dht_key_indices), min_entries_needed(min_entries_needed), + dht_names(dht.getKeySpecies().getNames()), out_names(_out_names), + hooks(_hooks) { + + initPHT(this->key_signifs.size(), entries_per_bucket, size_per_process, + dht.getCommunicator()); + + pht->setSourceDHT(dht.getDHT()); +} + +void InterpolationModule::initPHT(std::uint32_t key_count, + std::uint32_t entries_per_bucket, + std::uint32_t size_per_process, + MPI_Comm communicator) { + uint32_t key_size = key_count * sizeof(Lookup_Keyelement) + sizeof(double); + uint32_t data_size = sizeof(DHT_Location); + + pht = std::make_unique( + key_size, data_size, entries_per_bucket, size_per_process, communicator); +} + +void InterpolationModule::writePairs() { + const auto in = this->dht_instance.getDHTResults(); + for (int i = 0; i < in.filledDHT.size(); i++) { + if (in.filledDHT[i]) { + const auto coarse_key = roundKey(in.keys[i]); + pht->writeLocationToPHT(coarse_key, in.locations[i]); + } + } +} + +void InterpolationModule::tryInterpolation(WorkPackage &work_package) { + interp_result.status.resize(work_package.size, NOT_NEEDED); + + const auto dht_results = this->dht_instance.getDHTResults(); + + for (int wp_i = 0; wp_i < work_package.size; wp_i++) { + if (work_package.mapping[wp_i] != CHEM_PQC) { + interp_result.status[wp_i] = NOT_NEEDED; + continue; + } + + const auto rounded_key = roundKey(dht_results.keys[wp_i]); + + auto pht_result = + pht->query(rounded_key, this->min_entries_needed, + dht_instance.getInputCount(), dht_instance.getOutputCount()); + + if (pht_result.size < this->min_entries_needed) { + interp_result.status[wp_i] = INSUFFICIENT_DATA; + continue; + } + + if (hooks.interp_pre.isValid()) { + NamedVector nv_in(this->out_names, work_package.input[wp_i]); + + auto rm_indices = hooks.interp_pre(nv_in, pht_result.in_values); + + pht_result.size -= rm_indices.size(); + + if (pht_result.size < this->min_entries_needed) { + interp_result.status[wp_i] = INSUFFICIENT_DATA; + continue; + } + + for (const auto &index : rm_indices) { + pht_result.in_values.erase( + std::next(pht_result.in_values.begin(), index - 1)); + pht_result.out_values.erase( + std::next(pht_result.out_values.begin(), index - 1)); + } + } + +#ifdef POET_PHT_ADD + this->pht->incrementReadCounter(roundKey(rounded_key)); +#endif + + double start_fc = MPI_Wtime(); + + work_package.output[wp_i] = + f_interpolate(dht_instance.getKeyElements(), work_package.input[wp_i], + pht_result.in_values, pht_result.out_values); + + if (hooks.interp_post.isValid()) { + NamedVector nv_result(this->out_names, work_package.output[wp_i]); + if (hooks.interp_post(nv_result)) { + interp_result.status[wp_i] = INSUFFICIENT_DATA; + continue; + } + } + + // interp_result.results[i][0] = mean_water; + this->interp_t += MPI_Wtime() - start_fc; + + this->interpolations++; + + work_package.mapping[wp_i] = CHEM_INTERP; + interp_result.status[wp_i] = RES_OK; + } +} + +void InterpolationModule::resultsToWP(std::vector &work_package) const { + for (uint32_t i = 0; i < interp_result.status.size(); i++) { + if (interp_result.status[i] == RES_OK) { + const std::size_t length = + interp_result.results[i].end() - interp_result.results[i].begin(); + std::copy(interp_result.results[i].begin(), + interp_result.results[i].end(), + work_package.begin() + (length * i)); + } + } +} + +} // namespace poet diff --git a/src/Chemistry/SurrogateModels/LookupKey.hpp b/src/Chemistry/SurrogateModels/LookupKey.hpp new file mode 100644 index 000000000..f64b92721 --- /dev/null +++ b/src/Chemistry/SurrogateModels/LookupKey.hpp @@ -0,0 +1,49 @@ +// Time-stamp: "Last modified 2023-08-11 10:12:52 mluebke" + +#ifndef LOOKUPKEY_H_ +#define LOOKUPKEY_H_ + +#include "HashFunctions.hpp" + +#include +#include +#include + +namespace poet { + +struct Lookup_SC_notation { + std::int8_t exp : 8; + std::int64_t significant : 56; +}; + +union Lookup_Keyelement { + double fp_element; + Lookup_SC_notation sc_notation; + + bool operator==(const Lookup_Keyelement &other) const { + return std::memcmp(this, &other, sizeof(Lookup_Keyelement)) == 0 ? true + : false; + } +}; + +class LookupKey : public std::vector { +public: + using std::vector::vector; + + std::vector to_double() const; + static Lookup_SC_notation round_from_double(const double in, + std::uint32_t signif); + static double to_double(const Lookup_SC_notation in); +}; + +struct LookupKeyHasher { + std::uint64_t operator()(const LookupKey &k) const { + const uint32_t key_size = k.size() * sizeof(Lookup_Keyelement); + + return poet::Murmur2_64A(key_size, k.data()); + } +}; + +} // namespace poet + +#endif // LOOKUPKEY_H_ diff --git a/src/Chemistry/SurrogateModels/ProximityHashTable.cpp b/src/Chemistry/SurrogateModels/ProximityHashTable.cpp new file mode 100644 index 000000000..f1589fa21 --- /dev/null +++ b/src/Chemistry/SurrogateModels/ProximityHashTable.cpp @@ -0,0 +1,255 @@ +// Time-stamp: "Last modified 2023-08-15 14:50:59 mluebke" +#include "Interpolation.hpp" + +#include "DHT_Wrapper.hpp" +#include "HashFunctions.hpp" +#include "LookupKey.hpp" +#include "Rounding.hpp" + +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include "DHT.h" +} + +namespace poet { + +ProximityHashTable::ProximityHashTable(uint32_t key_size, uint32_t data_size, + uint32_t entry_count, + uint32_t size_per_process, + MPI_Comm communicator_) + : communicator(communicator_) { + + data_size *= entry_count; + data_size += sizeof(bucket_indicator); + +#ifdef POET_PHT_ADD + data_size += sizeof(std::uint64_t); +#endif + + bucket_store = new char[data_size]; + + uint32_t buckets_per_process = + static_cast(size_per_process / (data_size + key_size)); + + this->prox_ht = DHT_create(communicator, buckets_per_process, data_size, + key_size, &poet::Murmur2_64A); + + DHT_set_accumulate_callback(this->prox_ht, PHT_callback_function); +} + +ProximityHashTable::~ProximityHashTable() { + delete[] bucket_store; + if (prox_ht) { + DHT_free(prox_ht, NULL, NULL); + } +} + +int ProximityHashTable::PHT_callback_function(int in_data_size, void *in_data, + int out_data_size, + void *out_data) { + const int max_elements_per_bucket = + static_cast((out_data_size - sizeof(bucket_indicator) +#ifdef POET_PHT_ADD + - sizeof(std::uint64_t) +#endif + ) / + in_data_size); + DHT_Location *input = reinterpret_cast(in_data); + + bucket_indicator *occupied_buckets = + reinterpret_cast(out_data); + DHT_Location *pairs = reinterpret_cast(occupied_buckets + 1); + + if (*occupied_buckets == max_elements_per_bucket) { + return INTERP_CB_FULL; + } + + for (bucket_indicator i = 0; i < *occupied_buckets; i++) { + if (pairs[i] == *input) { + return INTERP_CB_ALREADY_IN; + } + } + + pairs[(*occupied_buckets)++] = *input; + + return INTERP_CB_OK; +} + +void ProximityHashTable::writeLocationToPHT(LookupKey key, + DHT_Location location) { + + double start = MPI_Wtime(); + + // if (localCache[key].first) { + // return; + // } + + int ret_val; + + int status = DHT_write_accumulate(prox_ht, key.data(), sizeof(location), + &location, NULL, NULL, &ret_val); + + if (status == DHT_WRITE_SUCCESS_WITH_EVICTION) { + this->dht_evictions++; + } + + // if (ret_val == INTERP_CB_FULL) { + // localCache(key, {}); + // } + + this->pht_write_t += MPI_Wtime() - start; +} + +const ProximityHashTable::PHT_Result &ProximityHashTable::query( + const LookupKey &key, const std::uint32_t min_entries_needed, + const std::uint32_t input_count, const std::uint32_t output_count) { + + double start_r = MPI_Wtime(); + const auto cache_ret = localCache[key]; + if (cache_ret.first) { + cache_hits++; + return (lookup_results = cache_ret.second); + } + + int res = DHT_read(prox_ht, key.data(), bucket_store); + this->pht_read_t += MPI_Wtime() - start_r; + + if (res != DHT_SUCCESS) { + this->lookup_results.size = 0; + return lookup_results; + } + + auto *bucket_element_count = + reinterpret_cast(bucket_store); + auto *bucket_elements = + reinterpret_cast(bucket_element_count + 1); + + if (*bucket_element_count < min_entries_needed) { + this->lookup_results.size = 0; + return lookup_results; + } + + lookup_results.size = *bucket_element_count; + auto locations = std::vector( + bucket_elements, bucket_elements + *(bucket_element_count)); + + lookup_results.in_values.clear(); + lookup_results.in_values.reserve(*bucket_element_count); + + lookup_results.out_values.clear(); + lookup_results.out_values.reserve(*bucket_element_count); + + for (const auto &loc : locations) { + double start_g = MPI_Wtime(); + DHT_read_location(source_dht, loc.first, loc.second, dht_buffer.data()); + this->pht_gather_dht_t += MPI_Wtime() - start_g; + + auto *buffer = reinterpret_cast(dht_buffer.data()); + + lookup_results.in_values.push_back( + std::vector(buffer, buffer + input_count)); + + buffer += input_count; + lookup_results.out_values.push_back( + std::vector(buffer, buffer + output_count)); + } + + if (lookup_results.size != 0) { + localCache(key, lookup_results); + } + + return lookup_results; +} + +inline bool +ProximityHashTable::similarityCheck(const LookupKey &fine, + const LookupKey &coarse, + const std::vector &signif) { + + PHT_Rounder rounder; + + for (int i = 0; i < signif.size(); i++) { + if (!(rounder.round(fine[i], signif[i]) == coarse[i])) { + return false; + } + } + + return true; +} + +inline std::vector +ProximityHashTable::convertKeysFromDHT(Lookup_Keyelement *keys_in, + std::uint32_t key_size) { + std::vector output(key_size); + DHT_Rounder rounder; + for (int i = 0; i < key_size; i++) { + output[i] = rounder.convert(keys_in[i]); + } + + return output; +} + +void ProximityHashTable::Cache::operator()(const LookupKey &key, + const PHT_Result val) { + const auto elemIt = this->find(key); + + if (elemIt == this->end()) { + + if (this->free_mem < 0) { + const LookupKey &to_del = this->lru_queue.back(); + const auto elem_d = this->find(to_del); + this->free_mem += elem_d->second.getMemSize(); + this->erase(to_del); + this->keyfinder.erase(to_del); + this->lru_queue.pop_back(); + } + + this->insert({key, val}); + this->lru_queue.emplace_front(key); + this->keyfinder[key] = lru_queue.begin(); + this->free_mem -= val.getMemSize(); + return; + } + + elemIt->second = val; +} + +std::pair +ProximityHashTable::Cache::operator[](const LookupKey &key) { + const auto elemIt = this->find(key); + + if (elemIt == this->end()) { + return {false, {}}; + } + + this->lru_queue.splice(lru_queue.begin(), lru_queue, this->keyfinder[key]); + return {true, elemIt->second}; +} + +#ifdef POET_PHT_ADD +static int PHT_increment_counter(int in_data_size, void *in_data, + int out_data_size, void *out_data) { + char *start = reinterpret_cast(out_data); + std::uint64_t *counter = reinterpret_cast( + start + out_data_size - sizeof(std::uint64_t)); + *counter += 1; + + return 0; +} + +void ProximityHashTable::incrementReadCounter(const LookupKey &key) { + auto *old_func_ptr = this->prox_ht->accumulate_callback; + DHT_set_accumulate_callback(prox_ht, PHT_increment_counter); + int ret, dummy; + DHT_write_accumulate(prox_ht, key.data(), 0, NULL, NULL, NULL, &ret); + DHT_set_accumulate_callback(prox_ht, old_func_ptr); +} +#endif +} // namespace poet diff --git a/src/Chemistry/SurrogateModels/Rounding.hpp b/src/Chemistry/SurrogateModels/Rounding.hpp new file mode 100644 index 000000000..3f659290e --- /dev/null +++ b/src/Chemistry/SurrogateModels/Rounding.hpp @@ -0,0 +1,92 @@ +#ifndef ROUNDING_H_ +#define ROUNDING_H_ + +#include "LookupKey.hpp" +#include +#include + +namespace poet { + +constexpr std::int8_t AQUEOUS_EXP = -13; + +template +class IRounding { +public: + virtual Output round(const Input &, std::uint32_t signif) = 0; + virtual ConvertTo convert(const Output &) = 0; +}; + +class DHT_Rounder { +public: + Lookup_Keyelement round(const double &value, std::uint32_t signif, + bool is_ho) { + std::int8_t exp = + static_cast(std::floor(std::log10(std::fabs(value)))); + + if (!is_ho) { + if (exp < AQUEOUS_EXP) { + return {.sc_notation = {0, 0}}; + } + + std::int8_t diff = exp - signif + 1; + if (diff < AQUEOUS_EXP) { + signif -= AQUEOUS_EXP - diff; + } + } + + Lookup_Keyelement elem; + elem.sc_notation.exp = exp; + elem.sc_notation.significant = + static_cast(value * std::pow(10, signif - exp - 1)); + + return elem; + } + + double convert(const Lookup_Keyelement &key_elem) { + std::int32_t normalized_exp = static_cast( + -std::log10(std::fabs(key_elem.sc_notation.significant))); + + // add stored exponent to normalized exponent + normalized_exp += key_elem.sc_notation.exp; + + // return significant times 10 to the power of exponent + return key_elem.sc_notation.significant * std::pow(10., normalized_exp); + } +}; + +class PHT_Rounder : public IRounding { +public: + Lookup_Keyelement round(const Lookup_Keyelement &value, + std::uint32_t signif) { + Lookup_Keyelement new_val = value; + + std::uint32_t diff_signif = + static_cast( + std::ceil(std::log10(std::abs(value.sc_notation.significant)))) - + signif; + + new_val.sc_notation.significant = static_cast( + value.sc_notation.significant / std::pow(10., diff_signif)); + + if (new_val.sc_notation.significant == 0) { + new_val.sc_notation.exp = 0; + } + + return new_val; + } + + double convert(const Lookup_Keyelement &key_elem) { + std::int32_t normalized_exp = static_cast( + -std::log10(std::fabs(key_elem.sc_notation.significant))); + + // add stored exponent to normalized exponent + normalized_exp += key_elem.sc_notation.exp; + + // return significant times 10 to the power of exponent + return key_elem.sc_notation.significant * std::pow(10., normalized_exp); + } +}; + +} // namespace poet + +#endif // ROUNDING_H_ diff --git a/src/ChemistryModule/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp similarity index 58% rename from src/ChemistryModule/WorkerFunctions.cpp rename to src/Chemistry/WorkerFunctions.cpp index 3101a3152..4e67e33d5 100644 --- a/src/ChemistryModule/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -1,10 +1,11 @@ -// Time-stamp: "Last modified 2023-07-21 17:22:19 mluebke" - -#include "IrmResult.h" -#include "poet/ChemistryModule.hpp" +#include "ChemistryModule.hpp" +#include "SurrogateModels/DHT_Wrapper.hpp" +#include "SurrogateModels/Interpolation.hpp" +#include #include -#include +#include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include +namespace poet { inline std::string get_string(int root, MPI_Comm communicator) { int count; MPI_Bcast(&count, 1, MPI_INT, root, communicator); @@ -48,40 +50,10 @@ void poet::ChemistryModule::WorkerLoop() { break; } case CHEM_INIT_SPECIES: { - Field dummy{0}; + Field dummy; initializeField(dummy); break; } - case CHEM_DHT_ENABLE: { - bool enable; - ChemBCast(&enable, 1, MPI_CXX_BOOL); - - uint32_t size_mb; - ChemBCast(&size_mb, 1, MPI_UINT32_T); - - std::vector name_dummy; - - SetDHTEnabled(enable, size_mb, name_dummy); - break; - } - case CHEM_DHT_SIGNIF_VEC: { - std::vector input_vec; - - SetDHTSignifVector(input_vec); - break; - } - case CHEM_DHT_SNAPS: { - int type; - ChemBCast(&type, 1, MPI_INT); - - SetDHTSnaps(type, get_string(0, this->group_comm)); - - break; - } - case CHEM_DHT_READ_FILE: { - ReadDHTFile(get_string(0, this->group_comm)); - break; - } case CHEM_WORK_LOOP: { WorkerProcessPkgs(timings, iteration); break; @@ -96,12 +68,6 @@ void poet::ChemistryModule::WorkerLoop() { WorkerMetricsToMaster(type); break; } - case CHEM_PROGRESSBAR: { - bool enable; - ChemBCast(&enable, 1, MPI_CXX_BOOL); - setProgressBarPrintout(enable); - break; - } case CHEM_BREAK_MAIN_LOOP: { WorkerPostSim(iteration); loop = false; @@ -148,8 +114,6 @@ void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings, void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, int double_count, struct worker_s &timings) { - int local_work_package_size = 0; - static int counter = 1; double dht_get_start, dht_get_end; @@ -160,12 +124,11 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, double dt; double current_sim_time; - const uint32_t n_cells_times_props = this->prop_count * this->wp_size; - std::vector vecCurrWP(n_cells_times_props + BUFFER_OFFSET); int count = double_count; + std::vector mpi_buffer(count); /* receive */ - MPI_Recv(vecCurrWP.data(), count, MPI_DOUBLE, 0, LOOP_WORK, this->group_comm, + MPI_Recv(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, this->group_comm, MPI_STATUS_IGNORE); /* decrement count of work_package by BUFFER_OFFSET */ @@ -175,63 +138,70 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, * mpi_buffer */ // work_package_size - local_work_package_size = vecCurrWP[count]; + poet::WorkPackage s_curr_wp(mpi_buffer[count]); // current iteration of simulation - iteration = vecCurrWP[count + 1]; + iteration = mpi_buffer[count + 1]; // current timestep size - dt = vecCurrWP[count + 2]; + dt = mpi_buffer[count + 2]; // current simulation time ('age' of simulation) - current_sim_time = vecCurrWP[count + 3]; + current_sim_time = mpi_buffer[count + 3]; /* 4th double value is currently a placeholder */ // placeholder = mpi_buffer[count+4]; - vecCurrWP.resize(n_cells_times_props); - std::vector vecMappingWP(local_work_package_size); + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { + s_curr_wp.input[wp_i] = + std::vector(mpi_buffer.begin() + this->prop_count * wp_i, + mpi_buffer.begin() + this->prop_count * (wp_i + 1)); + } - { - std::uint32_t i = 0; - std::generate(vecMappingWP.begin(), vecMappingWP.end(), - [&] { return i++; }); + // std::cout << this->comm_rank << ":" << counter++ << std::endl; + if (dht_enabled || interp_enabled) { + dht->prepareKeys(s_curr_wp.input, dt); } if (dht_enabled) { /* check for values in DHT */ dht_get_start = MPI_Wtime(); - dht->checkDHT(local_work_package_size, dt, vecCurrWP, vecMappingWP); + dht->checkDHT(s_curr_wp); dht_get_end = MPI_Wtime(); + timings.dht_get += dht_get_end - dht_get_start; + } - // DHT_Results.ResultsToMapping(vecMappingWP); + if (interp_enabled) { + interp->tryInterpolation(s_curr_wp); } phreeqc_time_start = MPI_Wtime(); - if (WorkerRunWorkPackage(vecCurrWP, vecMappingWP, current_sim_time, dt) != - IRM_OK) { - throw std::runtime_error("Phreeqc threw an error!"); + if (WorkerRunWorkPackage(s_curr_wp, current_sim_time, dt) != IRM_OK) { + std::cerr << "Phreeqc error" << std::endl; }; phreeqc_time_end = MPI_Wtime(); - if (dht_enabled) { - dht->resultsToWP(vecCurrWP); + for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { + std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(), + mpi_buffer.begin() + this->prop_count * wp_i); } /* send results to master */ MPI_Request send_req; - MPI_Isend(vecCurrWP.data(), count, MPI_DOUBLE, 0, LOOP_WORK, MPI_COMM_WORLD, + MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, LOOP_WORK, MPI_COMM_WORLD, &send_req); - if (dht_enabled) { + if (dht_enabled || interp_enabled) { /* write results to DHT */ dht_fill_start = MPI_Wtime(); - dht->fillDHT(local_work_package_size, vecCurrWP); + dht->fillDHT(s_curr_wp); dht_fill_end = MPI_Wtime(); - timings.dht_get += dht_get_end - dht_get_start; + if (interp_enabled) { + interp->writePairs(); + } timings.dht_fill += dht_fill_end - dht_fill_start; } @@ -244,24 +214,48 @@ void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status, uint32_t iteration) { MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm, MPI_STATUS_IGNORE); - if (this->dht_enabled) { - this->dht->printStatistics(); - if (this->dht_snaps_type == DHT_FILES_ITEREND) { + if (this->dht_enabled) { + dht_hits.push_back(dht->getHits()); + dht_evictions.push_back(dht->getEvictions()); + dht->resetCounter(); + + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { WorkerWriteDHTDump(iteration); } } + + if (this->interp_enabled) { + std::stringstream out; + interp_calls.push_back(interp->getInterpolationCount()); + interp->resetCounter(); + interp->writePHTStats(); + if (this->dht_snaps_type == DHT_SNAPS_ITEREND) { + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } + } + + RInsidePOET::getInstance().parseEvalQ("gc()"); } + void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) { - if (this->dht_enabled && this->dht_snaps_type == DHT_FILES_SIMEND) { + if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { WorkerWriteDHTDump(iteration); } + if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) { + std::stringstream out; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".pht"; + interp->dumpPHTState(out.str()); + } } void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) { std::stringstream out; - out << this->dht_file_out_dir << "/iter_" << std::setfill('0') << std::setw(3) - << iteration << ".dht"; + out << this->dht_file_out_dir << "/iter_" << std::setfill('0') + << std::setw(this->file_pad) << iteration << ".dht"; int res = dht->tableToFile(out.str().c_str()); if (res != DHT_SUCCESS && this->comm_rank == 2) std::cerr @@ -270,6 +264,7 @@ void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) { std::cout << "CPP: Worker: Successfully written DHT to file " << out.str() << "\n"; } + void poet::ChemistryModule::WorkerReadDHTDump( const std::string &dht_input_file) { int res = dht->fileToTable((char *)dht_input_file.c_str()); @@ -291,36 +286,39 @@ void poet::ChemistryModule::WorkerReadDHTDump( } IRM_RESULT -poet::ChemistryModule::WorkerRunWorkPackage( - std::vector &vecWP, std::vector &vecMapping, - double dSimTime, double dTimestep) { - if ((this->wp_size * this->prop_count) != vecWP.size()) { - return IRM_INVALIDARG; - } - +poet::ChemistryModule::WorkerRunWorkPackage(WorkPackage &work_package, + double dSimTime, double dTimestep) { // check if we actually need to start phreeqc - bool bRunPhreeqc = false; - for (const auto &aMappingNum : vecMapping) { - if (aMappingNum != -1) { - bRunPhreeqc = true; - break; + std::vector pqc_mapping; + + for (std::size_t i = 0; i < work_package.size; i++) { + if (work_package.mapping[i] == CHEM_PQC) { + pqc_mapping.push_back(i); } } - if (!bRunPhreeqc) { + if (pqc_mapping.empty()) { return IRM_OK; } IRM_RESULT result; - this->PhreeqcRM::setPOETMapping(vecMapping); - this->setDumpedField(vecWP); + this->PhreeqcRM::setPOETMapping(pqc_mapping); + this->setDumpedField(work_package.input); this->PhreeqcRM::SetTime(dSimTime); this->PhreeqcRM::SetTimeStep(dTimestep); result = this->PhreeqcRM::RunCells(); - this->getDumpedField(vecWP); + std::vector> output_tmp(work_package.size); + + this->getDumpedField(output_tmp); + + for (std::size_t i = 0; i < work_package.size; i++) { + if (work_package.mapping[i] == CHEM_PQC) { + work_package.output[i] = output_tmp[i]; + } + } return result; } @@ -348,6 +346,26 @@ void poet::ChemistryModule::WorkerPerfToMaster(int type, this->group_comm); break; } + case WORKER_IP_WRITE: { + double val = interp->getPHTWriteTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_READ: { + double val = interp->getPHTReadTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_GATHER: { + double val = interp->getDHTGatherTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } + case WORKER_IP_FC: { + double val = interp->getInterpolationTime(); + MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm); + break; + } default: { throw std::runtime_error("Unknown perf type in master's message."); } @@ -355,24 +373,46 @@ void poet::ChemistryModule::WorkerPerfToMaster(int type, } void poet::ChemistryModule::WorkerMetricsToMaster(int type) { - uint32_t value; + MPI_Comm worker_comm = dht->getCommunicator(); + int worker_rank; + MPI_Comm_rank(worker_comm, &worker_rank); + + MPI_Comm &group_comm = this->group_comm; + + auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm]( + std::vector &send_buffer, int tag) { + std::vector to_master(send_buffer.size()); + MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(), + MPI_UINT32_T, MPI_SUM, 0, worker_comm); + + if (worker_rank == 0) { + MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag, + group_comm); + } + }; + switch (type) { case WORKER_DHT_HITS: { - value = dht->getHits(); - break; - } - case WORKER_DHT_MISS: { - value = dht->getMisses(); + reduce_and_send(dht_hits, WORKER_DHT_HITS); break; } case WORKER_DHT_EVICTIONS: { - value = dht->getEvictions(); + reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS); break; } + case WORKER_IP_CALLS: { + reduce_and_send(interp_calls, WORKER_IP_CALLS); + return; + } + case WORKER_PHT_CACHE_HITS: { + std::vector input = this->interp->getPHTLocalCacheHits(); + reduce_and_send(input, WORKER_PHT_CACHE_HITS); + return; + } default: { throw std::runtime_error("Unknown perf type in master's message."); } } - MPI_Gather(&value, 1, MPI_UINT32_T, NULL, 1, MPI_UINT32_T, 0, - this->group_comm); } + +} // namespace poet diff --git a/src/Chemistry/enums.hpp b/src/Chemistry/enums.hpp new file mode 100644 index 000000000..11c3c1281 --- /dev/null +++ b/src/Chemistry/enums.hpp @@ -0,0 +1,10 @@ +#ifndef ENUMS_H_ +#define ENUMS_H_ + +namespace poet { +enum DHT_PROP_TYPES { DHT_TYPE_DEFAULT, DHT_TYPE_CHARGE, DHT_TYPE_TOTAL }; + +enum CHEMISTRY_OUT_SOURCE { CHEM_PQC, CHEM_DHT, CHEM_INTERP }; +} // namespace poet + +#endif // ENUMS_H_ diff --git a/src/ChemistryModule/ChemistryModule.cpp b/src/ChemistryModule/ChemistryModule.cpp deleted file mode 100644 index d3e6f7e45..000000000 --- a/src/ChemistryModule/ChemistryModule.cpp +++ /dev/null @@ -1,330 +0,0 @@ -#include "poet/ChemistryModule.hpp" -#include "PhreeqcRM.h" -#include "poet/DHT_Wrapper.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -poet::ChemistryModule::ChemistryModule(uint32_t nxyz, uint32_t wp_size, - MPI_Comm communicator) - : PhreeqcRM(nxyz, 1), group_comm(communicator), wp_size(wp_size) { - - MPI_Comm_size(communicator, &this->comm_size); - MPI_Comm_rank(communicator, &this->comm_rank); - - this->is_sequential = (this->comm_size == 1); - this->is_master = (this->comm_rank == 0); - - if (!is_sequential && is_master) { - MPI_Bcast(&wp_size, 1, MPI_UINT32_T, 0, this->group_comm); - } -} - -poet::ChemistryModule::~ChemistryModule() { - if (dht) { - delete dht; - } -} - -poet::ChemistryModule -poet::ChemistryModule::createWorker(MPI_Comm communicator) { - uint32_t wp_size; - MPI_Bcast(&wp_size, 1, MPI_UINT32_T, 0, communicator); - - return ChemistryModule(wp_size, wp_size, communicator); -} - -void poet::ChemistryModule::RunInitFile(const std::string &input_script_path) { - if (this->is_master) { - int f_type = CHEM_INIT; - PropagateFunctionType(f_type); - - int count = input_script_path.size(); - ChemBCast(&count, 1, MPI_INT); - ChemBCast(const_cast(input_script_path.data()), count, MPI_CHAR); - } - - this->RunFile(true, true, false, input_script_path); - this->RunString(true, false, false, "DELETE; -all; PRINT; -warnings 0;"); - - this->FindComponents(); - - PhreeqcRM::initializePOET(this->speciesPerModule, this->prop_names); - this->prop_count = prop_names.size(); - - char exchange = (speciesPerModule[1] == 0 ? -1 : 1); - char kinetics = (speciesPerModule[2] == 0 ? -1 : 1); - char equilibrium = (speciesPerModule[3] == 0 ? -1 : 1); - char surface = (speciesPerModule[4] == 0 ? -1 : 1); - - std::vector ic1; - ic1.resize(this->nxyz * 7, -1); - // TODO: hardcoded reaction modules - for (int i = 0; i < nxyz; i++) { - ic1[i] = 1; // Solution 1 - ic1[nxyz + i] = equilibrium; // Equilibrium 1 - ic1[2 * nxyz + i] = exchange; // Exchange none - ic1[3 * nxyz + i] = surface; // Surface none - ic1[4 * nxyz + i] = -1; // Gas phase none - ic1[5 * nxyz + i] = -1; // Solid solutions none - ic1[6 * nxyz + i] = kinetics; // Kinetics 1 - } - - this->InitialPhreeqc2Module(ic1); -} - -void poet::ChemistryModule::initializeField(const Field &trans_field) { - - if (is_master) { - int f_type = CHEM_INIT_SPECIES; - PropagateFunctionType(f_type); - } - - std::vector essentials_backup{ - prop_names.begin() + speciesPerModule[0], prop_names.end()}; - - std::vector new_solution_names{ - this->prop_names.begin(), this->prop_names.begin() + speciesPerModule[0]}; - - if (is_master) { - for (auto &prop : trans_field.GetProps()) { - if (std::find(new_solution_names.begin(), new_solution_names.end(), - prop) == new_solution_names.end()) { - int size = prop.size(); - ChemBCast(&size, 1, MPI_INT); - ChemBCast(prop.data(), prop.size(), MPI_CHAR); - new_solution_names.push_back(prop); - } - } - int end = 0; - ChemBCast(&end, 1, MPI_INT); - } else { - constexpr int MAXSIZE = 128; - MPI_Status status; - int recv_size; - char recv_buffer[MAXSIZE]; - while (1) { - ChemBCast(&recv_size, 1, MPI_INT); - if (recv_size == 0) { - break; - } - ChemBCast(recv_buffer, recv_size, MPI_CHAR); - recv_buffer[recv_size] = '\0'; - new_solution_names.push_back(std::string(recv_buffer)); - } - } - - // now sort the new values - std::sort(new_solution_names.begin() + 4, new_solution_names.end()); - - // and append other processes than solutions - std::vector new_prop_names = new_solution_names; - new_prop_names.insert(new_prop_names.end(), essentials_backup.begin(), - essentials_backup.end()); - - std::vector old_prop_names{this->prop_names}; - this->prop_names = std::move(new_prop_names); - this->prop_count = prop_names.size(); - - this->SetPOETSolutionNames(new_solution_names); - - if (is_master) { - this->n_cells = trans_field.GetRequestedVecSize(); - chem_field = Field(n_cells); - - std::vector phreeqc_init; - this->getDumpedField(phreeqc_init); - - if (is_sequential) { - std::vector init_vec{phreeqc_init}; - this->unshuffleField(phreeqc_init, n_cells, prop_count, 1, init_vec); - chem_field.InitFromVec(init_vec, prop_names); - return; - } - - std::vector> initial_values; - - for (int i = 0; i < phreeqc_init.size(); i++) { - std::vector init(n_cells, phreeqc_init[i]); - initial_values.push_back(std::move(init)); - } - - chem_field.InitFromVec(initial_values, prop_names); - } -} - -void poet::ChemistryModule::SetDHTEnabled( - bool enable, uint32_t size_mb, - const std::vector &key_species) { - - constexpr uint32_t MB_FACTOR = 1E6; - std::vector key_inidices; - - if (this->is_master) { - int ftype = CHEM_DHT_ENABLE; - PropagateFunctionType(ftype); - ChemBCast(&enable, 1, MPI_CXX_BOOL); - ChemBCast(&size_mb, 1, MPI_UINT32_T); - - key_inidices = parseDHTSpeciesVec(key_species); - int vec_size = key_inidices.size(); - ChemBCast(&vec_size, 1, MPI_INT); - ChemBCast(key_inidices.data(), key_inidices.size(), MPI_UINT32_T); - } else { - int vec_size; - ChemBCast(&vec_size, 1, MPI_INT); - key_inidices.resize(vec_size); - ChemBCast(key_inidices.data(), vec_size, MPI_UINT32_T); - } - - this->dht_enabled = enable; - - if (enable) { - MPI_Comm dht_comm; - - if (this->is_master) { - MPI_Comm_split(this->group_comm, MPI_UNDEFINED, this->comm_rank, - &dht_comm); - return; - } - - MPI_Comm_split(this->group_comm, 1, this->comm_rank, &dht_comm); - - if (this->dht) { - delete this->dht; - } - - const uint32_t dht_size = size_mb * MB_FACTOR; - - this->dht = - new DHT_Wrapper(dht_comm, dht_size, key_inidices, this->prop_count); - // this->dht->setBaseTotals(this->base_totals); - } -} - -inline std::vector poet::ChemistryModule::parseDHTSpeciesVec( - const std::vector &species_vec) const { - std::vector species_indices; - - if (species_vec.empty()) { - species_indices.resize(this->prop_count); - - int i = 0; - std::generate(species_indices.begin(), species_indices.end(), - [&] { return i++; }); - return species_indices; - } - - species_indices.reserve(species_vec.size()); - - for (const auto &name : species_vec) { - auto it = std::find(this->prop_names.begin(), this->prop_names.end(), name); - if (it == prop_names.end()) { - throw std::runtime_error( - "DHT species name was not found in prop name vector!"); - } - const std::uint32_t index = it - prop_names.begin(); - species_indices.push_back(index); - } - - return species_indices; -} - -void poet::ChemistryModule::SetDHTSnaps(int type, const std::string &out_dir) { - if (this->is_master) { - int ftype = CHEM_DHT_SNAPS; - PropagateFunctionType(ftype); - - int str_size = out_dir.size(); - - ChemBCast(&type, 1, MPI_INT); - ChemBCast(&str_size, 1, MPI_INT); - ChemBCast(const_cast(out_dir.data()), str_size, MPI_CHAR); - } - - this->dht_snaps_type = type; - this->dht_file_out_dir = out_dir; -} - -void poet::ChemistryModule::SetDHTSignifVector( - std::vector &signif_vec) { - if (this->is_master) { - int ftype = CHEM_DHT_SIGNIF_VEC; - PropagateFunctionType(ftype); - - int data_count = signif_vec.size(); - ChemBCast(&data_count, 1, MPI_INT); - ChemBCast(signif_vec.data(), signif_vec.size(), MPI_UINT32_T); - - return; - } - - int data_count; - ChemBCast(&data_count, 1, MPI_INT); - signif_vec.resize(data_count); - ChemBCast(signif_vec.data(), data_count, MPI_UINT32_T); - - this->dht->SetSignifVector(signif_vec); -} - -void poet::ChemistryModule::ReadDHTFile(const std::string &input_file) { - if (this->is_master) { - int ftype = CHEM_DHT_READ_FILE; - PropagateFunctionType(ftype); - int str_size = input_file.size(); - - ChemBCast(&str_size, 1, MPI_INT); - ChemBCast(const_cast(input_file.data()), str_size, MPI_CHAR); - } - - if (!this->dht_enabled) { - throw std::runtime_error("DHT file cannot be read. DHT is not enabled."); - } - - if (!this->is_master) { - WorkerReadDHTDump(input_file); - } -} - -std::vector -poet::ChemistryModule::shuffleField(const std::vector &in_field, - uint32_t size_per_prop, uint32_t prop_count, - uint32_t wp_count) { - std::vector out_buffer(in_field.size()); - uint32_t write_i = 0; - for (uint32_t i = 0; i < wp_count; i++) { - for (uint32_t j = i; j < size_per_prop; j += wp_count) { - for (uint32_t k = 0; k < prop_count; k++) { - out_buffer[(write_i * prop_count) + k] = - in_field[(k * size_per_prop) + j]; - } - write_i++; - } - } - return out_buffer; -} - -void poet::ChemistryModule::unshuffleField(const std::vector &in_buffer, - uint32_t size_per_prop, - uint32_t prop_count, - uint32_t wp_count, - std::vector &out_field) { - uint32_t read_i = 0; - - for (uint32_t i = 0; i < wp_count; i++) { - for (uint32_t j = i; j < size_per_prop; j += wp_count) { - for (uint32_t k = 0; k < prop_count; k++) { - out_field[(k * size_per_prop) + j] = - in_buffer[(read_i * prop_count) + k]; - } - read_i++; - } - } -} diff --git a/src/ChemistryModule/DHT_Wrapper.cpp b/src/ChemistryModule/DHT_Wrapper.cpp deleted file mode 100644 index 48ccbbaf5..000000000 --- a/src/ChemistryModule/DHT_Wrapper.cpp +++ /dev/null @@ -1,215 +0,0 @@ -// Time-stamp: "Last modified 2023-07-21 17:22:00 mluebke" - -/* -** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of -** Potsdam) -** -** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam) -** -** POET is free software; you can redistribute it and/or modify it under the -** terms of the GNU General Public License as published by the Free Software -** Foundation; either version 2 of the License, or (at your option) any later -** version. -** -** POET is distributed in the hope that it will be useful, but WITHOUT ANY -** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -** A PARTICULAR PURPOSE. See the GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License along with -** this program; if not, write to the Free Software Foundation, Inc., 51 -** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -*/ - -#include "poet/DHT_Wrapper.hpp" -#include "poet/DHT_Types.hpp" -#include "poet/HashFunctions.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace poet; -using namespace std; - -inline DHT_SCNotation round_key_element(double value, std::uint32_t signif) { - std::int8_t exp = - static_cast(std::floor(std::log10(std::fabs(value)))); - std::int64_t significant = - static_cast(value * std::pow(10, signif - exp - 1)); - - DHT_SCNotation elem; - elem.exp = exp; - elem.significant = significant; - - return elem; -} - -DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size, - const std::vector &key_indices, - uint32_t data_count) - : key_count(key_indices.size()), data_count(data_count), - input_key_elements(key_indices) { - // initialize DHT object - uint32_t key_size = (key_count + 1) * sizeof(DHT_Keyelement); - uint32_t data_size = data_count * sizeof(double); - uint32_t buckets_per_process = dht_size / (1 + data_size + key_size); - dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, - &poet::Murmur2_64A); - - this->dht_signif_vector.resize(key_size, DHT_KEY_SIGNIF_DEFAULT); - - this->dht_prop_type_vector.resize(key_count, DHT_TYPE_DEFAULT); - this->dht_prop_type_vector[0] = DHT_TYPE_TOTAL; - this->dht_prop_type_vector[1] = DHT_TYPE_TOTAL; - this->dht_prop_type_vector[2] = DHT_TYPE_CHARGE; -} - -DHT_Wrapper::~DHT_Wrapper() { - // free DHT - DHT_free(dht_object, NULL, NULL); -} -auto DHT_Wrapper::checkDHT(int length, double dt, - const std::vector &work_package, - std::vector &curr_mapping) - -> const poet::DHT_ResultObject & { - - dht_results.length = length; - dht_results.keys.resize(length); - dht_results.results.resize(length); - dht_results.needPhreeqc.resize(length); - - std::vector new_mapping; - - // loop over every grid cell contained in work package - for (int i = 0; i < length; i++) { - // point to current grid cell - void *key = (void *)&(work_package[i * this->data_count]); - auto &data = dht_results.results[i]; - auto &key_vector = dht_results.keys[i]; - - data.resize(this->data_count); - key_vector = fuzzForDHT(this->key_count, key, dt); - - // overwrite input with data from DHT, IF value is found in DHT - int res = DHT_read(this->dht_object, key_vector.data(), data.data()); - - switch (res) { - case DHT_SUCCESS: - dht_results.needPhreeqc[i] = false; - this->dht_hits++; - break; - case DHT_READ_MISS: - dht_results.needPhreeqc[i] = true; - new_mapping.push_back(curr_mapping[i]); - this->dht_miss++; - break; - } - } - - curr_mapping = std::move(new_mapping); - - return dht_results; -} - -void DHT_Wrapper::fillDHT(int length, const std::vector &work_package) { - // loop over every grid cell contained in work package - for (int i = 0; i < length; i++) { - // If true grid cell was simulated, needs to be inserted into dht - if (dht_results.needPhreeqc[i]) { - const auto &key = dht_results.keys[i]; - void *data = (void *)&(work_package[i * this->data_count]); - // fuzz data (round, logarithm etc.) - - // insert simulated data with fuzzed key into DHT - int res = DHT_write(this->dht_object, (void *)key.data(), data); - - // if data was successfully written ... - if ((res != DHT_SUCCESS) && (res == DHT_WRITE_SUCCESS_WITH_EVICTION)) { - dht_evictions++; - } - } - } -} - -void DHT_Wrapper::resultsToWP(std::vector &work_package) { - for (int i = 0; i < dht_results.length; i++) { - if (!dht_results.needPhreeqc[i]) { - std::copy(dht_results.results[i].begin(), dht_results.results[i].end(), - work_package.begin() + (data_count * i)); - } - } -} - -int DHT_Wrapper::tableToFile(const char *filename) { - int res = DHT_to_file(dht_object, filename); - return res; -} - -int DHT_Wrapper::fileToTable(const char *filename) { - int res = DHT_from_file(dht_object, filename); - if (res != DHT_SUCCESS) - return res; - -#ifdef DHT_STATISTICS - DHT_print_statistics(dht_object); -#endif - - return DHT_SUCCESS; -} - -void DHT_Wrapper::printStatistics() { - int res; - - res = DHT_print_statistics(dht_object); - - if (res != DHT_SUCCESS) { - // MPI ERROR ... WHAT TO DO NOW? - // RUNNING CIRCLES WHILE SCREAMING - } -} - -std::vector DHT_Wrapper::fuzzForDHT(int var_count, void *key, - double dt) { - constexpr double zero_val = 10E-14; - - std::vector vecFuzz(var_count + 1); - std::memset(&vecFuzz[0], 0, sizeof(DHT_Keyelement) * var_count); - - int totals_i = 0; - // introduce fuzzing to allow more hits in DHT - // loop over every variable of grid cell - for (std::uint32_t i = 0; i < input_key_elements.size(); i++) { - double curr_key = ((double *)key)[input_key_elements[i]]; - if (curr_key != 0) { - if (curr_key < zero_val && - this->dht_prop_type_vector[i] == DHT_TYPE_DEFAULT) { - continue; - } - if (this->dht_prop_type_vector[i] == DHT_TYPE_TOTAL) { - curr_key -= base_totals[totals_i++]; - } - vecFuzz[i].sc_notation = - round_key_element(curr_key, dht_signif_vector[i]); - } - } - // if timestep differs over iterations set current current time step at the - // end of fuzzing buffer - vecFuzz[var_count].fp_elemet = dt; - - return vecFuzz; -} - -void poet::DHT_Wrapper::SetSignifVector(std::vector signif_vec) { - if (signif_vec.size() != this->key_count) { - throw std::runtime_error( - "Significant vector size mismatches count of key elements."); - } - - this->dht_signif_vector = signif_vec; -} diff --git a/include/poet/Field.hpp b/src/DataStructures/DataStructures.hpp similarity index 67% rename from include/poet/Field.hpp rename to src/DataStructures/DataStructures.hpp index cb4e46996..30f69e5dd 100644 --- a/include/poet/Field.hpp +++ b/src/DataStructures/DataStructures.hpp @@ -1,15 +1,54 @@ -#ifndef FIELD_H_ -#define FIELD_H_ +#ifndef DATASTRUCTURES_H_ +#define DATASTRUCTURES_H_ +#include "../Chemistry/enums.hpp" + +#include + +#include +#include #include -#include -#include +#include +#include #include -#include +#include #include namespace poet { +struct WorkPackage { + std::size_t size; + std::vector> input; + std::vector> output; + std::vector mapping; + + WorkPackage(size_t _size) : size(_size) { + input.resize(size); + output.resize(size); + mapping.resize(size, CHEM_PQC); + } +}; + +template class NamedVector : public Rcpp::NumericVector { +public: + NamedVector() : Rcpp::NumericVector(){}; + + NamedVector(const std::vector &in_names, + const std::vector &in_values) + : Rcpp::NumericVector(Rcpp::wrap(in_values)) { + this->names() = Rcpp::CharacterVector(Rcpp::wrap(in_names)); + } + + NamedVector(const SEXP &s) : Rcpp::NumericVector(s){}; + + bool empty() const { return (this->size() == 0); } + + std::vector getNames() const { + return Rcpp::as>(this->names()); + } + std::vector getValues() const { return Rcpp::as>(*this); } +}; + using FieldColumn = std::vector; /** @@ -23,18 +62,21 @@ using FieldColumn = std::vector; */ class Field : private std::unordered_map { public: + Field(){}; + /** * Creates a new instance of a field with fixed expected vector size. * - * \param vec_s expected vector size of each component/column. \ + * \param vec_s expected vector size of each component/column. */ - Field(uint32_t vec_s) : req_vec_size(vec_s){}; + Field(std::uint32_t vec_s) : req_vec_size(vec_s){}; /** * Initializes instance with a 2D vector and according names for each columnn. * There is no check if names were given multiple times. The order of name * vector also defines the ordering of the output. * + * \param vec_s expected vector size of each component/column. * \param input 2D vector using STL semantic describing the current state of * the field. * \param prop_vec Name of each vector in the input. Shall match @@ -43,14 +85,15 @@ public: * \exception std::runtime_error If prop_vec size doesn't match input vector * size or column vectors size doesn't match expected vector size. */ - void InitFromVec(const std::vector> &input, - const std::vector &prop_vec); + Field(std::uint32_t vec_s, const std::vector> &input, + const std::vector &prop_vec, bool row_major = false); /** * Initializes instance with a 1D continious memory vector and according names * for each columnn. There is no check if names were given multiple times. The * order of name vector also defines the ordering of the output. * + * \param vec_s expected vector size of each component/column. * \param input 1D vector using STL semantic describing the current state of * the field storing each column starting at index *i times requested vector * size*. @@ -60,12 +103,50 @@ public: * \exception std::runtime_error If prop_vec size doesn't match input vector * size or column vectors size doesn't match expected vector size. */ - void InitFromVec(const std::vector &input, - const std::vector &prop_vec); + Field(std::uint32_t vec_s, const std::vector &input, + const std::vector &prop_vec); + + Field(const SEXP &s_init) { fromSEXP(s_init); } Field &operator=(const Field &rhs) { this->req_vec_size = rhs.req_vec_size; this->props = rhs.props; + + this->clear(); + + for (const auto &pair : rhs) { + this->insert(pair); + } + return *this; + } + + /** + * Read in a (previously exported) 1D vector. It has to have the same + * dimensions as the current column count times the requested vector size of + * this instance. Import order is given by the species name vector. + * + * \param cont_field 1D field as vector. + * + * \exception std::runtime_error Input vector does not match the expected + * size. + */ + Field &operator=(const FieldColumn &cont_field); + + /** + * Read in a (previously exported) 2D vector. It has to have the same + * dimensions as the current column count of this instance and each vector + * must have the size of the requested vector size. Import order is given by + * the species name vector. + * + * \param cont_field 2D field as vector. + * + * \exception std::runtime_error Input vector has more or less elements than + * the instance or a column vector does not match expected vector size. + */ + Field &operator=(const std::vector &cont_field); + + Field &operator=(const SEXP &s_rhs) { + fromSEXP(s_rhs); return *this; } @@ -103,7 +184,7 @@ public: * * \param input Field to update the current instance's columns. */ - void UpdateFromField(const Field &input); + void update(const Field &input); /** * Builds a new 1D vector with the current state of the instance. The output @@ -124,36 +205,17 @@ public: */ auto As2DVector() const -> std::vector; - /** - * Read in a (previously exported) 1D vector. It has to have the same - * dimensions as the current column count times the requested vector size of - * this instance. Import order is given by the species name vector. - * - * \param cont_field 1D field as vector. - * - * \exception std::runtime_error Input vector does not match the expected - * size. - */ - void SetFromVector(const FieldColumn &cont_field); + SEXP asSEXP() const; - /** - * Read in a (previously exported) 2D vector. It has to have the same - * dimensions as the current column count of this instance and each vector - * must have the size of the requested vector size. Import order is given by - * the species name vector. - * - * \param cont_field 2D field as vector. - * - * \exception std::runtime_error Input vector has more or less elements than - * the instance or a column vector does not match expected vector size. - */ - void SetFromVector(const std::vector &cont_field); + std::size_t cols() const { return this->props.size(); } private: - std::uint32_t req_vec_size; + std::uint32_t req_vec_size{0}; - std::vector props; + std::vector props{{}}; + + void fromSEXP(const SEXP &s_rhs); }; } // namespace poet -#endif // FIELD_H_ +#endif // DATASTRUCTURES_H_ diff --git a/src/DataStructures/Field.cpp b/src/DataStructures/Field.cpp index 13b143c1e..71d51dea4 100644 --- a/src/DataStructures/Field.cpp +++ b/src/DataStructures/Field.cpp @@ -1,31 +1,54 @@ -#include "poet/Field.hpp" +#include "DataStructures.hpp" + +#include +#include +#include #include #include #include #include #include -void poet::Field::InitFromVec(const std::vector> &input, - const std::vector &prop_vec) { - if (prop_vec.size() != input.size()) { - throw std::runtime_error("Prop vector shall name all elements."); - } - - auto name_it = prop_vec.begin(); - - for (const auto &in_vec : input) { - if (in_vec.size() != req_vec_size) { - throw std::runtime_error( - "Input vector doesn't match expected vector size."); +poet::Field::Field(std::uint32_t vec_s, + const std::vector> &input, + const std::vector &prop_vec, bool row_major) + : req_vec_size(vec_s), props(prop_vec) { + if (row_major) { + if (this->props.size() != input[0].size()) { + throw std::runtime_error("Prop vector shall name all elements."); } - this->insert({*(name_it++), in_vec}); - } - this->props = prop_vec; + const std::size_t n_input = input.size(); + + for (std::size_t i = 0; i < this->props.size(); i++) { + std::vector curr_col(n_input); + + for (std::size_t j = 0; j < n_input; j++) { + curr_col[j] = input[j][i]; + } + + this->insert({this->props[i], std::move(curr_col)}); + } + } else { + if (this->props.size() != input.size()) { + throw std::runtime_error("Prop vector shall name all elements."); + } + + auto name_it = this->props.begin(); + + for (const auto &in_vec : input) { + if (in_vec.size() != req_vec_size) { + throw std::runtime_error( + "Input vector doesn't match expected vector size."); + } + this->insert({*(name_it++), in_vec}); + } + } } -void poet::Field::InitFromVec(const std::vector &input, - const std::vector &prop_vec) { +poet::Field::Field(std::uint32_t vec_s, const std::vector &input, + const std::vector &prop_vec) + : Field(vec_s) { const uint32_t expected_size = prop_vec.size() * req_vec_size; if (expected_size != input.size()) { @@ -62,47 +85,7 @@ auto poet::Field::AsVector() const -> poet::FieldColumn { return output; } -void poet::Field::SetFromVector(const poet::FieldColumn &cont_field) { - if (cont_field.size() != this->size() * req_vec_size) { - throw std::runtime_error( - "Field::SetFromVector: vector does not match expected size"); - } - - uint32_t vec_p = 0; - for (const auto &elem : props) { - const auto start = cont_field.begin() + vec_p; - const auto end = start + req_vec_size; - - const auto map_it = this->find(elem); - - map_it->second = FieldColumn(start, end); - - vec_p += req_vec_size; - } -} - -void poet::Field::SetFromVector( - const std::vector &cont_field) { - if (cont_field.size() != this->size()) { - throw std::runtime_error( - "Input field contains more or less elements than this container."); - } - - auto in_field_it = cont_field.begin(); - - for (const auto &elem : props) { - if (in_field_it->size() != req_vec_size) { - throw std::runtime_error( - "One vector contains more or less elements than expected."); - } - - const auto map_it = this->find(elem); - - map_it->second = *(in_field_it++); - } -} - -void poet::Field::UpdateFromField(const poet::Field &input) { +void poet::Field::update(const poet::Field &input) { for (const auto &input_elem : input) { auto it_self = this->find(input_elem.first); if (it_self == this->end()) { @@ -132,3 +115,118 @@ poet::FieldColumn &poet::Field::operator[](const std::string &key) { return std::unordered_map::operator[](key); } + +SEXP poet::Field::asSEXP() const { + const std::size_t cols = this->props.size(); + + SEXP s_names = PROTECT(Rf_allocVector(STRSXP, cols)); + SEXP s_output = PROTECT(Rf_allocVector(VECSXP, cols)); + + for (std::size_t prop_i = 0; prop_i < this->props.size(); prop_i++) { + const auto &name = this->props[prop_i]; + SEXP s_values = PROTECT(Rf_allocVector(REALSXP, this->req_vec_size)); + const auto values = this->find(name)->second; + + SET_STRING_ELT(s_names, prop_i, Rf_mkChar(name.c_str())); + + for (std::size_t i = 0; i < this->req_vec_size; i++) { + REAL(s_values)[i] = values[i]; + } + + SET_VECTOR_ELT(s_output, prop_i, s_values); + + UNPROTECT(1); + } + + SEXP s_rownames = PROTECT(Rf_allocVector(INTSXP, this->req_vec_size)); + for (std::size_t i = 0; i < this->req_vec_size; i++) { + INTEGER(s_rownames)[i] = static_cast(i + 1); + } + + Rf_setAttrib(s_output, R_ClassSymbol, + Rf_ScalarString(Rf_mkChar("data.frame"))); + Rf_setAttrib(s_output, R_NamesSymbol, s_names); + Rf_setAttrib(s_output, R_RowNamesSymbol, s_rownames); + + UNPROTECT(3); + + return s_output; +} + +poet::Field &poet::Field::operator=(const FieldColumn &cont_field) { + if (cont_field.size() != this->size() * req_vec_size) { + throw std::runtime_error( + "Field::SetFromVector: vector does not match expected size"); + } + + uint32_t vec_p = 0; + for (const auto &elem : props) { + const auto start = cont_field.begin() + vec_p; + const auto end = start + req_vec_size; + + const auto map_it = this->find(elem); + + map_it->second = FieldColumn(start, end); + + vec_p += req_vec_size; + } + + return *this; +} + +poet::Field & +poet::Field::operator=(const std::vector &cont_field) { + if (cont_field.size() != this->size()) { + throw std::runtime_error( + "Input field contains more or less elements than this container."); + } + + auto in_field_it = cont_field.begin(); + + for (const auto &elem : props) { + if (in_field_it->size() != req_vec_size) { + throw std::runtime_error( + "One vector contains more or less elements than expected."); + } + + const auto map_it = this->find(elem); + + map_it->second = *(in_field_it++); + } + return *this; +} + +void poet::Field::fromSEXP(const SEXP &s_rhs) { + this->clear(); + + SEXP s_vec = PROTECT(Rf_coerceVector(s_rhs, VECSXP)); + SEXP s_names = PROTECT(Rf_getAttrib(s_vec, R_NamesSymbol)); + + std::size_t cols = static_cast(Rf_length(s_vec)); + + this->props.clear(); + this->props.reserve(cols); + + for (std::size_t i = 0; i < cols; i++) { + const std::string prop_name(CHAR(STRING_ELT(s_names, i))); + this->props.push_back(prop_name); + + SEXP s_values = PROTECT(VECTOR_ELT(s_vec, i)); + + if (i == 0) { + this->req_vec_size = static_cast(Rf_length(s_values)); + } + + FieldColumn input(this->req_vec_size); + + for (std::size_t j = 0; j < this->req_vec_size; j++) { + input[j] = static_cast(REAL(s_values)[j]); + } + + UNPROTECT(1); + + this->insert({prop_name, input}); + } + + UNPROTECT(2); +} diff --git a/src/DiffusionModule.cpp b/src/Transport/DiffusionModule.cpp similarity index 90% rename from src/DiffusionModule.cpp rename to src/Transport/DiffusionModule.cpp index 609d9cf91..abbecce95 100644 --- a/src/DiffusionModule.cpp +++ b/src/Transport/DiffusionModule.cpp @@ -18,14 +18,15 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/SimParams.hpp" -#include "tug/BoundaryCondition.hpp" -#include "tug/Diffusion.hpp" +#include "DiffusionModule.hpp" +#include "../Base/Macros.hpp" + +#include +#include + #include #include #include -#include -#include #include #include @@ -57,17 +58,18 @@ inline const char *convert_bc_to_config_file(uint8_t in) { DiffusionModule::DiffusionModule(const poet::DiffusionParams &diffu_args, const poet::GridParams &grid_params) - : t_field{grid_params.total_n}, n_cells_per_prop(grid_params.total_n) { + : n_cells_per_prop(grid_params.total_n) { this->diff_input.setGridCellN(grid_params.n_cells[0], grid_params.n_cells[1]); this->diff_input.setDomainSize(grid_params.s_cells[0], grid_params.s_cells[1]); this->dim = grid_params.dim; - this->initialize(diffu_args); + this->initialize(diffu_args, grid_params.total_n); } -void DiffusionModule::initialize(const poet::DiffusionParams &args) { +void DiffusionModule::initialize(const poet::DiffusionParams &args, + std::uint32_t n_grid_cells) { // const poet::DiffusionParams args(this->R); // name of props @@ -86,7 +88,7 @@ void DiffusionModule::initialize(const poet::DiffusionParams &args) { this->alpha.push_back(args.alpha[this->prop_names[i]]); const double val = args.initial_t[prop_names[i]]; - std::vector init_val(t_field.GetRequestedVecSize(), val); + std::vector init_val(n_grid_cells, val); initial_values.push_back(std::move(init_val)); if (this->dim == this->DIM_2D) { @@ -99,7 +101,7 @@ void DiffusionModule::initialize(const poet::DiffusionParams &args) { } } - t_field.InitFromVec(initial_values, prop_names); + this->t_field = Field(n_grid_cells, initial_values, prop_names); // apply boundary conditions to each ghost node uint8_t bc_size = (this->dim == this->DIM_1D ? 2 : 4); @@ -153,8 +155,8 @@ void DiffusionModule::simulate(double dt) { sim_b_transport = MPI_Wtime(); - std::cout << "DiffusionModule::simulate(): Starting diffusion ..." - << std::flush; + MSG_NOENDL("DiffusionModule::simulate(): Starting diffusion ..."); + std::cout << std::flush; std::vector> field_2d = t_field.As2DVector(); @@ -173,13 +175,13 @@ void DiffusionModule::simulate(double dt) { } } - t_field.SetFromVector(field_2d); - - std::cout << " done!\n"; + t_field = field_2d; sim_a_transport = MPI_Wtime(); transport_t += sim_a_transport - sim_b_transport; + std::cout << " done in " << sim_a_transport - sim_b_transport << "sec" + << std::endl; } void DiffusionModule::end() { diff --git a/include/poet/DiffusionModule.hpp b/src/Transport/DiffusionModule.hpp similarity index 92% rename from include/poet/DiffusionModule.hpp rename to src/Transport/DiffusionModule.hpp index e9b9c61f5..a31bc0738 100644 --- a/include/poet/DiffusionModule.hpp +++ b/src/Transport/DiffusionModule.hpp @@ -21,16 +21,17 @@ #ifndef DIFFUSION_MODULE_H #define DIFFUSION_MODULE_H -#include "Field.hpp" -#include "SimParams.hpp" -#include "poet/SimParams.hpp" +#include "../Base/Grid.hpp" +#include "../Base/SimParams.hpp" +#include "../DataStructures/DataStructures.hpp" + +#include +#include + #include #include #include -#include #include -#include -#include #include namespace poet { @@ -94,7 +95,8 @@ private: enum { DIM_1D = 1, DIM_2D }; - void initialize(const poet::DiffusionParams &args); + void initialize(const poet::DiffusionParams &args, + std::uint32_t n_grid_cells); uint8_t dim; diff --git a/app/poet.cpp b/src/poet.cpp similarity index 72% rename from app/poet.cpp rename to src/poet.cpp index 93f301c4b..10c706a0d 100644 --- a/app/poet.cpp +++ b/src/poet.cpp @@ -18,14 +18,18 @@ ** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "poet/ChemistryModule.hpp" +#include "Base/Grid.hpp" +#include "Base/Macros.hpp" +#include "Base/RInsidePOET.hpp" +#include "Base/SimParams.hpp" +#include "Chemistry/ChemistryModule.hpp" +#include "Transport/DiffusionModule.hpp" + +#include + #include #include #include -#include -#include -#include -#include #include #include @@ -33,7 +37,6 @@ #include #include -#include using namespace std; using namespace poet; @@ -70,7 +73,7 @@ void writeFieldsToR(RInside &R, const Field &trans, const Field &chem) { void set_chem_parameters(poet::ChemistryModule &chem, uint32_t wp_size, const std::string &database_path) { chem.SetErrorHandlerMode(1); - chem.SetComponentH2O(true); + chem.SetComponentH2O(false); chem.SetRebalanceFraction(0.5); chem.SetRebalanceByCell(true); chem.UseSolutionDensityVolume(false); @@ -112,7 +115,6 @@ void set_chem_parameters(poet::ChemistryModule &chem, uint32_t wp_size, } inline double RunMasterLoop(SimParams ¶ms, RInside &R, - ChemistryParams &chem_params, const GridParams &g_params, uint32_t nxyz_master) { DiffusionParams d_params{R}; @@ -123,10 +125,11 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, double sim_time = .0; - ChemistryModule chem(nxyz_master, params.getNumParams().wp_size, - MPI_COMM_WORLD); - set_chem_parameters(chem, nxyz_master, chem_params.database_path); - chem.RunInitFile(chem_params.input_script); + ChemistryModule chem(nxyz_master, params.getNumParams().wp_size, maxiter, + params.getChemParams(), MPI_COMM_WORLD); + + set_chem_parameters(chem, nxyz_master, params.getChemParams().database_path); + chem.RunInitFile(params.getChemParams().input_script); poet::ChemistryModule::SingleCMap init_df = DFToHashMap(d_params.initial_t); chem.initializeField(diffusion.getField()); @@ -135,52 +138,40 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, chem.setProgressBarPrintout(true); } - if (params.getNumParams().dht_enabled) { - chem.SetDHTEnabled(true, params.getNumParams().dht_size_per_process, - chem_params.dht_species); - if (!chem_params.dht_signif.empty()) { - chem.SetDHTSignifVector(chem_params.dht_signif); - } - if (!params.getDHTFile().empty()) { - chem.ReadDHTFile(params.getDHTFile()); - } - if (params.getNumParams().dht_snaps > 0) { - chem.SetDHTSnaps(params.getNumParams().dht_snaps, params.getOutDir()); - } - } - /* SIMULATION LOOP */ - double dStartTime = MPI_Wtime(); + double dSimTime{0}; for (uint32_t iter = 1; iter < maxiter + 1; iter++) { + double start_t = MPI_Wtime(); uint32_t tick = 0; // cout << "CPP: Evaluating next time step" << endl; // R.parseEvalQ("mysetup <- master_iteration_setup(mysetup)"); double dt = Rcpp::as( R.parseEval("mysetup$timesteps[" + std::to_string(iter) + "]")); - cout << "CPP: Next time step is " << dt << "[s]" << endl; + + // cout << "CPP: Next time step is " << dt << "[s]" << endl; + MSG("Next time step is " + std::to_string(dt) + " [s]"); /* displaying iteration number, with C++ and R iterator */ - cout << "CPP: Going through iteration " << iter << endl; - cout << "CPP: R's $iter: " << ((uint32_t)(R.parseEval("mysetup$iter"))) - << ". Iteration" << endl; + MSG("Going through iteration " + std::to_string(iter)); + MSG("R's $iter: " + + std::to_string((uint32_t)(R.parseEval("mysetup$iter"))) + + ". Iteration"); /* run transport */ // TODO: transport to diffusion diffusion.simulate(dt); - chem.getField().UpdateFromField(diffusion.getField()); + chem.getField().update(diffusion.getField()); - cout << "CPP: Chemistry" << endl; - - chem.SetTimeStep(dt); + MSG("Chemistry step"); chem.SetTimeStep(dt); chem.RunCells(); writeFieldsToR(R, diffusion.getField(), chem.GetField()); - diffusion.getField().UpdateFromField(chem.GetField()); + diffusion.getField().update(chem.GetField()); R["req_dt"] = dt; R["simtime"] = (sim_time += dt); @@ -194,13 +185,13 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, // store_result is TRUE) R.parseEvalQ("mysetup <- master_iteration_end(setup=mysetup)"); - cout << endl - << "CPP: End of *coupling* iteration " << iter << "/" << maxiter - << endl - << endl; + MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + + std::to_string(maxiter)); + MSG(); // MPI_Barrier(MPI_COMM_WORLD); - + double end_t = MPI_Wtime(); + dSimTime += end_t - start_t; } // END SIMULATION LOOP R.parseEvalQ("profiling <- list()"); @@ -229,11 +220,9 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, // R["phreeqc_count"] = phreeqc_counts; // R.parseEvalQ("profiling$phreeqc_count <- phreeqc_count"); - if (params.getNumParams().dht_enabled) { + if (params.getChemParams().use_dht) { R["dht_hits"] = Rcpp::wrap(chem.GetWorkerDHTHits()); R.parseEvalQ("profiling$dht_hits <- dht_hits"); - R["dht_miss"] = Rcpp::wrap(chem.GetWorkerDHTMiss()); - R.parseEvalQ("profiling$dht_miss <- dht_miss"); R["dht_evictions"] = Rcpp::wrap(chem.GetWorkerDHTEvictions()); R.parseEvalQ("profiling$dht_evictions <- dht_evictions"); R["dht_get_time"] = Rcpp::wrap(chem.GetWorkerDHTGetTimings()); @@ -241,11 +230,26 @@ inline double RunMasterLoop(SimParams ¶ms, RInside &R, R["dht_fill_time"] = Rcpp::wrap(chem.GetWorkerDHTFillTimings()); R.parseEvalQ("profiling$dht_fill_time <- dht_fill_time"); } + if (params.getChemParams().use_interp) { + R["interp_w"] = Rcpp::wrap(chem.GetWorkerInterpolationWriteTimings()); + R.parseEvalQ("profiling$interp_write <- interp_w"); + R["interp_r"] = Rcpp::wrap(chem.GetWorkerInterpolationReadTimings()); + R.parseEvalQ("profiling$interp_read <- interp_r"); + R["interp_g"] = Rcpp::wrap(chem.GetWorkerInterpolationGatherTimings()); + R.parseEvalQ("profiling$interp_gather <- interp_g"); + R["interp_fc"] = + Rcpp::wrap(chem.GetWorkerInterpolationFunctionCallTimings()); + R.parseEvalQ("profiling$interp_function_calls <- interp_fc"); + R["interp_calls"] = Rcpp::wrap(chem.GetWorkerInterpolationCalls()); + R.parseEvalQ("profiling$interp_calls <- interp_calls"); + R["interp_cached"] = Rcpp::wrap(chem.GetWorkerPHTCacheHits()); + R.parseEvalQ("profiling$interp_cached <- interp_cached"); + } chem.MasterLoopBreak(); diffusion.end(); - return MPI_Wtime() - dStartTime; + return dSimTime; } int main(int argc, char *argv[]) { @@ -259,42 +263,46 @@ int main(int argc, char *argv[]) { MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + RInsidePOET &R = RInsidePOET::getInstance(); + if (world_rank == 0) { - cout << "Running POET in version " << poet_version << endl << endl; + MSG("Running POET version " + std::string(poet_version)); } if (world_rank > 0) { { - uint32_t c_size; - MPI_Bcast(&c_size, 1, MPI_UINT32_T, 0, MPI_COMM_WORLD); + SimParams params(world_rank, world_size); + int pret = params.parseFromCmdl(argv, R); - char *buffer = new char[c_size + 1]; - MPI_Bcast(buffer, c_size, MPI_CHAR, 0, MPI_COMM_WORLD); - buffer[c_size] = '\0'; + if (pret == poet::PARSER_ERROR) { + MPI_Finalize(); + return EXIT_FAILURE; + } else if (pret == poet::PARSER_HELP) { + MPI_Finalize(); + return EXIT_SUCCESS; + } // ChemistryModule worker(nxyz, nxyz, MPI_COMM_WORLD); - ChemistryModule worker = - poet::ChemistryModule::createWorker(MPI_COMM_WORLD); - set_chem_parameters(worker, worker.GetWPSize(), std::string(buffer)); - - delete[] buffer; + ChemistryModule worker = poet::ChemistryModule::createWorker( + MPI_COMM_WORLD, params.getChemParams()); + set_chem_parameters(worker, worker.GetWPSize(), + params.getChemParams().database_path); worker.WorkerLoop(); } MPI_Barrier(MPI_COMM_WORLD); - cout << "CPP: finished, cleanup of process " << world_rank << endl; + + MSG("finished, cleanup of process " + std::to_string(world_rank)); + MPI_Finalize(); return EXIT_SUCCESS; } - RInsidePOET &R = RInsidePOET::getInstance(); - /*Loading Dependencies*/ // TODO: kann raus - std::string r_load_dependencies = "source('../R_lib/kin_r_library.R');"; - R.parseEvalQ(r_load_dependencies); + R.parseEvalQ(kin_r_library); SimParams params(world_rank, world_size); int pret = params.parseFromCmdl(argv, R); @@ -307,7 +315,7 @@ int main(int argc, char *argv[]) { return EXIT_SUCCESS; } - cout << "CPP: R Init (RInside) on process " << world_rank << endl; + MSG("RInside initialized on process " + std::to_string(world_rank)); R.parseEvalQ("mysetup <- setup"); // if (world_rank == 0) { // get timestep vector from @@ -321,31 +329,23 @@ int main(int argc, char *argv[]) { // MDL: store all parameters if (world_rank == 0) { - cout << "CPP: Calling R Function to store calling parameters" << endl; + MSG("Calling R Function to store calling parameters"); R.parseEvalQ("StoreSetup(setup=mysetup)"); } if (world_rank == 0) { - cout << "CPP: Init done on process with rank " << world_rank << endl; + MSG("Init done on process with rank " + std::to_string(world_rank)); } // MPI_Barrier(MPI_COMM_WORLD); - poet::ChemistryParams chem_params(R); - - /* THIS IS EXECUTED BY THE MASTER */ - std::string db_path = chem_params.database_path; - uint32_t c_size = db_path.size(); - MPI_Bcast(&c_size, 1, MPI_UINT32_T, 0, MPI_COMM_WORLD); - - MPI_Bcast(db_path.data(), c_size, MPI_CHAR, 0, MPI_COMM_WORLD); uint32_t nxyz_master = (world_size == 1 ? g_params.total_n : 1); - dSimTime = RunMasterLoop(params, R, chem_params, g_params, nxyz_master); + dSimTime = RunMasterLoop(params, R, g_params, nxyz_master); - cout << "CPP: finished simulation loop" << endl; + MSG("finished simulation loop"); - cout << "CPP: start timing profiling" << endl; + MSG("start timing profiling"); R["simtime"] = dSimTime; R.parseEvalQ("profiling$simtime <- simtime"); @@ -354,16 +354,16 @@ int main(int argc, char *argv[]) { r_vis_code = "saveRDS(profiling, file=paste0(fileout,'/timings.rds'));"; R.parseEval(r_vis_code); - cout << "CPP: Done! Results are stored as R objects into <" - << params.getOutDir() << "/timings.rds>" << endl; + MSG("Done! Results are stored as R objects into <" + params.getOutDir() + + "/timings.rds>"); MPI_Barrier(MPI_COMM_WORLD); - cout << "CPP: finished, cleanup of process " << world_rank << endl; + MSG("finished, cleanup of process " + std::to_string(world_rank)); MPI_Finalize(); if (world_rank == 0) { - cout << "CPP: done, bye!" << endl; + MSG("done, bye!"); } exit(0); diff --git a/src/poet.hpp.in b/src/poet.hpp.in new file mode 100644 index 000000000..666fbaa08 --- /dev/null +++ b/src/poet.hpp.in @@ -0,0 +1,10 @@ +#ifndef POET_H +#define POET_H + +#include +static const char *poet_version = "@POET_VERSION@"; + +// using the Raw string literal to avoid escaping the quotes +static inline std::string kin_r_library = R"(@R_KIN_LIB@)"; + +#endif // POET_H diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9d3a5ac43..884750735 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,7 +3,12 @@ file(GLOB test_SRC "*.cpp" "*.c") add_executable(testPOET ${test_SRC}) -target_link_libraries(testPOET doctest poet_lib) +target_link_libraries(testPOET doctest poetlib) +target_include_directories(testPOET PRIVATE "${PROJECT_SOURCE_DIR}/src") + +get_filename_component(TEST_RInsideSourceFile "RInsidePOET_funcs.R" REALPATH) +configure_file(testDataStructures.hpp.in testDataStructures.hpp) +target_include_directories(testPOET PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") add_custom_target(check COMMAND $ diff --git a/test/RInsidePOET_funcs.R b/test/RInsidePOET_funcs.R new file mode 100644 index 000000000..668b05169 --- /dev/null +++ b/test/RInsidePOET_funcs.R @@ -0,0 +1,22 @@ +simple_named_vec <- function(input) { + input["Ca"] <- 0 + + return(input) +} + +extended_named_vec <- function(input, additional) { + return(input["H"] + additional["H"]) +} + +bool_named_vec <- function(input) { + return(input["H"] == 0) +} + +simple_field <- function(field) { + field$Na <- 0 + return(field) +} + +extended_field <- function(field, additional) { + return(field + additional) +} diff --git a/test/testDataStructures.hpp.in b/test/testDataStructures.hpp.in new file mode 100644 index 000000000..c1d10271a --- /dev/null +++ b/test/testDataStructures.hpp.in @@ -0,0 +1,6 @@ +#ifndef TESTRINSIDEPOET_H_ +#define TESTRINSIDEPOET_H_ + +inline const char *RInside_source_file = "@TEST_RInsideSourceFile@"; + +#endif // TESTRINSIDEPOET_H_ diff --git a/test/testDataStructures.cpp b/test/testField.cpp similarity index 71% rename from test/testDataStructures.cpp rename to test/testField.cpp index c3046f275..acfe8d1a8 100644 --- a/test/testDataStructures.cpp +++ b/test/testField.cpp @@ -1,12 +1,18 @@ +#include #include #include +#include #include #include #include -#include #include #include +#include +#include + +#include "testDataStructures.hpp" + using namespace poet; #define CHECK_AND_FAIL_LOOP(val1, val2) \ @@ -19,14 +25,17 @@ TEST_CASE("Field") { constexpr uint32_t VEC_COUNT = 3; constexpr double INIT_VAL = 1; - Field dut(VEC_SIZE); - std::vector names = {"C", "Ca", "Na"}; std::vector init_values(names.size(), FieldColumn(VEC_SIZE, INIT_VAL)); + RInsidePOET &R = RInsidePOET::getInstance(); + + R["sourcefile"] = RInside_source_file; + R.parseEval("source(sourcefile)"); + SUBCASE("Initialize field with 2D vector") { - dut.InitFromVec(init_values, names); + Field dut(VEC_SIZE, init_values, names); auto props = dut.GetProps(); @@ -43,7 +52,7 @@ TEST_CASE("Field") { SUBCASE("Initialize field with 2D vector") { std::vector init_values(VEC_SIZE * VEC_COUNT, 1); - dut.InitFromVec(init_values, names); + Field dut(VEC_SIZE, init_values, names); auto props = dut.GetProps(); @@ -58,7 +67,7 @@ TEST_CASE("Field") { } } - dut.InitFromVec(init_values, names); + Field dut(VEC_SIZE, init_values, names); SUBCASE("Return vector") { std::vector output = dut.AsVector(); @@ -66,6 +75,37 @@ TEST_CASE("Field") { CHECK(output.size() == VEC_SIZE * VEC_COUNT); } + SUBCASE("SEXP return") { + R["test"] = dut.asSEXP(); + + Field field_constructed = R.parseEval("test"); + + Rcpp::DataFrame R_df = R.parseEval("test"); + Rcpp::CharacterVector R_names = R_df.names(); + + for (std::size_t i = 0; i < VEC_COUNT; i++) { + const auto r_values = Rcpp::as>(R_df[i]); + CHECK_EQ(r_values, dut[names[i]]); + CHECK_EQ(R_names[i], names[i]); + CHECK_EQ(field_constructed[names[i]], dut[names[i]]); + } + } + + SUBCASE("Apply R function (set Na to zero)") { + RHookFunction to_call(R, "simple_field"); + Field field_proc = to_call(dut.asSEXP()); + + CHECK_EQ(field_proc["Na"], FieldColumn(dut.GetRequestedVecSize(), 0)); + } + + SUBCASE("Apply R function (add two fields)") { + RHookFunction to_call(R, "extended_field"); + Field field_proc = to_call(dut.asSEXP(), dut.asSEXP()); + + CHECK_EQ(field_proc["Na"], + FieldColumn(dut.GetRequestedVecSize(), INIT_VAL + INIT_VAL)); + } + constexpr double NEW_VAL = 2.; std::vector new_val_vec(VEC_SIZE, NEW_VAL); @@ -102,7 +142,7 @@ TEST_CASE("Field") { } } - dut.SetFromVector(new_field); + dut = new_field; SUBCASE("SetFromVector return correct field vector") { auto ret_vec = dut.AsVector(); @@ -147,19 +187,18 @@ TEST_CASE("Field") { // reset field names = dut.GetProps(); - dut.SetFromVector( - std::vector(names.size(), FieldColumn(VEC_SIZE, INIT_VAL))); + + dut = std::vector(names.size(), FieldColumn(VEC_SIZE, INIT_VAL)); constexpr double SOME_OTHER_VAL = -0.5; - Field some_other_field(VEC_SIZE); std::vector some_other_props = {"Na", "Cl"}; std::vector some_other_values(VEC_SIZE * some_other_props.size(), SOME_OTHER_VAL); - some_other_field.InitFromVec(some_other_values, some_other_props); + Field some_other_field(VEC_SIZE, some_other_values, some_other_props); SUBCASE("Update existing field from another field") { - dut.UpdateFromField(some_other_field); + dut.update(some_other_field); auto ret_vec = dut.As2DVector(); auto ret_it = ret_vec.begin(); diff --git a/test/testNamedVector.cpp b/test/testNamedVector.cpp new file mode 100644 index 000000000..d5ee908f5 --- /dev/null +++ b/test/testNamedVector.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "testDataStructures.hpp" + +TEST_CASE("NamedVector") { + RInsidePOET &R = RInsidePOET::getInstance(); + + R["sourcefile"] = RInside_source_file; + R.parseEval("source(sourcefile)"); + + const std::vector names{{"H", "O", "Ca"}}; + const std::vector values{{0.1, 0.2, 0.3}}; + + poet::NamedVector nv(names, values); + + SUBCASE("SEXP conversion") { + R["test"] = nv; + + const Rcpp::NumericVector R_value = R.parseEval("test"); + const Rcpp::CharacterVector R_names = R_value.names(); + + const poet::NamedVector nv_constructed = R["test"]; + + for (std::size_t i = 0; i < R_value.size(); i++) { + CHECK_EQ(R_value[i], values[i]); + CHECK_EQ(R_names[i], names[i]); + CHECK_EQ(nv_constructed[i], values[i]); + CHECK_EQ(nv_constructed.getNames()[i], names[i]); + } + } + + SUBCASE("Apply R function (set to zero)") { + RHookFunction> to_call(R, "simple_named_vec"); + nv = to_call(nv); + + CHECK_EQ(nv[2], 0); + } + + SUBCASE("Apply R function (second NamedVector)") { + RHookFunction> to_call(R, "extended_named_vec"); + + const std::vector names{{"C", "H", "Mg"}}; + const std::vector values{{0, 1, 2}}; + + poet::NamedVector second_nv(names, values); + + nv = to_call(nv, second_nv); + + CHECK_EQ(nv[0], 1.1); + } + + SUBCASE("Apply R function (check if zero)") { + RHookFunction to_call(R, "bool_named_vec"); + + CHECK_FALSE(to_call(nv)); + } +} diff --git a/test/testRounding.cpp b/test/testRounding.cpp new file mode 100644 index 000000000..be50fac99 --- /dev/null +++ b/test/testRounding.cpp @@ -0,0 +1,79 @@ +#include + +#include + +TEST_CASE("Rounding") { + constexpr int signif = 3; + + poet::DHT_Rounder rounder; + + SUBCASE("+exp - +sig") { + double input = 1.2345; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("+exp - -sig") { + double input = -1.2345; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - +sig") { + double input = 1.23456789E-5; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, -5); + } + + SUBCASE("-exp - -sig") { + double input = -1.23456789E-5; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, -5); + } + + SUBCASE("-exp - +sig (exceeding aqueous minimum)") { + double input = 1.23456789E-14; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 0); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - -sig (exceeding aqueous minimum)") { + double input = -1.23456789E-14; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 0); + CHECK_EQ(rounded.sc_notation.exp, 0); + } + + SUBCASE("-exp - +sig (diff exceeds aqueous minimum)") { + double input = 1.23456789E-13; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, 1); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - -sig (diff exceeds aqueous minimum)") { + double input = -1.23456789E-13; + auto rounded = rounder.round(input, signif, false); + CHECK_EQ(rounded.sc_notation.significant, -1); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - +sig (diff exceeds aqueous minimum - but H or O)") { + double input = 1.23456789E-13; + auto rounded = rounder.round(input, signif, true); + CHECK_EQ(rounded.sc_notation.significant, 123); + CHECK_EQ(rounded.sc_notation.exp, -13); + } + + SUBCASE("-exp - -sig (diff exceeds aqueous minimum - but H or O)") { + double input = -1.23456789E-13; + auto rounded = rounder.round(input, signif, true); + CHECK_EQ(rounded.sc_notation.significant, -123); + CHECK_EQ(rounded.sc_notation.exp, -13); + } +} diff --git a/util/data_evaluation/RFun_Eval.R b/util/data_evaluation/RFun_Eval.R index ed0068689..8243b8c05 100644 --- a/util/data_evaluation/RFun_Eval.R +++ b/util/data_evaluation/RFun_Eval.R @@ -1,10 +1,11 @@ ## Simple library of functions to assess and visualize the results of the coupled simulations -## Time-stamp: "Last modified 2023-04-24 16:09:55 mluebke" +## Time-stamp: "Last modified 2023-05-29 13:51:21 mluebke" require(RedModRphree) require(Rmufits) ## essentially for PlotCartCellData require(Rcpp) +require(stringr) curdir <- dirname(sys.frame(1)$ofile) ##path.expand(".") @@ -16,13 +17,18 @@ ConvertDHTKey <- function(value) { rcpp_key_convert(value) } +ConvertToUInt64 <- function(double_data) { + rcpp_uint64_convert(double_data) +} + ## function which reads all simulation results in a given directory ReadRTSims <- function(dir) { files_full <- list.files(dir, pattern="iter.*rds", full.names=TRUE) files_name <- list.files(dir, pattern="iter.*rds", full.names=FALSE) res <- lapply(files_full, readRDS) names(res) <- gsub(".rds","",files_name, fixed=TRUE) - return(res) + + return(res[str_sort(names(res), numeric = TRUE)]) } ## function which reads all successive DHT stored in a given directory @@ -68,6 +74,61 @@ ReadDHT <- function(file, new_scheme = TRUE) { return(res) } +## function which reads all successive DHT stored in a given directory +ReadAllPHT <- function(dir, with_info = FALSE) { + files_full <- list.files(dir, pattern="iter.*pht", full.names=TRUE) + files_name <- list.files(dir, pattern="iter.*pht", full.names=FALSE) + res <- lapply(files_full, ReadPHT, with_info = with_info) + names(res) <- gsub(".pht","",files_name, fixed=TRUE) + return(res) +} + +## function which reads one .dht file and gives a matrix +ReadPHT <- function(file, with_info = FALSE) { + conn <- file(file, "rb") ## open for reading in binary mode + if (!isSeekable(conn)) + stop("Connection not seekable") + + ## we first reposition ourselves to the end of the file... + tmp <- seek(conn, where=0, origin = "end") + ## ... and then back to the origin so to store the length in bytes + flen <- seek(conn, where=0, origin = "start") + + ## we read the first 2 integers (4 bytes each) containing dimensions in bytes + dims <- readBin(conn, what="integer", n=2) + + ## compute dimensions of the data + tots <- sum(dims) + ncol <- tots/8 + nrow <- (flen - 8)/tots ## 8 here is 2*sizeof("int") + buff <- readBin(conn, what="double", n=ncol*nrow) + ## close connection + close(conn) + res <- matrix(buff, nrow=nrow, ncol=ncol, byrow=TRUE) + + nkeys <- dims[1] / 8 + keys <- res[, 1:nkeys - 1] + + timesteps <- res[, nkeys] + conv <- apply(keys, 2, ConvertDHTKey) + #res[, 1:nkeys - 1] <- conv + + ndata <- dims[2] / 8 + fill_rate <- ConvertToUInt64(res[, nkeys + 1]) + + buff <- c(conv, timesteps, fill_rate) + + if (with_info) { + ndata <- dims[2]/8 + visit_count <- ConvertToUInt64(res[, nkeys + ndata]) + buff <- c(buff, visit_count) + } + + res <- matrix(buff, nrow = nrow, byrow = FALSE) + + return(res) +} + ## Scatter plots of each variable in the iteration PlotScatter <- function(sam1, sam2, which=NULL, labs=c("NO DHT", "DHT"), pch=".", cols=3, ...) { if ((!is.data.frame(sam1)) & ("T" %in% names(sam1))) @@ -228,3 +289,33 @@ Plot2DCellData <- function (data, grid, nx, ny, contour = TRUE, } invisible(pp) } + +PlotAsMP4 <- function(data, nx, ny, to_plot, out_dir, name, + contour = FALSE, scale = FALSE, framerate = 30) { + sort_data <- data[str_sort(names(data), numeric = TRUE)] + plot_data <- lapply(sort_data, function(x) x$C[[to_plot]]) + pad_size <- ceiling(log10(length(plot_data))) + + dir.create(out_dir, showWarnings = FALSE) + output_files <- paste0(out_dir, "/", name, "_%0", pad_size, "d.png") + output_mp4 <- paste0(out_dir, "/", name, ".mp4") + + png(output_files, + width = 297, height = 210, units = "mm", + res = 100 + ) + + for (i in 1:length(plot_data)) { + Rmufits::PlotCartCellData(plot_data[[i]], nx = nx, ny = ny, contour = contour, scale = scale) + } + dev.off() + + ffmpeg_command <- paste( + "ffmpeg -framerate", framerate, "-i", output_files, + "-c:v libx264 -crf 22", output_mp4 + ) + + unlink(output_mp4) + system(ffmpeg_command) + +} diff --git a/util/data_evaluation/interpret_keys.cpp b/util/data_evaluation/interpret_keys.cpp index 777f95d1d..26419076e 100644 --- a/util/data_evaluation/interpret_keys.cpp +++ b/util/data_evaluation/interpret_keys.cpp @@ -35,3 +35,16 @@ std::vector rcpp_key_convert(std::vector input) { return output; } + +// [[Rcpp::export]] +std::vector rcpp_uint64_convert(std::vector input) { + std::vector output; + output.reserve(input.size()); + + for (double &value : input) { + std::uint64_t *as_int = reinterpret_cast(&value); + output.push_back(*as_int); + } + + return output; +}