From f832a9e0db713900bde786ef108921f8dde2a9e2 Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Mon, 18 Mar 2024 23:06:52 +0100 Subject: [PATCH] Save latest changes, boundaries still missing or incomplete --- R_lib/init_r_lib.R | 9 +- ext/iphreeqc | 2 +- src/CMakeLists.txt | 1 + src/Init/DiffusionInit.cpp | 166 ++++++++++++++++ src/Init/InitialList.cpp | 1 + src/Init/InitialList.hpp | 11 ++ src/poet.hpp | 396 +++++++++++++++++++++++++++++++++++++ 7 files changed, 581 insertions(+), 5 deletions(-) create mode 100644 src/Init/DiffusionInit.cpp create mode 100644 src/poet.hpp diff --git a/R_lib/init_r_lib.R b/R_lib/init_r_lib.R index 83eaa7293..8879ac703 100644 --- a/R_lib/init_r_lib.R +++ b/R_lib/init_r_lib.R @@ -17,7 +17,7 @@ pqc_to_grid <- function(pqc_in, grid) { # Iterate over each ID in the vector for (id_mat in id_vector) { # Find the matching row in the data.table - matching_dt <- dt[id == id_mat] + matching_dt <- dt[dt$id == id_mat] # Append the matching data.table row to the result data.table result_dt <- rbind(result_dt, matching_dt) @@ -36,14 +36,15 @@ test <- pqc_init modify_module_sizes <- function(mod_sizes, pqc_mat, init_grid) { # Find all unique IDs in init_grid - unique_ids <- unique(as.vector(init_grid$id)) + unique_ids <- unique(init_grid$id) # remove rows from pqc_mat that are not in init_grid pqc_mat <- as.data.frame(pqc_mat) pqc_mat <- pqc_mat[pqc_mat$id %in% unique_ids, ] - # Find the column indices where all rows are NA - na_cols <- which(colSums(is.na(pqc_mat)) == nrow(pqc_mat)) + # Find the column indices where all rows are NaN + na_cols <- which(sapply(pqc_mat, function(x) all(is.na(x)))) + # na_cols <- which(colSums(is.nan(pqc_mat)) == nrow(pqc_mat)) # Build cumsum over mod_sizes cum_mod_sizes <- cumsum(mod_sizes) diff --git a/ext/iphreeqc b/ext/iphreeqc index 54136c8e0..d92ab3176 160000 --- a/ext/iphreeqc +++ b/ext/iphreeqc @@ -1 +1 @@ -Subproject commit 54136c8e0c638d0dc47e85cef1428822b6c1512d +Subproject commit d92ab31765d3850beb0b9e980b7b82696e56e4fc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d53484cd1..e0dcb8a55 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(poetinitlib Init/InitialList.cpp Init/GridInit.cpp + Init/DiffusionInit.cpp ) target_link_libraries(poetinitlib PUBLIC diff --git a/src/Init/DiffusionInit.cpp b/src/Init/DiffusionInit.cpp new file mode 100644 index 000000000..becaaa977 --- /dev/null +++ b/src/Init/DiffusionInit.cpp @@ -0,0 +1,166 @@ +#include +// leave above Rcpp includes, as eigen seem to have problems with a preceding +// Rcpp include + +#include +#include + +#include "InitialList.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace poet { + +enum SEXP_TYPE { SEXP_IS_LIST, SEXP_IS_MAT, SEXP_IS_NUM }; + +const std::map tug_side_mapping = { + {tug::BC_SIDE_RIGHT, "E"}, + {tug::BC_SIDE_LEFT, "W"}, + {tug::BC_SIDE_TOP, "N"}, + {tug::BC_SIDE_BOTTOM, "S"}}; + +static Rcpp::List parseBoundaries2D(const Rcpp::List &boundaries_list, + std::uint32_t n_cols, + std::uint32_t n_rows) { + Rcpp::List out_list; + + for (const auto &side : tug_side_mapping) { + Rcpp::NumericVector type = + (side.first == tug::BC_SIDE_RIGHT || side.first == tug::BC_SIDE_LEFT) + ? Rcpp::NumericVector(n_rows, tug::BC_TYPE_CLOSED) + : Rcpp::NumericVector(n_cols, tug::BC_TYPE_CLOSED); + + Rcpp::NumericVector value = + (side.first == tug::BC_SIDE_RIGHT || side.first == tug::BC_SIDE_LEFT) + ? Rcpp::NumericVector(n_rows, 0) + : Rcpp::NumericVector(n_cols, 0); + + if (boundaries_list.containsElementNamed(side.second.c_str())) { + const Rcpp::List mapping = boundaries_list[side.second]; + + const Rcpp::NumericVector cells = mapping["cells"]; + const Rcpp::NumericVector values = mapping["sol_id"]; + + if (cells.size() != values.size()) { + throw std::runtime_error("Boundary [" + side.second + + "] cells and values are not the same length"); + } + } + + out_list[side.second] = Rcpp::List::create(Rcpp::Named("type") = type, + Rcpp::Named("value") = value); + } +} + +static inline SEXP_TYPE get_datatype(const SEXP &input) { + Rcpp::Function check_list("is.list"); + Rcpp::Function check_mat("is.matrix"); + + if (Rcpp::as(check_list(input))) { + return SEXP_IS_LIST; + } else if (Rcpp::as(check_mat(input))) { + return SEXP_IS_MAT; + } else { + return SEXP_IS_NUM; + } +} + +static Rcpp::List parseAlphas2D(const SEXP &input, + const std::vector &transport_names, + std::uint32_t n_cols, std::uint32_t n_rows) { + Rcpp::List out_list; + + SEXP_TYPE input_type = get_datatype(input); + + switch (input_type) { + case SEXP_IS_LIST: { + const Rcpp::List input_list(input); + + if (input_list.size() != transport_names.size()) { + throw std::runtime_error( + "Length of alphas list does not match number of transport names"); + } + + for (const auto &name : transport_names) { + if (!input_list.containsElementNamed(name.c_str())) { + throw std::runtime_error("Alphas list does not contain transport name"); + } + + const Rcpp::NumericMatrix alpha_mat(input_list[name]); + + if (alpha_mat.size() == 1) { + Rcpp::NumericVector alpha(n_cols * n_rows, alpha_mat(0, 0)); + out_list[name] = Rcpp::wrap(alpha); + } else { + if (alpha_mat.nrow() != n_rows || alpha_mat.ncol() != n_cols) { + throw std::runtime_error( + "Alpha matrix does not match grid dimensions"); + } + + out_list[name] = alpha_mat; + } + } + break; + } + case SEXP_IS_MAT: { + Rcpp::NumericMatrix input_mat(input); + + Rcpp::NumericVector alpha(n_cols * n_rows, input_mat(0, 0)); + + if (input_mat.size() != 1) { + if (input_mat.nrow() != n_rows || input_mat.ncol() != n_cols) { + throw std::runtime_error("Alpha matrix does not match grid dimensions"); + } + + for (std::size_t i = 0; i < n_rows; i++) { + for (std::size_t j = 0; j < n_cols; j++) { + alpha[i * n_cols + j] = input_mat(i, j); + } + } + } + + for (const auto &name : transport_names) { + out_list[name] = alpha; + } + + break; + } + case SEXP_IS_NUM: { + Rcpp::NumericVector alpha(n_cols * n_rows, Rcpp::as(input)); + for (const auto &name : transport_names) { + out_list[name] = alpha; + } + break; + } + default: + throw std::runtime_error("Unknown data type"); + } + + return out_list; +} +void InitialList::initDiffusion(const Rcpp::List &diffusion_input) { + const Rcpp::List &boundaries = diffusion_input["boundaries"]; + const SEXP &alpha_x = diffusion_input["alpha_x"]; + const SEXP &alpha_y = diffusion_input["alpha_y"]; + + std::vector colnames = + Rcpp::as>(this->initial_grid.names()); + + std::vector transport_names(colnames.begin() + 1, + colnames.begin() + 1 + + this->module_sizes[POET_SOL]); + + this->alpha_x = + parseAlphas2D(alpha_x, transport_names, this->n_cols, this->n_rows); + + this->alpha_y = + parseAlphas2D(alpha_y, transport_names, this->n_cols, this->n_rows); +} +} // namespace poet \ No newline at end of file diff --git a/src/Init/InitialList.cpp b/src/Init/InitialList.cpp index d12c6e314..d37fee1fc 100644 --- a/src/Init/InitialList.cpp +++ b/src/Init/InitialList.cpp @@ -3,5 +3,6 @@ namespace poet { void InitialList::initializeFromList(const Rcpp::List &setup) { initGrid(setup["grid"]); + initDiffusion(setup["diffusion"]); } } // namespace poet \ No newline at end of file diff --git a/src/Init/InitialList.hpp b/src/Init/InitialList.hpp index d236d19be..30289c252 100644 --- a/src/Init/InitialList.hpp +++ b/src/Init/InitialList.hpp @@ -6,9 +6,13 @@ #include #include +#include +#include namespace poet { +using TugType = double; + class InitialList { public: InitialList(RInside &R) : R(R){}; @@ -43,5 +47,12 @@ private: // Chemistry members IPhreeqcPOET::ModulesArray module_sizes; + + // Diffusion members + void initDiffusion(const Rcpp::List &diffusion_input); + Rcpp::List boundaries; + + Rcpp::List alpha_x; + Rcpp::List alpha_y; }; } // namespace poet \ No newline at end of file diff --git a/src/poet.hpp b/src/poet.hpp new file mode 100644 index 000000000..3bee0eba6 --- /dev/null +++ b/src/poet.hpp @@ -0,0 +1,396 @@ +#ifndef POET_H +#define POET_H + +#include +static const char *poet_version = "tmp/ml/v0.2-107-g9990c12"; + +// using the Raw string literal to avoid escaping the quotes +static inline std::string kin_r_library = R"(## Time-stamp: "Last modified 2023-08-15 11:58:23 delucia" + +### Copyright (C) 2018-2023 Marco De Lucia, Max Luebke (GFZ Potsdam) +### +### POET is free software; you can redistribute it and/or modify it under the +### terms of the GNU General Public License as published by the Free Software +### Foundation; either version 2 of the License, or (at your option) any later +### version. +### +### POET is distributed in the hope that it will be useful, but WITHOUT ANY +### WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +### A PARTICULAR PURPOSE. See the GNU General Public License for more details. +### +### You should have received a copy of the GNU General Public License along with +### this program; if not, write to the Free Software Foundation, Inc., 51 +### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +## Simple function to check file extension. It is needed to check if +## the GridFile is SUM (MUFITS format) or rds/RData +FileExt <- function(x) { + pos <- regexpr("\\.([[:alnum:]]+)$", x) + ifelse(pos > -1L, substring(x, pos + 1L), "") +} + +master_init <- function(setup) { + msgm("Process with rank 0 reading GRID properties") + + ## Setup the directory where we will store the results + verb <- FALSE + if (local_rank == 0) { + verb <- TRUE ## verbosity loading MUFITS results + if (!dir.exists(fileout)) { + dir.create(fileout) + msgm("created directory ", fileout) + } else { + msgm("dir ", fileout, " already exists, I will overwrite!") + } + if (!exists("store_result")) { + msgm("store_result doesn't exist!") + } else { + msgm("store_result is ", store_result) + } + } else { + + } + + setup$iter <- 1 + setup$maxiter <- setup$iterations + setup$timesteps <- setup$timesteps + setup$simulation_time <- 0 + + if (is.null(setup[["store_result"]])) { + setup$store_result <- TRUE + } + + if (setup$store_result) { + if (is.null(setup[["out_save"]])) { + setup$out_save <- seq(1, setup$iterations) + } + } + + return(setup) +} + +## This function, called only by master, stores on disk the last +## calculated time step if store_result is TRUE and increments the +## iteration counter +master_iteration_end <- function(setup) { + iter <- setup$iter + ## max digits for iterations + dgts <- as.integer(ceiling(log10(setup$iterations + 1))) + ## string format to use in sprintf + fmt <- paste0("%0", dgts, "d") + + ## Write on disk state_T and state_C after every iteration + ## comprised in setup$out_save + if (setup$store_result) { + if (iter %in% setup$out_save) { + nameout <- paste0(fileout, "/iter_", sprintf(fmt=fmt, iter), ".rds") + info <- list( + tr_req_dt = as.integer(setup$req_dt) + ## tr_allow_dt = setup$allowed_dt, + ## tr_inniter = as.integer(setup$inniter) + ) + saveRDS(list( + T = setup$state_T, C = setup$state_C, + simtime = as.integer(setup$simtime), + tr_info = info + ), file = nameout) + msgm("results stored in <", nameout, ">") + } + } + msgm("done iteration", iter, "/", setup$maxiter) + setup$iter <- setup$iter + 1 + return(setup) +} + +## function for the workers to compute chemistry through PHREEQC +slave_chemistry <- function(setup, data) { + base <- setup$base + first <- setup$first + prop <- setup$prop + immobile <- setup$immobile + kin <- setup$kin + ann <- setup$ann + + iter <- setup$iter + timesteps <- setup$timesteps + dt <- timesteps[iter] + + state_T <- data ## not the global field, but the work-package + + ## treat special H+/pH, e-/pe cases + state_T <- RedModRphree::Act2pH(state_T) + + ## reduction of the problem + if (setup$reduce) { + reduced <- ReduceStateOmit(state_T, omit = setup$ann) + } else { + reduced <- state_T + } + + ## form the PHREEQC input script for the current work package + inplist <- SplitMultiKin( + data = reduced, procs = 1, base = base, first = first, + ann = ann, prop = prop, minerals = immobile, kin = kin, dt = dt + ) + + ## if (local_rank==1 & iter==1) + ## RPhreeWriteInp("FirstInp", inplist) + + tmpC <- RunPQC(inplist, procs = 1, second = TRUE) + + ## recompose after the reduction + if (setup$reduce) { + state_C <- RecomposeState(tmpC, reduced) + } else { + state_C <- tmpC + } + + ## the next line is needed since we don't need all columns of + ## PHREEQC output + return(state_C[, prop]) +} + +## This function, called by master +master_chemistry <- function(setup, data) { + state_T <- setup$state_T + + msgm(" chemistry iteration", setup$iter) + + ## treat special H+/pH, e-/pe cases + state_T <- RedModRphree::Act2pH(state_T) + + ## reduction of the problem + if (setup$reduce) { + reduced <- ReduceStateOmit(state_T, omit = setup$ann) + } else { + reduced <- state_T + } + + ## inject data from workers + res_C <- data + + rownames(res_C) <- NULL + + ## print(res_C) + + if (nrow(res_C) > nrow(reduced)) { + res_C <- res_C[seq(2, nrow(res_C), by = 2), ] + } + + ## recompose after the reduction + if (setup$reduce) { + state_C <- RecomposeState(res_C, reduced) + } else { + state_C <- res_C + } + + setup$state_C <- state_C + setup$reduced <- reduced + + return(setup) +} + + +## Adapted version for "reduction" +ReduceStateOmit <- function(data, omit = NULL, sign = 6) { + require(mgcv) + + rem <- colnames(data) + if (is.list(omit)) { + indomi <- match(names(omit), colnames(data)) + datao <- data[, -indomi] + } else { + datao <- data + } + + datao <- signif(datao, sign) + red <- mgcv::uniquecombs(datao) + inds <- attr(red, "index") + now <- ncol(red) + + + ## reattach the omitted column(s) + ## FIXME: control if more than one ann is present + if (is.list(omit)) { + red <- cbind(red, rep(data[1, indomi], nrow(red))) + + colnames(red)[now + 1] <- names(omit) + + ret <- red[, colnames(data)] + } else { + ret <- red + } + rownames(ret) <- NULL + attr(ret, "index") <- inds + return(ret) +} + + + +## Attach the name of the calling function to the message displayed on +## R's stdout +msgm <- function(...) { + if (local_rank == 0) { + fname <- as.list(sys.call(-1))[[1]] + prefix <- paste0("R: ", fname, " ::") + cat(paste(prefix, ..., "\n")) + } + invisible() +} + + +## Function called by master R process to store on disk all relevant +## parameters for the simulation +StoreSetup <- function(setup) { + + to_store <- vector(mode = "list", length = 4) + ## names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT") + names(to_store) <- c("Sim", "Transport", "DHT", "Cmdline") + + ## read the setup R file, which is sourced in kin.cpp + tmpbuff <- file(filesim, "r") + setupfile <- readLines(tmpbuff) + close.connection(tmpbuff) + + to_store$Sim <- setupfile + + ## to_store$Flow <- list( + ## snapshots = setup$snapshots, + ## gridfile = setup$gridfile, + ## phase = setup$phase, + ## density = setup$density, + ## dt_differ = setup$dt_differ, + ## prolong = setup$prolong, + ## maxiter = setup$maxiter, + ## saved_iter = setup$iter_output, + ## out_save = setup$out_save ) + + to_store$Transport <- setup$diffusion + + ## to_store$Chemistry <- list( + ## nprocs = n_procs, + ## wp_size = work_package_size, + ## base = setup$base, + ## first = setup$first, + ## init = setup$initsim, + ## db = db, + ## kin = setup$kin, + ## ann = setup$ann) + + if (dht_enabled) { + to_store$DHT <- list( + enabled = dht_enabled, + log = dht_log + ## signif = dht_final_signif, + ## proptype = dht_final_proptype + ) + } else { + to_store$DHT <- FALSE + } + + if (dht_enabled) { + to_store$DHT <- list( + enabled = dht_enabled, + log = dht_log + #signif = dht_final_signif, + #proptype = dht_final_proptype + ) + } else { + to_store$DHT <- FALSE + } + + saveRDS(to_store, file = paste0(fileout, "/setup.rds")) + msgm("initialization stored in ", paste0(fileout, "/setup.rds")) +} + +GetWorkPackageSizesVector <- function(n_packages, package_size, len) { + ids <- rep(1:n_packages, times=package_size, each = 1)[1:len] + return(as.integer(table(ids))) +} +)"; + +static inline std::string init_r_library = R"(input <- readRDS("/home/max/Storage/poet/build/apps/out.rds") + +grid_def <- matrix(c(2, 3), nrow = 2, ncol = 5) + +library(data.table) + +pqc_to_grid <- function(pqc_in, grid) { + # Convert the input DataFrame to a data.table + dt <- data.table(pqc_in) + + # Flatten the matrix into a vector + id_vector <- as.vector(t(grid)) + + # Initialize an empty data.table to store the results + result_dt <- data.table() + + # Iterate over each ID in the vector + for (id_mat in id_vector) { + # Find the matching row in the data.table + matching_dt <- dt[id == id_mat] + + # Append the matching data.table row to the result data.table + result_dt <- rbind(result_dt, matching_dt) + } + + # Remove all columns which only contain NaN + # result_dt <- result_dt[, colSums(is.na(result_dt)) != nrow(result_dt)] + + res_df <- as.data.frame(result_dt) + + return(res_df[, colSums(is.na(res_df)) != nrow(res_df)]) +} + +pqc_init <- pqc_to_grid(input, grid_def) +test <- pqc_init + +modify_module_sizes <- function(mod_sizes, pqc_mat, init_grid) { + # Find all unique IDs in init_grid + unique_ids <- unique(as.vector(init_grid$id)) + + # remove rows from pqc_mat that are not in init_grid + pqc_mat <- as.data.frame(pqc_mat) + pqc_mat <- pqc_mat[pqc_mat$id %in% unique_ids, ] + + # Find the column indices where all rows are NA + na_cols <- which(colSums(is.na(pqc_mat)) == nrow(pqc_mat)) + + # Build cumsum over mod_sizes + cum_mod_sizes <- cumsum(mod_sizes) + + # Find the indices where the value of na_cols is equal to the value of cum_mod_sizes + idx <- which(cum_mod_sizes %in% na_cols) + + # Set the value of mod_sizes to 0 at the indices found in the previous step + mod_sizes[idx] <- 0 + + return(mod_sizes) +} + +# mod_sizes <- c(7, 0, 4, 2, 0) + +# unique_ids <- unique(as.vector(pqc_init$id)) + +# tmp <- as.data.frame(input) +# pqc_test <- tmp[tmp$id %in% unique_ids, ] +# na_cols <- which(colSums(is.na(pqc_test)) == nrow(pqc_test)) +# cum_mod_sizes <- cumsum(mod_sizes) + +# # Get the indices of the columns of cum_mod_sizes where the value of the column is equal to the value of na_cols +# idx <- which(cum_mod_sizes %in% na_cols) +# mod_sizes[idx] <- 0 + +# idx <- which(na_cols == cum_mod_sizes) + + + +# idx <- which(na_cols[1] >= cum_mod_sizes) + +# mod_sizes <- modify_module_sizes(mod_sizes, pqc_init, pqc_init) + +# remove column with all NA +)"; + +#endif // POET_H