Save latest changes, boundaries still missing or incomplete

2025-12-16 12:54:50 +01:00 · 2024-03-18 23:06:52 +01:00 · 2024-03-18 23:06:52 +01:00 · 2b74fca740
commit 2b74fca740
parent 371eb1367a
7 changed files with 581 additions and 5 deletions
--- a/R_lib/init_r_lib.R
+++ b/R_lib/init_r_lib.R
@ -17,7 +17,7 @@ pqc_to_grid <- function(pqc_in, grid) {
    # Iterate over each ID in the vector
    for (id_mat in id_vector) {
        # Find the matching row in the data.table
-        matching_dt <- dt[id == id_mat]
+        matching_dt <- dt[dt$id == id_mat]
        # Append the matching data.table row to the result data.table
        result_dt <- rbind(result_dt, matching_dt)
@ -36,14 +36,15 @@ test <- pqc_init
 modify_module_sizes <- function(mod_sizes, pqc_mat, init_grid) {
    # Find all unique IDs in init_grid
-    unique_ids <- unique(as.vector(init_grid$id))
+    unique_ids <- unique(init_grid$id)
    # remove rows from pqc_mat that are not in init_grid
    pqc_mat <- as.data.frame(pqc_mat)
    pqc_mat <- pqc_mat[pqc_mat$id %in% unique_ids, ]
-    # Find the column indices where all rows are NA
+    # Find the column indices where all rows are NaN
-    na_cols <- which(colSums(is.na(pqc_mat)) == nrow(pqc_mat))
+    na_cols <- which(sapply(pqc_mat, function(x) all(is.na(x))))
    # na_cols <- which(colSums(is.nan(pqc_mat)) == nrow(pqc_mat))
    # Build cumsum over mod_sizes
    cum_mod_sizes <- cumsum(mod_sizes)
--- a/ext/iphreeqc
+++ b/ext/iphreeqc
@ -1 +1 @@
-Subproject commit 54136c8e0c638d0dc47e85cef1428822b6c1512d
+Subproject commit d92ab31765d3850beb0b9e980b7b82696e56e4fc
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -1,6 +1,7 @@
 add_library(poetinitlib
  Init/InitialList.cpp
  Init/GridInit.cpp  
  Init/DiffusionInit.cpp
 )
 target_link_libraries(poetinitlib PUBLIC
--- a/src/Init/DiffusionInit.cpp
+++ b/src/Init/DiffusionInit.cpp
@ -0,0 +1,166 @@
 #include <tug/Boundary.hpp>
 // leave above Rcpp includes, as eigen seem to have problems with a preceding
 // Rcpp include
 #include <map>
 #include <set>
 #include "InitialList.hpp"
 #include <Rcpp/proxy/ProtectedProxy.h>
 #include <Rcpp/vector/instantiation.h>
 #include <Rinternals.h>
 #include <cstddef>
 #include <cstdint>
 #include <stdexcept>
 #include <string>
 #include <vector>
 namespace poet {
 enum SEXP_TYPE { SEXP_IS_LIST, SEXP_IS_MAT, SEXP_IS_NUM };
 const std::map<std::uint8_t, std::string> tug_side_mapping = {
    {tug::BC_SIDE_RIGHT, "E"},
    {tug::BC_SIDE_LEFT, "W"},
    {tug::BC_SIDE_TOP, "N"},
    {tug::BC_SIDE_BOTTOM, "S"}};
 static Rcpp::List parseBoundaries2D(const Rcpp::List &boundaries_list,
                                    std::uint32_t n_cols,
                                    std::uint32_t n_rows) {
  Rcpp::List out_list;
  for (const auto &side : tug_side_mapping) {
    Rcpp::NumericVector type =
        (side.first == tug::BC_SIDE_RIGHT || side.first == tug::BC_SIDE_LEFT)
            ? Rcpp::NumericVector(n_rows, tug::BC_TYPE_CLOSED)
            : Rcpp::NumericVector(n_cols, tug::BC_TYPE_CLOSED);
    Rcpp::NumericVector value =
        (side.first == tug::BC_SIDE_RIGHT || side.first == tug::BC_SIDE_LEFT)
            ? Rcpp::NumericVector(n_rows, 0)
            : Rcpp::NumericVector(n_cols, 0);
    if (boundaries_list.containsElementNamed(side.second.c_str())) {
      const Rcpp::List mapping = boundaries_list[side.second];
      const Rcpp::NumericVector cells = mapping["cells"];
      const Rcpp::NumericVector values = mapping["sol_id"];
      if (cells.size() != values.size()) {
        throw std::runtime_error("Boundary [" + side.second +
                                 "] cells and values are not the same length");
      }
    }
    out_list[side.second] = Rcpp::List::create(Rcpp::Named("type") = type,
                                               Rcpp::Named("value") = value);
  }
 }
 static inline SEXP_TYPE get_datatype(const SEXP &input) {
  Rcpp::Function check_list("is.list");
  Rcpp::Function check_mat("is.matrix");
  if (Rcpp::as<bool>(check_list(input))) {
    return SEXP_IS_LIST;
  } else if (Rcpp::as<bool>(check_mat(input))) {
    return SEXP_IS_MAT;
  } else {
    return SEXP_IS_NUM;
  }
 }
 static Rcpp::List parseAlphas2D(const SEXP &input,
                                const std::vector<std::string> &transport_names,
                                std::uint32_t n_cols, std::uint32_t n_rows) {
  Rcpp::List out_list;
  SEXP_TYPE input_type = get_datatype(input);
  switch (input_type) {
  case SEXP_IS_LIST: {
    const Rcpp::List input_list(input);
    if (input_list.size() != transport_names.size()) {
      throw std::runtime_error(
          "Length of alphas list does not match number of transport names");
    }
    for (const auto &name : transport_names) {
      if (!input_list.containsElementNamed(name.c_str())) {
        throw std::runtime_error("Alphas list does not contain transport name");
      }
      const Rcpp::NumericMatrix alpha_mat(input_list[name]);
      if (alpha_mat.size() == 1) {
        Rcpp::NumericVector alpha(n_cols * n_rows, alpha_mat(0, 0));
        out_list[name] = Rcpp::wrap(alpha);
      } else {
        if (alpha_mat.nrow() != n_rows || alpha_mat.ncol() != n_cols) {
          throw std::runtime_error(
              "Alpha matrix does not match grid dimensions");
        }
        out_list[name] = alpha_mat;
      }
    }
    break;
  }
  case SEXP_IS_MAT: {
    Rcpp::NumericMatrix input_mat(input);
    Rcpp::NumericVector alpha(n_cols * n_rows, input_mat(0, 0));
    if (input_mat.size() != 1) {
      if (input_mat.nrow() != n_rows || input_mat.ncol() != n_cols) {
        throw std::runtime_error("Alpha matrix does not match grid dimensions");
      }
      for (std::size_t i = 0; i < n_rows; i++) {
        for (std::size_t j = 0; j < n_cols; j++) {
          alpha[i * n_cols + j] = input_mat(i, j);
        }
      }
    }
    for (const auto &name : transport_names) {
      out_list[name] = alpha;
    }
    break;
  }
  case SEXP_IS_NUM: {
    Rcpp::NumericVector alpha(n_cols * n_rows, Rcpp::as<TugType>(input));
    for (const auto &name : transport_names) {
      out_list[name] = alpha;
    }
    break;
  }
  default:
    throw std::runtime_error("Unknown data type");
  }
  return out_list;
 }
 void InitialList::initDiffusion(const Rcpp::List &diffusion_input) {
  const Rcpp::List &boundaries = diffusion_input["boundaries"];
  const SEXP &alpha_x = diffusion_input["alpha_x"];
  const SEXP &alpha_y = diffusion_input["alpha_y"];
  std::vector<std::string> colnames =
      Rcpp::as<std::vector<std::string>>(this->initial_grid.names());
  std::vector<std::string> transport_names(colnames.begin() + 1,
                                           colnames.begin() + 1 +
                                               this->module_sizes[POET_SOL]);
  this->alpha_x =
      parseAlphas2D(alpha_x, transport_names, this->n_cols, this->n_rows);
  this->alpha_y =
      parseAlphas2D(alpha_y, transport_names, this->n_cols, this->n_rows);
 }
 } // namespace poet
--- a/src/Init/InitialList.cpp
+++ b/src/Init/InitialList.cpp
@ -3,5 +3,6 @@
 namespace poet {
 void InitialList::initializeFromList(const Rcpp::List &setup) {
  initGrid(setup["grid"]);
  initDiffusion(setup["diffusion"]);
 }
 } // namespace poet
--- a/src/Init/InitialList.hpp
+++ b/src/Init/InitialList.hpp
@ -6,9 +6,13 @@
 #include <cstdint>
 #include <IPhreeqcPOET.hpp>
 #include <utility>
 #include <vector>
 namespace poet {
 using TugType = double;
 class InitialList {
 public:
  InitialList(RInside &R) : R(R){};
@ -43,5 +47,12 @@ private:
  // Chemistry members
  IPhreeqcPOET::ModulesArray module_sizes;
  // Diffusion members
  void initDiffusion(const Rcpp::List &diffusion_input);
  Rcpp::List boundaries;
  Rcpp::List alpha_x;
  Rcpp::List alpha_y;
 };
 } // namespace poet
--- a/src/poet.hpp
+++ b/src/poet.hpp
@ -0,0 +1,396 @@
 #ifndef POET_H
 #define POET_H
 #include <string>
 static const char *poet_version = "tmp/ml/v0.2-107-g9990c12";
 // using the Raw string literal to avoid escaping the quotes
 static inline std::string kin_r_library = R"(## Time-stamp: "Last modified 2023-08-15 11:58:23 delucia"
 ### Copyright (C) 2018-2023 Marco De Lucia, Max Luebke (GFZ Potsdam)
 ###
 ### POET is free software; you can redistribute it and/or modify it under the
 ### terms of the GNU General Public License as published by the Free Software
 ### Foundation; either version 2 of the License, or (at your option) any later
 ### version.
 ###
 ### POET is distributed in the hope that it will be useful, but WITHOUT ANY
 ### WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 ### A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 ###
 ### You should have received a copy of the GNU General Public License along with
 ### this program; if not, write to the Free Software Foundation, Inc., 51
 ### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 ## Simple function to check file extension. It is needed to check if
 ## the GridFile is SUM (MUFITS format) or rds/RData
 FileExt <- function(x) {
    pos <- regexpr("\\.([[:alnum:]]+)$", x)
    ifelse(pos > -1L, substring(x, pos + 1L), "")
 }
 master_init <- function(setup) {
    msgm("Process with rank 0 reading GRID properties")
    ## Setup the directory where we will store the results
    verb <- FALSE
    if (local_rank == 0) {
        verb <- TRUE ## verbosity loading MUFITS results
        if (!dir.exists(fileout)) {
            dir.create(fileout)
            msgm("created directory ", fileout)
        } else {
            msgm("dir ", fileout, " already exists, I will overwrite!")
        }
        if (!exists("store_result")) {
            msgm("store_result doesn't exist!")
        } else {
            msgm("store_result is ", store_result)
        }
    } else {
    }
    setup$iter <- 1
    setup$maxiter <- setup$iterations
    setup$timesteps <- setup$timesteps
    setup$simulation_time <- 0
    if (is.null(setup[["store_result"]])) {
        setup$store_result <- TRUE
    }
    if (setup$store_result) {
        if (is.null(setup[["out_save"]])) {
            setup$out_save <- seq(1, setup$iterations)
        }
    }
    return(setup)
 }
 ## This function, called only by master, stores on disk the last
 ## calculated time step if store_result is TRUE and increments the
 ## iteration counter
 master_iteration_end <- function(setup) {
    iter <- setup$iter
    ## max digits for iterations
    dgts <-  as.integer(ceiling(log10(setup$iterations + 1)))
    ## string format to use in sprintf 
    fmt <- paste0("%0", dgts, "d")
    ## Write on disk state_T and state_C after every iteration
    ## comprised in setup$out_save
    if (setup$store_result) {
        if (iter %in% setup$out_save) {
            nameout <- paste0(fileout, "/iter_", sprintf(fmt=fmt, iter), ".rds")
            info <- list(
                tr_req_dt = as.integer(setup$req_dt)
                ## tr_allow_dt = setup$allowed_dt,
                ## tr_inniter = as.integer(setup$inniter)
            )
            saveRDS(list(
                T = setup$state_T, C = setup$state_C,
                simtime = as.integer(setup$simtime),
                tr_info = info
            ), file = nameout)
            msgm("results stored in <", nameout, ">")
        }
    }
    msgm("done iteration", iter, "/", setup$maxiter)
    setup$iter <- setup$iter + 1
    return(setup)
 }
 ## function for the workers to compute chemistry through PHREEQC
 slave_chemistry <- function(setup, data) {
    base <- setup$base
    first <- setup$first
    prop <- setup$prop
    immobile <- setup$immobile
    kin <- setup$kin
    ann <- setup$ann
    iter <- setup$iter
    timesteps <- setup$timesteps
    dt <- timesteps[iter]
    state_T <- data ## not the global field, but the work-package
    ## treat special H+/pH, e-/pe cases
    state_T <- RedModRphree::Act2pH(state_T)
    ## reduction of the problem
    if (setup$reduce) {
        reduced <- ReduceStateOmit(state_T, omit = setup$ann)
    } else {
        reduced <- state_T
    }
    ## form the PHREEQC input script for the current work package
    inplist <- SplitMultiKin(
        data = reduced, procs = 1, base = base, first = first,
        ann = ann, prop = prop, minerals = immobile, kin = kin, dt = dt
    )
    ## if (local_rank==1 & iter==1)
    ##         RPhreeWriteInp("FirstInp", inplist)
    tmpC <- RunPQC(inplist, procs = 1, second = TRUE)
    ## recompose after the reduction
    if (setup$reduce) {
        state_C <- RecomposeState(tmpC, reduced)
    } else {
        state_C <- tmpC
    }
    ## the next line is needed since we don't need all columns of
    ## PHREEQC output
    return(state_C[, prop])
 }
 ## This function, called by master
 master_chemistry <- function(setup, data) {
    state_T <- setup$state_T
    msgm(" chemistry iteration", setup$iter)
    ## treat special H+/pH, e-/pe cases
    state_T <- RedModRphree::Act2pH(state_T)
    ## reduction of the problem
    if (setup$reduce) {
        reduced <- ReduceStateOmit(state_T, omit = setup$ann)
    } else {
        reduced <- state_T
    }
    ## inject data from workers
    res_C <- data
    rownames(res_C) <- NULL
    ## print(res_C)
    if (nrow(res_C) > nrow(reduced)) {
        res_C <- res_C[seq(2, nrow(res_C), by = 2), ]
    }
    ## recompose after the reduction
    if (setup$reduce) {
        state_C <- RecomposeState(res_C, reduced)
    } else {
        state_C <- res_C
    }
    setup$state_C <- state_C
    setup$reduced <- reduced
    return(setup)
 }
 ## Adapted version for "reduction"
 ReduceStateOmit <- function(data, omit = NULL, sign = 6) {
    require(mgcv)
    rem <- colnames(data)
    if (is.list(omit)) {
        indomi <- match(names(omit), colnames(data))
        datao <- data[, -indomi]
    } else {
        datao <- data
    }
    datao <- signif(datao, sign)
    red <- mgcv::uniquecombs(datao)
    inds <- attr(red, "index")
    now <- ncol(red)
    ## reattach the omitted column(s)
    ## FIXME: control if more than one ann is present
    if (is.list(omit)) {
        red <- cbind(red, rep(data[1, indomi], nrow(red)))
        colnames(red)[now + 1] <- names(omit)
        ret <- red[, colnames(data)]
    } else {
        ret <- red
    }
    rownames(ret) <- NULL
    attr(ret, "index") <- inds
    return(ret)
 }
 ## Attach the name of the calling function to the message displayed on
 ## R's stdout
 msgm <- function(...) {
    if (local_rank == 0) {
        fname <- as.list(sys.call(-1))[[1]]
        prefix <- paste0("R: ", fname, " ::")
        cat(paste(prefix, ..., "\n"))
    }
    invisible()
 }
 ## Function called by master R process to store on disk all relevant
 ## parameters for the simulation
 StoreSetup <- function(setup) {
    to_store <- vector(mode = "list", length = 4)
    ## names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT")
    names(to_store) <- c("Sim", "Transport", "DHT", "Cmdline")
    ## read the setup R file, which is sourced in kin.cpp
    tmpbuff <- file(filesim, "r")
    setupfile <- readLines(tmpbuff)
    close.connection(tmpbuff)
    to_store$Sim <- setupfile
    ## to_store$Flow <- list(
    ##     snapshots  = setup$snapshots,
    ##     gridfile   = setup$gridfile,
    ##     phase      = setup$phase,
    ##     density    = setup$density,
    ##     dt_differ  = setup$dt_differ,
    ##     prolong    = setup$prolong,
    ##     maxiter    = setup$maxiter,
    ##     saved_iter = setup$iter_output,
    ##     out_save   = setup$out_save )
    to_store$Transport <- setup$diffusion
    ## to_store$Chemistry <- list(
    ##    nprocs   = n_procs,
    ##    wp_size  = work_package_size,
    ##    base     = setup$base,
    ##    first    = setup$first,
    ##    init     = setup$initsim,
    ##    db       = db,
    ##    kin      = setup$kin,
    ##    ann      = setup$ann)
    if (dht_enabled) {
        to_store$DHT <- list(
            enabled   = dht_enabled,
            log       = dht_log
            ## signif    = dht_final_signif,
            ## proptype  = dht_final_proptype
        )
    } else {
        to_store$DHT <- FALSE
    }
  if (dht_enabled) {
    to_store$DHT <- list(
      enabled   = dht_enabled,
      log       = dht_log
      #signif    = dht_final_signif,
      #proptype  = dht_final_proptype
    )
  } else {
    to_store$DHT <- FALSE
  }
  saveRDS(to_store, file = paste0(fileout, "/setup.rds"))
  msgm("initialization stored in ", paste0(fileout, "/setup.rds"))
 }
 GetWorkPackageSizesVector <- function(n_packages, package_size, len) {
    ids <- rep(1:n_packages, times=package_size, each = 1)[1:len]
    return(as.integer(table(ids)))
 }
 )";
 static inline std::string init_r_library = R"(input <- readRDS("/home/max/Storage/poet/build/apps/out.rds")
 grid_def <- matrix(c(2, 3), nrow = 2, ncol = 5)
 library(data.table)
 pqc_to_grid <- function(pqc_in, grid) {
    # Convert the input DataFrame to a data.table
    dt <- data.table(pqc_in)
    # Flatten the matrix into a vector
    id_vector <- as.vector(t(grid))
    # Initialize an empty data.table to store the results
    result_dt <- data.table()
    # Iterate over each ID in the vector
    for (id_mat in id_vector) {
        # Find the matching row in the data.table
        matching_dt <- dt[id == id_mat]
        # Append the matching data.table row to the result data.table
        result_dt <- rbind(result_dt, matching_dt)
    }
    # Remove all columns which only contain NaN
    # result_dt <- result_dt[, colSums(is.na(result_dt)) != nrow(result_dt)]
    res_df <- as.data.frame(result_dt)
    return(res_df[, colSums(is.na(res_df)) != nrow(res_df)])
 }
 pqc_init <- pqc_to_grid(input, grid_def)
 test <- pqc_init
 modify_module_sizes <- function(mod_sizes, pqc_mat, init_grid) {
    # Find all unique IDs in init_grid
    unique_ids <- unique(as.vector(init_grid$id))
    # remove rows from pqc_mat that are not in init_grid
    pqc_mat <- as.data.frame(pqc_mat)
    pqc_mat <- pqc_mat[pqc_mat$id %in% unique_ids, ]
    # Find the column indices where all rows are NA
    na_cols <- which(colSums(is.na(pqc_mat)) == nrow(pqc_mat))
    # Build cumsum over mod_sizes
    cum_mod_sizes <- cumsum(mod_sizes)
    # Find the indices where the value of na_cols is equal to the value of cum_mod_sizes
    idx <- which(cum_mod_sizes %in% na_cols)
    # Set the value of mod_sizes to 0 at the indices found in the previous step
    mod_sizes[idx] <- 0
    return(mod_sizes)
 }
 # mod_sizes <- c(7, 0, 4, 2, 0)
 # unique_ids <- unique(as.vector(pqc_init$id))
 # tmp <- as.data.frame(input)
 # pqc_test <- tmp[tmp$id %in% unique_ids, ]
 # na_cols <- which(colSums(is.na(pqc_test)) == nrow(pqc_test))
 # cum_mod_sizes <- cumsum(mod_sizes)
 # # Get the indices of the columns of cum_mod_sizes where the value of the column is equal to the value of na_cols
 # idx <- which(cum_mod_sizes %in% na_cols)
 # mod_sizes[idx] <- 0
 # idx <- which(na_cols == cum_mod_sizes)
 # idx <- which(na_cols[1] >= cum_mod_sizes)
 # mod_sizes <- modify_module_sizes(mod_sizes, pqc_init, pqc_init)
 # remove column with all NA
 )";
 #endif // POET_H
		`@ -1 +1 @@`
			`Subproject commit 54136c8e0c638d0dc47e85cef1428822b6c1512d`				`Subproject commit d92ab31765d3850beb0b9e980b7b82696e56e4fc`