From 103b26a097df33e0bdd66f6ae3ba479f39ee8f9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Max=20L=C3=BCbke?= Date: Tue, 9 Apr 2024 08:15:42 +0000 Subject: [PATCH] Refactor process to output as DataFrames --- R_lib/init_r_lib.R | 31 +++++++++----------- R_lib/kin_r_library.R | 16 +++++------ ext/doctest | 1 + src/DataStructures/Field.cpp | 7 +++-- src/Init/DiffusionInit.cpp | 56 ++++++++++++++++++++---------------- src/poet.cpp | 9 +++--- test/RInsidePOET_funcs.R | 3 ++ 7 files changed, 66 insertions(+), 57 deletions(-) create mode 160000 ext/doctest diff --git a/R_lib/init_r_lib.R b/R_lib/init_r_lib.R index 33bba851e..640c0e07b 100644 --- a/R_lib/init_r_lib.R +++ b/R_lib/init_r_lib.R @@ -34,28 +34,23 @@ resolve_pqc_bound <- function(pqc_mat, transport_spec, id) { return(value) } -add_column_after_position <- function(df, new_col, pos, new_col_name) { - # Split the data frame into two parts - df_left <- df[, 1:(pos)] - df_right <- df[, (pos + 1):ncol(df)] +add_missing_transport_species <- function(init_grid, new_names) { + # add 'ID' to new_names front, as it is not a transport species but required + new_names <- c("ID", new_names) + sol_length <- length(new_names) - # Add the new column to the left part - df_left[[new_col_name]] <- new_col + new_grid <- data.frame(matrix(0, nrow = nrow(init_grid), ncol = sol_length)) + names(new_grid) <- new_names - # Combine the left part, new column, and right part - df_new <- cbind(df_left, df_right) + matching_cols <- intersect(names(init_grid), new_names) - return(df_new) -} + # Copy matching columns from init_grid to new_grid + new_grid[, matching_cols] <- init_grid[, matching_cols] -add_missing_transport_species <- function(init_grid, new_names, old_size) { - # skip the ID column - column_index <- old_size + 1 - for (name in new_names) { - init_grid <- add_column_after_position(init_grid, rep(0, nrow(init_grid)), column_index, name) - column_index <- column_index + 1 - } + # Add missing columns to new_grid + append_df <- init_grid[, !(names(init_grid) %in% new_names)] + new_grid <- cbind(new_grid, append_df) - return(init_grid) + return(new_grid) } \ No newline at end of file diff --git a/R_lib/kin_r_library.R b/R_lib/kin_r_library.R index 4ecffdff7..835eea080 100644 --- a/R_lib/kin_r_library.R +++ b/R_lib/kin_r_library.R @@ -41,6 +41,7 @@ master_init <- function(setup, out_dir, init_field) { if (setup$store_result) { init_field_out <- paste0(out_dir, "/iter_0.rds") + init_field <- data.frame(init_field, check.names = FALSE) saveRDS(init_field, file = init_field_out) msgm("Stored initial field in ", init_field_out) if (is.null(setup[["out_save"]])) { @@ -68,16 +69,15 @@ master_iteration_end <- function(setup, state_T, state_C) { ## comprised in setup$out_save if (setup$store_result) { if (iter %in% setup$out_save) { - print(head(state_T)) - print(head(state_C)) nameout <- paste0(setup$out_dir, "/iter_", sprintf(fmt = fmt, iter), ".rds") - print(nameout) - # saveRDS(list( - # T = state_T, C = state_C, - # simtime = as.integer(setup$simulation_time) - # ), file = nameout) + + state_T <- data.frame(state_T, check.names = FALSE) + state_C <- data.frame(state_C, check.names = FALSE) + saveRDS(list( - T = state_T, C = state_C + T = state_T, + C = state_C, + simtime = as.integer(setup$simulation_time) ), file = nameout) msgm("results stored in <", nameout, ">") } diff --git a/ext/doctest b/ext/doctest new file mode 160000 index 000000000..ae7a13539 --- /dev/null +++ b/ext/doctest @@ -0,0 +1 @@ +Subproject commit ae7a13539fb71f270b87eb2e874fbac80bc8dda2 diff --git a/src/DataStructures/Field.cpp b/src/DataStructures/Field.cpp index 5f942640d..6eb6cab1f 100644 --- a/src/DataStructures/Field.cpp +++ b/src/DataStructures/Field.cpp @@ -127,7 +127,7 @@ SEXP poet::Field::asSEXP() const { output[elem] = Rcpp::wrap(map_it->second); } - return Rcpp::DataFrame(output); + return output; } poet::Field &poet::Field::operator=(const FieldColumn &cont_field) { @@ -190,8 +190,9 @@ void poet::Field::fromSEXP(const SEXP &s_rhs) { this->props = Rcpp::as>(in_list.names()); - this->req_vec_size = - static_cast(Rcpp::DataFrame(in_list).nrow()); + const Rcpp::NumericVector &in_vec = in_list[0]; + + this->req_vec_size = static_cast(in_vec.size()); for (const auto &elem : this->props) { const auto values = Rcpp::as>(in_list[elem]); diff --git a/src/Init/DiffusionInit.cpp b/src/Init/DiffusionInit.cpp index d39636d4e..1b2406a3a 100644 --- a/src/Init/DiffusionInit.cpp +++ b/src/Init/DiffusionInit.cpp @@ -1,3 +1,4 @@ +#include #include // leave above Rcpp includes, as eigen seem to have problems with a preceding // Rcpp include @@ -52,10 +53,11 @@ static std::vector colMajToRowMaj(const Rcpp::NumericVector &vec, } } -static std::vector extend_transport_names( - std::unique_ptr &phreeqc, const Rcpp::List &boundaries_list, - const Rcpp::List &inner_boundaries, - const std::vector &old_trans_names, Rcpp::List &initial_grid) { +static std::vector +extend_transport_names(std::unique_ptr &phreeqc, + const Rcpp::List &boundaries_list, + const Rcpp::List &inner_boundaries, + const std::vector &old_trans_names) { std::vector transport_names = old_trans_names; std::set constant_pqc_ids; @@ -77,21 +79,23 @@ static std::vector extend_transport_names( "length"); } - for (std::size_t i = 0; i < cells.size(); i++) { + for (auto i = 0; i < cells.size(); i++) { if (type_str[i] == "constant") { - constant_pqc_ids.insert(values[i]); + constant_pqc_ids.insert(static_cast(values[i])); } } } if (inner_boundaries.size() > 0) { const Rcpp::NumericVector values = inner_boundaries["sol_id"]; - for (std::size_t i = 0; i < values.size(); i++) { - constant_pqc_ids.insert(values[i]); + for (auto i = 0; i < values.size(); i++) { + constant_pqc_ids.insert(static_cast(values[i])); } } if (!constant_pqc_ids.empty()) { + constexpr std::size_t keep_h_o_charge = 3; + for (const auto &pqc_id : constant_pqc_ids) { const auto solution_names = phreeqc->getSolutionNames(pqc_id); @@ -99,7 +103,11 @@ static std::vector extend_transport_names( for (const auto &name : solution_names) { if (std::find(transport_names.begin(), transport_names.end(), name) == transport_names.end()) { - transport_names.push_back(name); + auto position = + std::lower_bound(transport_names.begin() + keep_h_o_charge, + transport_names.end(), name); + + transport_names.insert(position, name); } } } @@ -108,15 +116,15 @@ static std::vector extend_transport_names( return transport_names; } -static Rcpp::List extend_initial_grid(const Rcpp::List &initial_grid, - std::vector transport_names, - std::size_t old_size) { - std::vector names_to_add(transport_names.begin() + old_size, - transport_names.end()); +static Rcpp::List +extend_initial_grid(const Rcpp::List &initial_grid, + const std::vector &transport_names) { + // std::vector names_to_add(transport_names.begin() + old_size, + // transport_names.end()); Rcpp::Function extend_grid_R("add_missing_transport_species"); - return extend_grid_R(initial_grid, Rcpp::wrap(names_to_add), old_size); + return extend_grid_R(initial_grid, Rcpp::wrap(transport_names)); } std::pair @@ -128,15 +136,14 @@ InitialList::resolveBoundaries(const Rcpp::List &boundaries_list, const std::size_t old_transport_size = this->transport_names.size(); - this->transport_names = - extend_transport_names(this->phreeqc, boundaries_list, inner_boundaries, - this->transport_names, this->initial_grid); + this->transport_names = extend_transport_names( + this->phreeqc, boundaries_list, inner_boundaries, this->transport_names); const std::size_t new_transport_size = this->transport_names.size(); if (old_transport_size != new_transport_size) { - this->initial_grid = extend_initial_grid( - this->initial_grid, this->transport_names, old_transport_size); + this->initial_grid = + extend_initial_grid(this->initial_grid, this->transport_names); } for (const auto &species : this->transport_names) { @@ -166,12 +173,13 @@ InitialList::resolveBoundaries(const Rcpp::List &boundaries_list, "] cells and values are not the same length"); } - for (std::size_t i = 0; i < cells.size(); i++) { + for (auto i = 0; i < cells.size(); i++) { + const auto c_id = cells[i] - 1; if (type_str[i] == "closed") { - c_type[cells[i] - 1] = tug::BC_TYPE_CLOSED; + c_type[c_id] = tug::BC_TYPE_CLOSED; } else if (type_str[i] == "constant") { - c_type[cells[i] - 1] = tug::BC_TYPE_CONSTANT; - c_value[cells[i] - 1] = Rcpp::as( + c_type[c_id] = tug::BC_TYPE_CONSTANT; + c_value[c_id] = Rcpp::as( resolve_R(this->phreeqc_mat, Rcpp::wrap(species), values[i])); } else { throw std::runtime_error("Unknown boundary type"); diff --git a/src/poet.cpp b/src/poet.cpp index aa92a9f39..68c39592f 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -266,10 +266,11 @@ static Rcpp::List RunMasterLoop(const RuntimeParameters ¶ms, // --ignore-results is not given (and thus the R variable // store_result is TRUE) { - Rcpp::DataFrame t_field = diffusion.getField().asSEXP(); - Rcpp::DataFrame c_field = chem.getField().asSEXP(); - *global_rt_setup = - master_iteration_end_R.value()(*global_rt_setup, t_field, c_field); + const auto &trans_field = diffusion.getField().asSEXP(); + const auto &chem_field = chem.getField().asSEXP(); + + *global_rt_setup = master_iteration_end_R.value()( + *global_rt_setup, trans_field, chem_field); } MSG("End of *coupling* iteration " + std::to_string(iter) + "/" + diff --git a/test/RInsidePOET_funcs.R b/test/RInsidePOET_funcs.R index 668b05169..787a42252 100644 --- a/test/RInsidePOET_funcs.R +++ b/test/RInsidePOET_funcs.R @@ -13,10 +13,13 @@ bool_named_vec <- function(input) { } simple_field <- function(field) { + field <- as.data.frame(field, check.names = FALSE) field$Na <- 0 return(field) } extended_field <- function(field, additional) { + field <- as.data.frame(field, check.names = FALSE) + additional <- as.data.frame(additional, check.names = FALSE) return(field + additional) }