Merge branch 'origin/ai-surrogate-v03-temp-mdl' into ai_surrogate_merge

2025-12-15 12:28:22 +01:00 · 2024-06-05 15:59:00 +02:00 · 2024-06-05 15:59:00 +02:00 · 742ac96406
commit 742ac96406
parent 25df2df5b3 dcfe2c58db
9 changed files with 198 additions and 23 deletions
--- a/R_lib/ai_surrogate_model.R
+++ b/R_lib/ai_surrogate_model.R
@ -5,8 +5,8 @@
 ## in the variable "ai_surrogate_input_script". See the barite_200.R file as an
 ## example and the general README for more information.

-library(keras)
-library(tensorflow)
+## library(keras3)
+## library(tensorflow)

 initiate_model <- function() {
  hidden_layers <- c(48, 96, 24)
@ -54,6 +54,10 @@ preprocess <- function(df, backtransform = FALSE, outputs = FALSE) {
  return(df)
 }

+postprocess <- function(df, backtransform = TRUE, outputs = TRUE) {
+  return(df)
+}
+
 set_valid_predictions <- function(temp_field, prediction, validity) {
  temp_field[validity == 1, ] <- prediction[validity == 1, ]
  return(temp_field)
--- a/bench/barite/barite_200ai_surrogate_input_script.R
+++ b/bench/barite/barite_200ai_surrogate_input_script.R
@ -38,7 +38,7 @@ mass_balance <- function(predictors, prediction) {
 }

 validate_predictions <- function(predictors, prediction) {
-  epsilon <- 0.000000003
+  epsilon <- 3e-5
  mb <- mass_balance(predictors, prediction)
  msgm("Mass balance mean:", mean(mb))
  msgm("Mass balance variance:", var(mb))
--- a/bench/barite/barite_50ai.R
+++ b/bench/barite/barite_50ai.R
@ -0,0 +1,60 @@
+## Time-stamp: "Last modified 2024-05-30 13:34:14 delucia"
+cols <- 50
+rows <- 50
+
+s_cols <- 0.25
+s_rows <- 0.25
+
+grid_def <- matrix(2, nrow = rows, ncol = cols)
+
+# Define grid configuration for POET model
+grid_setup <- list(
+  pqc_in_file = "./barite.pqi",
+  pqc_db_file = "./db_barite.dat", ## Path to the database file for Phreeqc
+  grid_def = grid_def,             ## Definition of the grid, containing IDs according to the Phreeqc input script
+  grid_size = c(s_rows, s_cols),   ## Size of the grid in meters
+  constant_cells = c()             ## IDs of cells with constant concentration
+)
+
+bound_length <- 2
+
+bound_def <- list(
+  "type" = rep("constant", bound_length),
+  "sol_id" = rep(3, bound_length),
+  "cell" = seq(1, bound_length)
+)
+
+homogenous_alpha <- 1e-8
+
+diffusion_setup <- list(
+  boundaries = list(
+    "W" = bound_def,
+    "N" = bound_def
+  ),
+  alpha_x = homogenous_alpha,
+  alpha_y = homogenous_alpha
+)
+
+dht_species <- c(
+  "H"         = 4,
+  "O"         = 10,
+  "Charge"    = 4,
+  "Ba"        = 7,
+  "Cl"        = 4,
+  "S(6)"      = 7,
+  "Sr"        = 4,
+  "Barite"    = 2,
+  "Celestite" = 2
+)
+
+chemistry_setup <- list(
+  dht_species = dht_species,
+  ai_surrogate_input_script = "./barite_50ai_surr_mdl.R"
+)
+
+# Define a setup list for simulation configuration
+setup <- list(
+  Grid = grid_setup, # Parameters related to the grid structure
+  Diffusion = diffusion_setup, # Parameters related to the diffusion process
+  Chemistry = chemistry_setup
+)
--- a/bench/barite/barite_50ai.rds
+++ b/bench/barite/barite_50ai.rds
--- a/bench/barite/barite_50ai_all.keras
+++ b/bench/barite/barite_50ai_all.keras
--- a/bench/barite/barite_50ai_rt.R
+++ b/bench/barite/barite_50ai_rt.R
@ -0,0 +1,9 @@
+iterations <- 1000
+
+dt <- 200
+
+list(
+    timesteps = rep(dt, iterations),
+    store_result = TRUE,
+    out_save = c(1, 5, seq(20, iterations, by=20))
+)
--- a/bench/barite/barite_50ai_surr_mdl.R
+++ b/bench/barite/barite_50ai_surr_mdl.R
@ -0,0 +1,90 @@
+## Time-stamp: "Last modified 2024-05-30 13:27:06 delucia"
+
+## load a pretrained model from tensorflow file
+## Use the global variable "ai_surrogate_base_path" when using file paths
+## relative to the input script
+initiate_model <- function() {
+    require(keras3)
+    require(tensorflow)
+    init_model <- normalizePath(paste0(ai_surrogate_base_path,
+                                       "barite_50ai_all.keras"))
+    Model <- keras3::load_model(init_model)
+    msgm("Loaded model:")
+    print(str(Model))
+    return(Model)
+}
+
+scale_min_max <- function(x, min, max, backtransform) {
+    if (backtransform) {
+        return((x * (max - min)) + min)
+    } else {
+        return((x - min) / (max - min))
+    }
+}
+
+minmax <- list(min = c(H = 111.012433592824, O = 55.5062185549492, Charge = -3.1028354471876e-08, 
+                       Ba = 1.87312878574393e-141, Cl = 0, `S(6)` = 4.24227510643685e-07, 
+                       Sr = 0.00049382996130541, Barite = 0.000999542409828586, Celestite = 0.244801877115968),
+               max = c(H = 111.012433679682, O = 55.5087003521685, Charge = 5.27666636082035e-07, 
+                       Ba = 0.0908849779513762, Cl = 0.195697626449355, `S(6)` = 0.000620774752665846, 
+                       Sr = 0.0558680070692722, Barite = 0.756779139057097, Celestite = 1.00075422160624
+                       ))
+
+preprocess <- function(df) {
+    if (!is.data.frame(df))
+        df <- as.data.frame(df, check.names = FALSE)
+    
+    as.data.frame(lapply(colnames(df),
+                         function(x) scale_min_max(x=df[x],
+                                                   min=minmax$min[x],
+                                                   max=minmax$max[x],
+                                                   backtransform=FALSE)),
+                  check.names = FALSE)
+}
+
+postprocess <- function(df) {
+    if (!is.data.frame(df))
+        df <- as.data.frame(df, check.names = FALSE)
+    
+    as.data.frame(lapply(colnames(df),
+                         function(x) scale_min_max(x=df[x],
+                                                   min=minmax$min[x],
+                                                   max=minmax$max[x],
+                                                   backtransform=TRUE)),
+                  check.names = FALSE)
+}
+
+mass_balance <- function(predictors, prediction) {
+    dBa <- abs(prediction$Ba + prediction$Barite -
+               predictors$Ba - predictors$Barite)
+    dSr <- abs(prediction$Sr + prediction$Celestite -
+               predictors$Sr - predictors$Celestite)
+    return(dBa + dSr)
+}
+
+validate_predictions <- function(predictors, prediction) {
+    epsilon <- 1E-7
+    mb <- mass_balance(predictors, prediction)
+    msgm("Mass balance mean:", mean(mb))
+    msgm("Mass balance variance:", var(mb))
+    ret <- mb < epsilon
+    msgm("Rows where mass balance meets threshold", epsilon, ":",
+         sum(ret))
+    return(ret)
+}
+
+training_step <- function(model, predictor, target, validity) {
+  msgm("Starting incremental training:")
+
+  ## x <- as.matrix(predictor)
+  ## y <- as.matrix(target[colnames(x)])
+
+  history <- model %>% keras3::fit(x = data.matrix(predictor),
+                                   y = data.matrix(target),
+                                   epochs = 10, verbose=1)
+
+  keras3::save_model(model,
+                     filepath = paste0(out_dir, "/current_model.keras"),
+                     overwrite=TRUE)
+  return(model)
+}
--- a/src/Chemistry/WorkerFunctions.cpp
+++ b/src/Chemistry/WorkerFunctions.cpp
@ -48,7 +48,7 @@ void poet::ChemistryModule::WorkerLoop() {
    case CHEM_FIELD_INIT: {
      ChemBCast(&this->prop_count, 1, MPI_UINT32_T);
      if (this->ai_surrogate_enabled) {
-        this->ai_surrogate_validity_vector.reserve(this->n_cells);
+        this->ai_surrogate_validity_vector.resize(this->n_cells); // resize statt reserve?
      }
      break;
    }
@ -152,8 +152,8 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status,
  // current simulation time ('age' of simulation)
  current_sim_time = mpi_buffer[count + 3];

-  /* 4th double value is currently a placeholder */
-  // placeholder = mpi_buffer[count+4];
+  // current work package start location in field
+  wp_start_index = mpi_buffer[count + 4];

  for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) {
    s_curr_wp.input[wp_i] =
--- a/src/poet.cpp
+++ b/src/poet.cpp
@ -286,25 +286,36 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,
        std::to_string(chem.getField().GetRequestedVecSize()) + ")), TMP_PROPS)"));
      R.parseEval("predictors <- predictors[ai_surrogate_species]");

-      // Predict
+      // Apply preprocessing 
+      MSG("AI Preprocessing");
      R.parseEval("predictors_scaled <- preprocess(predictors)");

-      R.parseEval("prediction <- preprocess(prediction_step(model, predictors_scaled),\
-                                            backtransform = TRUE,\
-                                            outputs = TRUE)");
+      // Predict
+      MSG("AI Predict");
+      R.parseEval("aipreds_scaled <- prediction_step(model, predictors_scaled)");
+
+      // Apply postprocessing
+      MSG("AI Postprocesing");
+      R.parseEval("aipreds <- postprocess(aipreds_scaled)");

      // Validate prediction and write valid predictions to chem field
-      R.parseEval("validity_vector <- validate_predictions(predictors,\
-                                                           prediction)");
+      MSG("AI Validate");
+      R.parseEval("validity_vector <- validate_predictions(predictors, aipreds)");
+      
+      MSG("AI Marking accepted");
      chem.set_ai_surrogate_validity_vector(R.parseEval("validity_vector"));

+      MSG("AI TempField");
      std::vector<std::vector<double>> RTempField = R.parseEval("set_valid_predictions(predictors,\
-                                                                                       prediction,\
+                                                                                       aipreds,\
                                                                                       validity_vector)");

+      MSG("AI Set Field");
      Field predictions_field = Field(R.parseEval("nrow(predictors)"), 
                                      RTempField,
-                                      R.parseEval("names(predictors)"));
+                                      R.parseEval("colnames(predictors)"));
+
+      MSG("AI Update");
      chem.getField().update(predictions_field);
      double ai_end_t = MPI_Wtime();  
      R["ai_prediction_time"] = ai_end_t - ai_start_t;
@ -323,9 +334,10 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,
      R.parseEval("targets <- targets[ai_surrogate_species]");
      
      // TODO: Check how to get the correct columns
-      R.parseEval("target_scaled <- preprocess(targets, outputs = TRUE)");
+      R.parseEval("target_scaled <- preprocess(targets)");

-      R.parseEval("training_step(model, predictors_scaled, target_scaled, validity_vector)");
+      MSG("AI: incremental training");
+      R.parseEval("model <- training_step(model, predictors_scaled, target_scaled, validity_vector)");
      double ai_end_t = MPI_Wtime();
      R["ai_training_time"] = ai_end_t - ai_start_t;
    }
@ -464,14 +476,14 @@ int main(int argc, char *argv[]) {
        
        const std::string ai_surrogate_input_script = init_list.getChemistryInit().ai_surrogate_input_script;

-        if (!ai_surrogate_input_script.empty()) {
-          /* Incorporate user defined ai surrogate input script */
-          R.parseEvalQ(ai_surrogate_input_script);
+        if (!ai_surrogate_input_script_path.empty()) {
+          R["ai_surrogate_base_path"] = ai_surrogate_input_script_path.substr(0, ai_surrogate_input_script_path.find_last_of('/') + 1);

-          std::string ai_surrogate_base_path = R["ai_surrogate_base_path"];
-          R["ai_surrogate_base_path"] = ai_surrogate_base_path.substr(0, ai_surrogate_base_path.find_last_of('/') + 1);
+	  MSG("AI: sourcing user-provided script");
+	  R.parseEvalQ("source('" + ai_surrogate_input_script_path + "')");
        }
-        R.parseEval("model <- initiate_model()");
+	MSG("AI: initialize AI model");
+	R.parseEval("model <- initiate_model()");
        R.parseEval("gpu_info()");
      }

@ -493,7 +505,7 @@ int main(int argc, char *argv[]) {

      string r_vis_code;
      r_vis_code =
-          "saveRDS(profiling, file=paste0(setup$out_dir,'/timings.rds'));";
+	"saveRDS(profiling, file=paste0(setup$out_dir,'/timings.rds'));";
      R.parseEval(r_vis_code);

      MSG("Done! Results are stored as R objects into <" + run_params.out_dir +