diff --git a/R_lib/ai_surrogate_model.R b/R_lib/ai_surrogate_model.R index 268fb3098..0d8794c8a 100644 --- a/R_lib/ai_surrogate_model.R +++ b/R_lib/ai_surrogate_model.R @@ -5,8 +5,8 @@ ## in the variable "ai_surrogate_input_script". See the barite_200.R file as an ## example and the general README for more information. -library(keras) -library(tensorflow) +## library(keras3) +## library(tensorflow) initiate_model <- function() { hidden_layers <- c(48, 96, 24) @@ -54,6 +54,10 @@ preprocess <- function(df, backtransform = FALSE, outputs = FALSE) { return(df) } +postprocess <- function(df, backtransform = TRUE, outputs = TRUE) { + return(df) +} + set_valid_predictions <- function(temp_field, prediction, validity) { temp_field[validity == 1, ] <- prediction[validity == 1, ] return(temp_field) diff --git a/bench/barite/barite_200ai_surrogate_input_script.R b/bench/barite/barite_200ai_surrogate_input_script.R index 93e8e0191..a69487b74 100644 --- a/bench/barite/barite_200ai_surrogate_input_script.R +++ b/bench/barite/barite_200ai_surrogate_input_script.R @@ -38,7 +38,7 @@ mass_balance <- function(predictors, prediction) { } validate_predictions <- function(predictors, prediction) { - epsilon <- 0.000000003 + epsilon <- 3e-5 mb <- mass_balance(predictors, prediction) msgm("Mass balance mean:", mean(mb)) msgm("Mass balance variance:", var(mb)) diff --git a/bench/barite/barite_50ai.R b/bench/barite/barite_50ai.R new file mode 100644 index 000000000..c2a674a85 --- /dev/null +++ b/bench/barite/barite_50ai.R @@ -0,0 +1,60 @@ +## Time-stamp: "Last modified 2024-05-30 13:34:14 delucia" +cols <- 50 +rows <- 50 + +s_cols <- 0.25 +s_rows <- 0.25 + +grid_def <- matrix(2, nrow = rows, ncol = cols) + +# Define grid configuration for POET model +grid_setup <- list( + pqc_in_file = "./barite.pqi", + pqc_db_file = "./db_barite.dat", ## Path to the database file for Phreeqc + grid_def = grid_def, ## Definition of the grid, containing IDs according to the Phreeqc input script + grid_size = c(s_rows, s_cols), ## Size of the grid in meters + constant_cells = c() ## IDs of cells with constant concentration +) + +bound_length <- 2 + +bound_def <- list( + "type" = rep("constant", bound_length), + "sol_id" = rep(3, bound_length), + "cell" = seq(1, bound_length) +) + +homogenous_alpha <- 1e-8 + +diffusion_setup <- list( + boundaries = list( + "W" = bound_def, + "N" = bound_def + ), + alpha_x = homogenous_alpha, + alpha_y = homogenous_alpha +) + +dht_species <- c( + "H" = 4, + "O" = 10, + "Charge" = 4, + "Ba" = 7, + "Cl" = 4, + "S(6)" = 7, + "Sr" = 4, + "Barite" = 2, + "Celestite" = 2 +) + +chemistry_setup <- list( + dht_species = dht_species, + ai_surrogate_input_script = "./barite_50ai_surr_mdl.R" +) + +# Define a setup list for simulation configuration +setup <- list( + Grid = grid_setup, # Parameters related to the grid structure + Diffusion = diffusion_setup, # Parameters related to the diffusion process + Chemistry = chemistry_setup +) diff --git a/bench/barite/barite_50ai.rds b/bench/barite/barite_50ai.rds new file mode 100644 index 000000000..efc230f27 Binary files /dev/null and b/bench/barite/barite_50ai.rds differ diff --git a/bench/barite/barite_50ai_all.keras b/bench/barite/barite_50ai_all.keras new file mode 100644 index 000000000..38eeb9400 Binary files /dev/null and b/bench/barite/barite_50ai_all.keras differ diff --git a/bench/barite/barite_50ai_rt.R b/bench/barite/barite_50ai_rt.R new file mode 100644 index 000000000..a7e4b4099 --- /dev/null +++ b/bench/barite/barite_50ai_rt.R @@ -0,0 +1,9 @@ +iterations <- 1000 + +dt <- 200 + +list( + timesteps = rep(dt, iterations), + store_result = TRUE, + out_save = c(1, 5, seq(20, iterations, by=20)) +) diff --git a/bench/barite/barite_50ai_surr_mdl.R b/bench/barite/barite_50ai_surr_mdl.R new file mode 100644 index 000000000..237d5a0cd --- /dev/null +++ b/bench/barite/barite_50ai_surr_mdl.R @@ -0,0 +1,90 @@ +## Time-stamp: "Last modified 2024-05-30 13:27:06 delucia" + +## load a pretrained model from tensorflow file +## Use the global variable "ai_surrogate_base_path" when using file paths +## relative to the input script +initiate_model <- function() { + require(keras3) + require(tensorflow) + init_model <- normalizePath(paste0(ai_surrogate_base_path, + "barite_50ai_all.keras")) + Model <- keras3::load_model(init_model) + msgm("Loaded model:") + print(str(Model)) + return(Model) +} + +scale_min_max <- function(x, min, max, backtransform) { + if (backtransform) { + return((x * (max - min)) + min) + } else { + return((x - min) / (max - min)) + } +} + +minmax <- list(min = c(H = 111.012433592824, O = 55.5062185549492, Charge = -3.1028354471876e-08, + Ba = 1.87312878574393e-141, Cl = 0, `S(6)` = 4.24227510643685e-07, + Sr = 0.00049382996130541, Barite = 0.000999542409828586, Celestite = 0.244801877115968), + max = c(H = 111.012433679682, O = 55.5087003521685, Charge = 5.27666636082035e-07, + Ba = 0.0908849779513762, Cl = 0.195697626449355, `S(6)` = 0.000620774752665846, + Sr = 0.0558680070692722, Barite = 0.756779139057097, Celestite = 1.00075422160624 + )) + +preprocess <- function(df) { + if (!is.data.frame(df)) + df <- as.data.frame(df, check.names = FALSE) + + as.data.frame(lapply(colnames(df), + function(x) scale_min_max(x=df[x], + min=minmax$min[x], + max=minmax$max[x], + backtransform=FALSE)), + check.names = FALSE) +} + +postprocess <- function(df) { + if (!is.data.frame(df)) + df <- as.data.frame(df, check.names = FALSE) + + as.data.frame(lapply(colnames(df), + function(x) scale_min_max(x=df[x], + min=minmax$min[x], + max=minmax$max[x], + backtransform=TRUE)), + check.names = FALSE) +} + +mass_balance <- function(predictors, prediction) { + dBa <- abs(prediction$Ba + prediction$Barite - + predictors$Ba - predictors$Barite) + dSr <- abs(prediction$Sr + prediction$Celestite - + predictors$Sr - predictors$Celestite) + return(dBa + dSr) +} + +validate_predictions <- function(predictors, prediction) { + epsilon <- 1E-7 + mb <- mass_balance(predictors, prediction) + msgm("Mass balance mean:", mean(mb)) + msgm("Mass balance variance:", var(mb)) + ret <- mb < epsilon + msgm("Rows where mass balance meets threshold", epsilon, ":", + sum(ret)) + return(ret) +} + +training_step <- function(model, predictor, target, validity) { + msgm("Starting incremental training:") + + ## x <- as.matrix(predictor) + ## y <- as.matrix(target[colnames(x)]) + + history <- model %>% keras3::fit(x = data.matrix(predictor), + y = data.matrix(target), + epochs = 10, verbose=1) + + keras3::save_model(model, + filepath = paste0(out_dir, "/current_model.keras"), + overwrite=TRUE) + return(model) +} diff --git a/src/Chemistry/WorkerFunctions.cpp b/src/Chemistry/WorkerFunctions.cpp index 0e74f0875..335132d0a 100644 --- a/src/Chemistry/WorkerFunctions.cpp +++ b/src/Chemistry/WorkerFunctions.cpp @@ -48,7 +48,7 @@ void poet::ChemistryModule::WorkerLoop() { case CHEM_FIELD_INIT: { ChemBCast(&this->prop_count, 1, MPI_UINT32_T); if (this->ai_surrogate_enabled) { - this->ai_surrogate_validity_vector.reserve(this->n_cells); + this->ai_surrogate_validity_vector.resize(this->n_cells); // resize statt reserve? } break; } @@ -152,8 +152,8 @@ void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status, // current simulation time ('age' of simulation) current_sim_time = mpi_buffer[count + 3]; - /* 4th double value is currently a placeholder */ - // placeholder = mpi_buffer[count+4]; + // current work package start location in field + wp_start_index = mpi_buffer[count + 4]; for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) { s_curr_wp.input[wp_i] = diff --git a/src/poet.cpp b/src/poet.cpp index 5322f3b17..a8cf92e9d 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -286,25 +286,36 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters ¶ms, std::to_string(chem.getField().GetRequestedVecSize()) + ")), TMP_PROPS)")); R.parseEval("predictors <- predictors[ai_surrogate_species]"); - // Predict + // Apply preprocessing + MSG("AI Preprocessing"); R.parseEval("predictors_scaled <- preprocess(predictors)"); - R.parseEval("prediction <- preprocess(prediction_step(model, predictors_scaled),\ - backtransform = TRUE,\ - outputs = TRUE)"); + // Predict + MSG("AI Predict"); + R.parseEval("aipreds_scaled <- prediction_step(model, predictors_scaled)"); + + // Apply postprocessing + MSG("AI Postprocesing"); + R.parseEval("aipreds <- postprocess(aipreds_scaled)"); // Validate prediction and write valid predictions to chem field - R.parseEval("validity_vector <- validate_predictions(predictors,\ - prediction)"); + MSG("AI Validate"); + R.parseEval("validity_vector <- validate_predictions(predictors, aipreds)"); + + MSG("AI Marking accepted"); chem.set_ai_surrogate_validity_vector(R.parseEval("validity_vector")); + MSG("AI TempField"); std::vector> RTempField = R.parseEval("set_valid_predictions(predictors,\ - prediction,\ + aipreds,\ validity_vector)"); + MSG("AI Set Field"); Field predictions_field = Field(R.parseEval("nrow(predictors)"), RTempField, - R.parseEval("names(predictors)")); + R.parseEval("colnames(predictors)")); + + MSG("AI Update"); chem.getField().update(predictions_field); double ai_end_t = MPI_Wtime(); R["ai_prediction_time"] = ai_end_t - ai_start_t; @@ -323,9 +334,10 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters ¶ms, R.parseEval("targets <- targets[ai_surrogate_species]"); // TODO: Check how to get the correct columns - R.parseEval("target_scaled <- preprocess(targets, outputs = TRUE)"); + R.parseEval("target_scaled <- preprocess(targets)"); - R.parseEval("training_step(model, predictors_scaled, target_scaled, validity_vector)"); + MSG("AI: incremental training"); + R.parseEval("model <- training_step(model, predictors_scaled, target_scaled, validity_vector)"); double ai_end_t = MPI_Wtime(); R["ai_training_time"] = ai_end_t - ai_start_t; } @@ -464,14 +476,14 @@ int main(int argc, char *argv[]) { const std::string ai_surrogate_input_script = init_list.getChemistryInit().ai_surrogate_input_script; - if (!ai_surrogate_input_script.empty()) { - /* Incorporate user defined ai surrogate input script */ - R.parseEvalQ(ai_surrogate_input_script); + if (!ai_surrogate_input_script_path.empty()) { + R["ai_surrogate_base_path"] = ai_surrogate_input_script_path.substr(0, ai_surrogate_input_script_path.find_last_of('/') + 1); - std::string ai_surrogate_base_path = R["ai_surrogate_base_path"]; - R["ai_surrogate_base_path"] = ai_surrogate_base_path.substr(0, ai_surrogate_base_path.find_last_of('/') + 1); + MSG("AI: sourcing user-provided script"); + R.parseEvalQ("source('" + ai_surrogate_input_script_path + "')"); } - R.parseEval("model <- initiate_model()"); + MSG("AI: initialize AI model"); + R.parseEval("model <- initiate_model()"); R.parseEval("gpu_info()"); } @@ -493,7 +505,7 @@ int main(int argc, char *argv[]) { string r_vis_code; r_vis_code = - "saveRDS(profiling, file=paste0(setup$out_dir,'/timings.rds'));"; + "saveRDS(profiling, file=paste0(setup$out_dir,'/timings.rds'));"; R.parseEval(r_vis_code); MSG("Done! Results are stored as R objects into <" + run_params.out_dir +