From b4b4d76c742583f6829d0ba6d5f22956c17fc2bd Mon Sep 17 00:00:00 2001 From: straile Date: Mon, 7 Oct 2024 09:50:42 +0200 Subject: [PATCH] Include Python.h --- CMakeLists.txt | 8 ++ R_lib/ai_surrogate_model.R | 75 ------------------- bench/barite/barite_200.R | 0 bench/barite/barite_50ai.R | 0 bench/barite/barite_50ai_rt.R | 0 bench/barite/barite_50ai_surr_mdl.R | 4 + src/CMakeLists.txt | 8 +- .../AI_Python_functions/keras_AI_surrogate.py | 21 ++++++ .../SurrogateModels/AI_functions.cpp | 24 ++++++ .../SurrogateModels/AI_functions.hpp | 24 ++++++ src/Init/ChemistryInit.cpp | 4 +- src/poet.cpp | 21 +++--- src/poet.hpp.in | 5 +- 13 files changed, 101 insertions(+), 93 deletions(-) delete mode 100644 R_lib/ai_surrogate_model.R mode change 100644 => 100755 bench/barite/barite_200.R mode change 100644 => 100755 bench/barite/barite_50ai.R mode change 100644 => 100755 bench/barite/barite_50ai_rt.R create mode 100644 src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py create mode 100644 src/Chemistry/SurrogateModels/AI_functions.cpp create mode 100644 src/Chemistry/SurrogateModels/AI_functions.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fa7f009a1..4408f7c75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,14 @@ find_package(MPI REQUIRED) find_package(RRuntime REQUIRED) +# make sure to use the python installation from the conda environment +if(DEFINED ENV{CONDA_PREFIX}) + set(Python3_EXECUTABLE "$ENV{CONDA_PREFIX}/bin/python3") +endif() +# Python is required to use the AI surrogate +#find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED) +find_package(Python3 COMPONENTS Development Interpreter REQUIRED) + add_subdirectory(src) option(POET_PREPROCESS_BENCHS "Preprocess benchmarks" ON) diff --git a/R_lib/ai_surrogate_model.R b/R_lib/ai_surrogate_model.R deleted file mode 100644 index 860ee0d83..000000000 --- a/R_lib/ai_surrogate_model.R +++ /dev/null @@ -1,75 +0,0 @@ -## This file contains default function 
implementations for the ai surrogate. -## To load pretrained models, use pre-/postprocessing or change hyperparameters -## it is recommended to override these functions with custom implementations via -## the input script. The path to the R-file containing the functions mus be set -## in the variable "ai_surrogate_input_script". See the barite_200.R file as an -## example and the general README for more information. - -require(keras3) -require(tensorflow) - -initiate_model <- function() { - hidden_layers <- c(48, 96, 24) - activation <- "relu" - loss <- "mean_squared_error" - - input_length <- length(ai_surrogate_species) - output_length <- length(ai_surrogate_species) - ## Creates a new sequential model from scratch - model <- keras_model_sequential() - - ## Input layer defined by input data shape - model %>% layer_dense(units = input_length, - activation = activation, - input_shape = input_length, - dtype = "float32") - - for (layer_size in hidden_layers) { - model %>% layer_dense(units = layer_size, - activation = activation, - dtype = "float32") - } - - ## Output data defined by output data shape - model %>% layer_dense(units = output_length, - activation = activation, - dtype = "float32") - - model %>% compile(loss = loss, - optimizer = "adam") - return(model) -} - -gpu_info <- function() { - msgm(tf_gpu_configured()) -} - -prediction_step <- function(model, predictors) { - prediction <- predict(model, as.matrix(predictors)) - colnames(prediction) <- colnames(predictors) - return(as.data.frame(prediction)) -} - -preprocess <- function(df, backtransform = FALSE, outputs = FALSE) { - return(df) -} - -postprocess <- function(df, backtransform = TRUE, outputs = TRUE) { - return(df) -} - -set_valid_predictions <- function(temp_field, prediction, validity) { - temp_field[validity == 1, ] <- prediction[validity == 1, ] - return(temp_field) -} - -training_step <- function(model, predictor, target, validity) { - msgm("Training:") - - x <- as.matrix(predictor) - y <- 
as.matrix(target[colnames(x)]) - - model %>% fit(x, y) - - model %>% save_model_tf(paste0(out_dir, "/current_model.keras")) -} diff --git a/bench/barite/barite_200.R b/bench/barite/barite_200.R old mode 100644 new mode 100755 diff --git a/bench/barite/barite_50ai.R b/bench/barite/barite_50ai.R old mode 100644 new mode 100755 diff --git a/bench/barite/barite_50ai_rt.R b/bench/barite/barite_50ai_rt.R old mode 100644 new mode 100755 diff --git a/bench/barite/barite_50ai_surr_mdl.R b/bench/barite/barite_50ai_surr_mdl.R index 237d5a0cd..a66f59d0b 100644 --- a/bench/barite/barite_50ai_surr_mdl.R +++ b/bench/barite/barite_50ai_surr_mdl.R @@ -3,6 +3,10 @@ ## load a pretrained model from tensorflow file ## Use the global variable "ai_surrogate_base_path" when using file paths ## relative to the input script + +model_file_path <- normalizePath(paste0(ai_surrogate_base_path, + "barite_50ai_all.keras")) + initiate_model <- function() { require(keras3) require(tensorflow) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4b07f355a..be8e029f8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,15 +16,17 @@ target_sources(POETLib Chemistry/SurrogateModels/HashFunctions.cpp Chemistry/SurrogateModels/InterpolationModule.cpp Chemistry/SurrogateModels/ProximityHashTable.cpp -) + Chemistry/SurrogateModels/AI_functions.cpp +) -target_include_directories(POETLib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") +target_include_directories(POETLib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" "${Python3_INCLUDE_DIRS}") target_link_libraries( POETLib PUBLIC RRuntime PUBLIC IPhreeqcPOET PUBLIC tug PUBLIC MPI::MPI_C + PUBLIC "${Python3_LIBRARIES}" ) include(FetchContent) @@ -80,10 +82,10 @@ target_compile_definitions(POETLib PUBLIC STRICT_R_HEADERS OMPI_SKIP_MPICXX) file(READ "${PROJECT_SOURCE_DIR}/R_lib/kin_r_library.R" R_KIN_LIB ) file(READ "${PROJECT_SOURCE_DIR}/R_lib/init_r_lib.R" R_INIT_LIB) -file(READ "${PROJECT_SOURCE_DIR}/R_lib/ai_surrogate_model.R" R_AI_SURROGATE_LIB) 
configure_file(poet.hpp.in poet.hpp @ONLY) + add_executable(poet poet.cpp) target_link_libraries(poet PRIVATE POETLib MPI::MPI_C RRuntime CLI11::CLI11) target_include_directories(poet PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") diff --git a/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py b/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py new file mode 100644 index 000000000..25279c0b0 --- /dev/null +++ b/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py @@ -0,0 +1,21 @@ +import tensorflow as tf +import os + +def initiate_model(model_file_path): + print(model_file_path, flush=True) + #model = tf.keras.load_model(model_file_path) + #return model + return + +def training_step(model, x, y, x_val, y_val, batch_size, epochs): + epochs = 2000 # This is a constant parameter during all experiments + history = model.fit(x, y, + epochs=epochs, + batch_size=batch_size, + validation_data=(x_val, y_val)) + print(history, flush=True) + return history["val_loss"] + +def prediction_step(model, x, batch_size): + prediction = model.predict(x, batch_size) + return prediction \ No newline at end of file diff --git a/src/Chemistry/SurrogateModels/AI_functions.cpp b/src/Chemistry/SurrogateModels/AI_functions.cpp new file mode 100644 index 000000000..9de457274 --- /dev/null +++ b/src/Chemistry/SurrogateModels/AI_functions.cpp @@ -0,0 +1,24 @@ +#include "AI_functions.hpp" +#include +#include +#include +#include + +using namespace std; + +namespace poet { + +void Python_Keras_setup(std::string SRC_DIR, std::string model_file_path) { + Py_Initialize(); // Initialize the Python interpreter + std::cout << SRC_DIR + + "/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py" + << std::endl; + std::string python_keras_file = SRC_DIR + "/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py"; + FILE* fp = fopen(python_keras_file.c_str(), "r"); + PyRun_SimpleFile(fp, python_keras_file.c_str()); + 
PyRun_SimpleString(("model = initiate_model(\"" + model_file_path + "\")").c_str()); + PyErr_Print(); // Ensure that python errors make it to stdout + fclose(fp); +} + +} \ No newline at end of file diff --git a/src/Chemistry/SurrogateModels/AI_functions.hpp b/src/Chemistry/SurrogateModels/AI_functions.hpp new file mode 100644 index 000000000..2b1389122 --- /dev/null +++ b/src/Chemistry/SurrogateModels/AI_functions.hpp @@ -0,0 +1,24 @@ +/** + * @file AI_functions.hpp + * @author Hans Straile (straile@uni-potsdam.de) + * @brief API for the AI/Machine Learning based chemistry surrogate model with functions to initialize a neural network and use it for training and inference via Keras for Python. + * @version 0.1 + * @date 01 Nov 2024 + * + * This header declares the entry point that starts the embedded Python + * interpreter, runs the Keras helper script keras_AI_surrogate.py and then + * calls its initiate_model() function with the path to the model file. + * Training and inference wrappers are not declared here yet. 
+ */ + +#ifndef AI_FUNCTIONS_H +#define AI_FUNCTIONS_H + +#include + +namespace poet { + +void Python_Keras_setup(std::string SRC_DIR, std::string model_file_path); + +} // namespace poet +#endif // AI_FUNCTIONS_H \ No newline at end of file diff --git a/src/Init/ChemistryInit.cpp b/src/Init/ChemistryInit.cpp index 3c7a9871c..ebfb3de67 100644 --- a/src/Init/ChemistryInit.cpp +++ b/src/Init/ChemistryInit.cpp @@ -51,8 +51,8 @@ void InitialList::initChemistry(const Rcpp::List &chem) { // Get base path ai_surrogate_input_script_path = ai_surrogate_input_script_path.substr(0, ai_surrogate_input_script_path.find_last_of('/') + 1); // Add the filepath as a global variable in R to enable relative filepaths in the R script - fileContent += "\nai_surrogate_base_path <- \"" + ai_surrogate_input_script_path + "\""; - + fileContent.insert(0, "ai_surrogate_base_path <- \"" + ai_surrogate_input_script_path + "\"\n"); + this->ai_surrogate_input_script = fileContent; } diff --git a/src/poet.cpp b/src/poet.cpp index 9fbf94c18..2d2573a35 100644 --- a/src/poet.cpp +++ b/src/poet.cpp @@ -28,6 +28,7 @@ #include "DataStructures/Field.hpp" #include "Init/InitialList.hpp" #include "Transport/DiffusionModule.hpp" +#include "Chemistry/SurrogateModels/AI_functions.hpp" #include #include @@ -299,6 +300,7 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params, chem.getField().update(diffusion.getField()); MSG("Chemistry step"); + Python_Keras_setup(SRC_DIR, R["model_file_path"]); if (params.use_ai_surrogate) { double ai_start_t = MPI_Wtime(); // Save current values from the tug field as predictor for the ai step @@ -548,9 +550,14 @@ int main(int argc, char *argv[]) { R["out_ext"] = run_params.out_ext; R["out_dir"] = run_params.out_dir; + // MPI_Barrier(MPI_COMM_WORLD); + + DiffusionModule diffusion(init_list.getDiffusionInit(), + init_list.getInitialGrid()); + + chemistry.masterSetField(init_list.getInitialGrid()); + if (run_params.use_ai_surrogate) { - /* Incorporate 
ai surrogate from R */ - R.parseEvalQ(ai_surrogate_r_library); /* Use dht species for model input and output */ R["ai_surrogate_species"] = init_list.getChemistryInit().dht_species.getNames(); @@ -562,19 +569,11 @@ int main(int argc, char *argv[]) { R.parseEvalQ(ai_surrogate_input_script); MSG("AI: initialize AI model"); - R.parseEval("model <- initiate_model()"); - R.parseEval("gpu_info()"); + //R.parseEval("model <- initiate_model()"); } MSG("Init done on process with rank " + std::to_string(MY_RANK)); - // MPI_Barrier(MPI_COMM_WORLD); - - DiffusionModule diffusion(init_list.getDiffusionInit(), - init_list.getInitialGrid()); - - chemistry.masterSetField(init_list.getInitialGrid()); - Rcpp::List profiling = RunMasterLoop(R, run_params, diffusion, chemistry); MSG("finished simulation loop"); diff --git a/src/poet.hpp.in b/src/poet.hpp.in index 0e2409f87..1479bc92f 100644 --- a/src/poet.hpp.in +++ b/src/poet.hpp.in @@ -29,14 +29,15 @@ #include +#define SRC_DIR "@CMAKE_SOURCE_DIR@" + static const char *poet_version = "@POET_VERSION@"; // using the Raw string literal to avoid escaping the quotes static const inline std::string kin_r_library = R"(@R_KIN_LIB@)"; - -static const inline std::string init_r_library = R"(@R_INIT_LIB@)"; static const inline std::string ai_surrogate_r_library = R"(@R_AI_SURROGATE_LIB@)"; +static const inline std::string init_r_library = R"(@R_INIT_LIB@)"; static const inline std::string r_runtime_parameters = "mysetup"; struct RuntimeParameters {