Include Python.h

This commit is contained in:
straile 2024-10-07 09:50:42 +02:00
parent eec7123001
commit b4b4d76c74
13 changed files with 101 additions and 93 deletions

View File

@ -19,6 +19,14 @@ find_package(MPI REQUIRED)
find_package(RRuntime REQUIRED)
# Prefer the interpreter of an active conda environment, but never override a
# Python3_EXECUTABLE that was provided explicitly (e.g. -DPython3_EXECUTABLE=...).
if(DEFINED ENV{CONDA_PREFIX} AND NOT DEFINED Python3_EXECUTABLE)
  set(poet_conda_python "$ENV{CONDA_PREFIX}/bin/python3")
  # Only pin the interpreter if it really exists; otherwise let FindPython3
  # perform its regular search instead of failing on a bogus path.
  if(EXISTS "${poet_conda_python}")
    set(Python3_EXECUTABLE "${poet_conda_python}")
  endif()
  unset(poet_conda_python)
endif()
# Python is required to use the AI surrogate. The Development component
# provides what is needed to embed the interpreter, the Interpreter component
# locates the executable itself.
find_package(Python3 COMPONENTS Development Interpreter REQUIRED)
add_subdirectory(src)
option(POET_PREPROCESS_BENCHS "Preprocess benchmarks" ON)

View File

@ -1,75 +0,0 @@
## This file contains default function implementations for the ai surrogate.
## To load pretrained models, use pre-/postprocessing, or change hyperparameters,
## it is recommended to override these functions with custom implementations via
## the input script. The path to the R file containing the functions must be set
## in the variable "ai_surrogate_input_script". See the barite_200.R file as an
## example and the general README for more information.
require(keras3)
require(tensorflow)
## Builds and compiles the default dense surrogate network.
## Reads the global "ai_surrogate_species" to size the input and output layers
## and returns the compiled keras model.
initiate_model <- function() {
  ## Hyperparameters of the default network
  layer_sizes <- c(48, 96, 24)
  act_fun <- "relu"
  loss_fun <- "mean_squared_error"

  ## Inputs and outputs both span the surrogate species
  n_inputs <- length(ai_surrogate_species)
  n_outputs <- length(ai_surrogate_species)

  ## Start from an empty sequential model
  net <- keras_model_sequential()

  ## First layer, sized and shaped after the input data
  layer_dense(net,
              units = n_inputs,
              activation = act_fun,
              input_shape = n_inputs,
              dtype = "float32")

  ## Hidden layers in the configured order
  for (idx in seq_along(layer_sizes)) {
    layer_dense(net,
                units = layer_sizes[idx],
                activation = act_fun,
                dtype = "float32")
  }

  ## Last layer, sized after the output data
  layer_dense(net,
              units = n_outputs,
              activation = act_fun,
              dtype = "float32")

  compile(net,
          loss = loss_fun,
          optimizer = "adam")

  net
}
## Passes the result of tf_gpu_configured() to msgm().
## NOTE(review): tf_gpu_configured() presumably comes from the tensorflow
## package and msgm() from the POET R library -- confirm; neither is defined
## in this file.
gpu_info <- function() {
msgm(tf_gpu_configured())
}
## Runs the model on the predictors and returns the prediction as a data frame
## that carries the same column names as the predictors.
prediction_step <- function(model, predictors) {
  predictor_matrix <- as.matrix(predictors)
  predicted <- predict(model, predictor_matrix)
  ## The raw prediction has no meaningful names; reuse those of the input
  colnames(predicted) <- colnames(predictors)
  as.data.frame(predicted)
}
## Default preprocessing: the identity. The data frame is handed back
## unchanged; "backtransform" and "outputs" are accepted but not used here.
preprocess <- function(df, backtransform = FALSE, outputs = FALSE) {
  df
}
## Default postprocessing: the identity. The data frame is handed back
## unchanged; "backtransform" and "outputs" are accepted but not used here.
postprocess <- function(df, backtransform = TRUE, outputs = TRUE) {
  df
}
## Copies every row of "prediction" whose validity flag equals 1 into the
## matching row of "temp_field" and returns the updated field.
set_valid_predictions <- function(temp_field, prediction, validity) {
  accepted_rows <- validity == 1
  temp_field[accepted_rows, ] <- prediction[accepted_rows, ]
  temp_field
}
## Fits the model on the given data and stores it in the output directory.
##   model      keras model to train
##   predictor  data frame of model inputs
##   target     data frame of training targets; only the columns that also
##              occur in "predictor" are used
##   validity   accepted for interface compatibility, not used by this
##              default implementation
## Reads the global "out_dir" to build the path of the saved model file.
training_step <- function(model, predictor, target, validity) {
  msgm("Training:")
  x <- as.matrix(predictor)
  y <- as.matrix(target[colnames(x)])
  model %>% fit(x, y)
  ## keras3 saves models via save_model(); overwrite = TRUE is required
  ## because this function writes to the same file on every training step.
  save_model(model,
             paste0(out_dir, "/current_model.keras"),
             overwrite = TRUE)
}

0
bench/barite/barite_200.R Normal file → Executable file
View File

0
bench/barite/barite_50ai.R Normal file → Executable file
View File

0
bench/barite/barite_50ai_rt.R Normal file → Executable file
View File

View File

@ -3,6 +3,10 @@
## load a pretrained model from tensorflow file
## Use the global variable "ai_surrogate_base_path" when using file paths
## relative to the input script
model_file_path <- normalizePath(paste0(ai_surrogate_base_path,
"barite_50ai_all.keras"))
initiate_model <- function() {
require(keras3)
require(tensorflow)

View File

@ -16,15 +16,17 @@ target_sources(POETLib
Chemistry/SurrogateModels/HashFunctions.cpp
Chemistry/SurrogateModels/InterpolationModule.cpp
Chemistry/SurrogateModels/ProximityHashTable.cpp
)
Chemistry/SurrogateModels/AI_functions.cpp
)
target_include_directories(POETLib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
target_include_directories(POETLib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" "${Python3_INCLUDE_DIRS}")
# Link POETLib against its dependencies. All of them are PUBLIC, so targets
# linking POETLib inherit the same usage requirements.
target_link_libraries(
  POETLib
  PUBLIC RRuntime
  PUBLIC IPhreeqcPOET
  PUBLIC tug
  PUBLIC MPI::MPI_C
  # Imported target of FindPython3 (Development component): carries the
  # include directories and libraries required to embed the interpreter.
  PUBLIC Python3::Python
)
include(FetchContent)
@ -80,10 +82,10 @@ target_compile_definitions(POETLib PUBLIC STRICT_R_HEADERS OMPI_SKIP_MPICXX)
# Read the R helper libraries at configure time; their contents are stored in
# the variables that poet.hpp.in refers to via @R_KIN_LIB@, @R_INIT_LIB@ and
# @R_AI_SURROGATE_LIB@.
file(READ "${PROJECT_SOURCE_DIR}/R_lib/kin_r_library.R" R_KIN_LIB )
file(READ "${PROJECT_SOURCE_DIR}/R_lib/init_r_lib.R" R_INIT_LIB)
file(READ "${PROJECT_SOURCE_DIR}/R_lib/ai_surrogate_model.R" R_AI_SURROGATE_LIB)
# Generate poet.hpp from the template; @ONLY restricts substitution to @VAR@
# references so that ${...} inside the template is left untouched.
configure_file(poet.hpp.in poet.hpp @ONLY)
add_executable(poet poet.cpp)
target_link_libraries(poet PRIVATE POETLib MPI::MPI_C RRuntime CLI11::CLI11)
# The generated poet.hpp is written to the binary directory, so that
# directory has to be on the include path of the executable.
target_include_directories(poet PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")

View File

@ -0,0 +1,21 @@
import tensorflow as tf
import os
def initiate_model(model_file_path):
    """Print the model file path and return ``None``.

    Loading the model is currently disabled, so no model object is created.

    Args:
        model_file_path: Path of the model file; only echoed to stdout.

    Returns:
        None.
    """
    # NOTE(review): the disabled loading code called `tf.keras.load_model`;
    # the Keras loader is presumably `tf.keras.models.load_model` -- confirm
    # before re-enabling it.
    print(model_file_path, flush=True)
    return None
def training_step(model, x, y, x_val, y_val, batch_size, epochs):
    """Train ``model`` and return the validation loss recorded by ``fit``.

    Args:
        model: Object with a Keras-style ``fit`` method.
        x: Training inputs.
        y: Training targets.
        x_val: Validation inputs.
        y_val: Validation targets.
        batch_size: Batch size forwarded to ``fit``.
        epochs: Accepted for interface compatibility but overridden below.

    Returns:
        The ``"val_loss"`` entry of the history dictionary returned by ``fit``.
    """
    epochs = 2000  # This is a constant parameter during all experiments
    history = model.fit(x, y,
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_data=(x_val, y_val))
    print(history, flush=True)
    # The object returned by fit() is not subscriptable; the recorded metrics
    # are stored in its `history` attribute.
    return history.history["val_loss"]
def prediction_step(model, x, batch_size):
    """Return ``model.predict(x, batch_size)``.

    Args:
        model: Object with a ``predict`` method.
        x: Input data forwarded to ``predict``.
        batch_size: Forwarded as the second positional argument of ``predict``.

    Returns:
        Whatever ``model.predict`` returns.
    """
    return model.predict(x, batch_size)

View File

@ -0,0 +1,24 @@
#include "AI_functions.hpp"
#include <iostream>
#include <string>
#include <cstring>
#include <Python.h>
using namespace std;
namespace poet {
void Python_Keras_setup(std::string SRC_DIR, std::string model_file_path) {
Py_Initialize(); // Initialize the Python interpreter
std::cout << SRC_DIR +
"/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py"
<< std::endl;
std::string python_keras_file = SRC_DIR + "/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py";
FILE* fp = fopen(python_keras_file.c_str(), "r");
PyRun_SimpleFile(fp, python_keras_file.c_str());
PyRun_SimpleString(("model = initiate_model(\"" + model_file_path + "\")").c_str());
PyErr_Print(); // Ensure that python errors make it to stdout
fclose(fp);
}
}

View File

@ -0,0 +1,24 @@
/**
* @file AI_functions.hpp
* @author Hans Straile (straile@uni-potsdam.de)
* @brief API for the AI/Machine Learning based chemistry surrogate model with functions to initialize a neural network and use it for training and inference via Keras for Python.
* @version 0.1
* @date 01 Nov 2024
*
* This file declares the interface between POET and the embedded Python
* interpreter that runs the Keras-based surrogate model.
*/
#ifndef AI_FUNCTIONS_H
#define AI_FUNCTIONS_H
#include <string>
namespace poet {
/**
 * @brief Set up the embedded Python interpreter for the Keras surrogate.
 *
 * @param SRC_DIR Root directory of the sources; the Python helper script is
 * looked up below this directory.
 * @param model_file_path Path that is handed to the Python function
 * `initiate_model`.
 */
void Python_Keras_setup(std::string SRC_DIR, std::string model_file_path);
} // namespace poet
#endif // AI_FUNCTIONS_H

View File

@ -51,8 +51,8 @@ void InitialList::initChemistry(const Rcpp::List &chem) {
// Get base path
ai_surrogate_input_script_path = ai_surrogate_input_script_path.substr(0, ai_surrogate_input_script_path.find_last_of('/') + 1);
// Add the filepath as a global variable in R to enable relative filepaths in the R script
fileContent += "\nai_surrogate_base_path <- \"" + ai_surrogate_input_script_path + "\"";
fileContent.insert(0, "ai_surrogate_base_path <- \"" + ai_surrogate_input_script_path + "\"\n");
this->ai_surrogate_input_script = fileContent;
}

View File

@ -28,6 +28,7 @@
#include "DataStructures/Field.hpp"
#include "Init/InitialList.hpp"
#include "Transport/DiffusionModule.hpp"
#include "Chemistry/SurrogateModels/AI_functions.hpp"
#include <RInside.h>
#include <Rcpp.h>
@ -299,6 +300,7 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,
chem.getField().update(diffusion.getField());
MSG("Chemistry step");
Python_Keras_setup(SRC_DIR, R["model_file_path"]);
if (params.use_ai_surrogate) {
double ai_start_t = MPI_Wtime();
// Save current values from the tug field as predictor for the ai step
@ -548,9 +550,14 @@ int main(int argc, char *argv[]) {
R["out_ext"] = run_params.out_ext;
R["out_dir"] = run_params.out_dir;
// MPI_Barrier(MPI_COMM_WORLD);
DiffusionModule diffusion(init_list.getDiffusionInit(),
init_list.getInitialGrid());
chemistry.masterSetField(init_list.getInitialGrid());
if (run_params.use_ai_surrogate) {
/* Incorporate ai surrogate from R */
R.parseEvalQ(ai_surrogate_r_library);
/* Use dht species for model input and output */
R["ai_surrogate_species"] =
init_list.getChemistryInit().dht_species.getNames();
@ -562,19 +569,11 @@ int main(int argc, char *argv[]) {
R.parseEvalQ(ai_surrogate_input_script);
MSG("AI: initialize AI model");
R.parseEval("model <- initiate_model()");
R.parseEval("gpu_info()");
//R.parseEval("model <- initiate_model()");
}
MSG("Init done on process with rank " + std::to_string(MY_RANK));
// MPI_Barrier(MPI_COMM_WORLD);
DiffusionModule diffusion(init_list.getDiffusionInit(),
init_list.getInitialGrid());
chemistry.masterSetField(init_list.getInitialGrid());
Rcpp::List profiling = RunMasterLoop(R, run_params, diffusion, chemistry);
MSG("finished simulation loop");

View File

@ -29,14 +29,15 @@
#include <Rcpp.h>
#define SRC_DIR "@CMAKE_SOURCE_DIR@"
static const char *poet_version = "@POET_VERSION@";
// using the Raw string literal to avoid escaping the quotes
static const inline std::string kin_r_library = R"(@R_KIN_LIB@)";
static const inline std::string init_r_library = R"(@R_INIT_LIB@)";
static const inline std::string ai_surrogate_r_library =
R"(@R_AI_SURROGATE_LIB@)";
static const inline std::string init_r_library = R"(@R_INIT_LIB@)";
static const inline std::string r_runtime_parameters = "mysetup";
struct RuntimeParameters {