Mirror of https://git.gfz-potsdam.de/naaice/poet.git (synced 2025-12-16 04:48:23 +01:00)

Commit ef77d755ab, parent d6d6db8b93: prototype naa online training
@@ -18,6 +18,9 @@ target_sources(POETLib
Chemistry/SurrogateModels/ProximityHashTable.cpp
)

# add_compile_options(-fsanitize=address)
# add_link_options(-fsanitize=address)

target_include_directories(
POETLib PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}"

@@ -45,14 +48,10 @@ if(USE_AI_SURROGATE)
find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED)

find_package(Threads REQUIRED)

set_source_files_properties(
Chemistry/SurrogateModels/AI_functions.cpp
PROPERTIES COMPILE_FLAGS "-O3 -march=native -mtune=native"
)

target_sources(POETLib
PRIVATE
Chemistry/SurrogateModels/serializer.cpp
Chemistry/SurrogateModels/AI_functions.cpp
)

@@ -64,6 +63,7 @@ if(USE_AI_SURROGATE)
target_link_libraries(POETLib
PUBLIC "${Python3_LIBRARIES}"
PUBLIC Threads::Threads pthread
PUBLIC naaice::middleware
)
endif() # USE_AI_SURROGATE

@@ -139,7 +139,7 @@ configure_file(poet.hpp.in poet.hpp @ONLY)

add_executable(poet poet.cpp)
target_link_libraries(poet PRIVATE POETLib MPI::MPI_C RRuntime CLI11::CLI11 naaice::middleware)
target_link_libraries(poet PRIVATE POETLib MPI::MPI_C RRuntime CLI11::CLI11)
target_include_directories(poet PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")

add_executable(poet_init initializer.cpp)

@@ -83,7 +83,9 @@ public:
std::uint32_t interp_bucket_size;
std::uint32_t interp_size_mb;
std::uint32_t interp_min_entries;
// ? extra struct for AI surrogate options?
bool ai_surrogate_enabled;
bool naa_enabled;
};

void masterEnableSurrogates(const SurrogateSetup &setup) {

@@ -478,7 +478,7 @@ void Python_Keras_train(std::vector<std::vector<double>> &x,
}
}

// Choose the correct model to traimn if clustering is used
// Choose the correct model to train if clustering is used
if (train_cluster == 1) {
if (!model_path.empty()) {
model_path.insert(model_path.length() - 6, "_reaction");

@@ -637,6 +637,7 @@ void parallel_training(EigenModel *Eigen_model,

// Acquire the Python GIL
PyGILState_STATE gstate = PyGILState_Ensure();

// Start training
Python_Keras_train(inputs, targets, train_cluster, model_name, params);

@@ -669,17 +670,12 @@ void naa_training(EigenModel *Eigen_model, EigenModel *Eigen_model_reactive,
bool *start_training, bool *end_training,
const RuntimeParameters &params, naa_handle *handle){

fprintf(stdout, "In naa_training\n");

// initialize models with weights from pretrained keras model
// declare memory regions for model weights, training and target data

// Initialize training data input and targets
std::vector<std::vector<double>> inputs(
training_data_buffer->x.size(),
std::vector<double>(params.training_data_size));
std::vector<std::vector<double>> targets(
training_data_buffer->x.size(),
std::vector<double>(params.training_data_size));

PyGILState_STATE gstate = PyGILState_Ensure();
Eigen_model_mutex->lock();

@@ -696,7 +692,15 @@ void naa_training(EigenModel *Eigen_model, EigenModel *Eigen_model_reactive,
Eigen_model_mutex->unlock();
PyGILState_Release(gstate);

// determine size for reuired memory regions
// Initialize training data input and targets
std::vector<std::vector<double>> inputs(
Eigen_model->biases[Eigen_model->biases.size()-1].size(), // number of species
std::vector<double>(params.training_data_size));
std::vector<std::vector<double>> targets(
Eigen_model->biases[Eigen_model->biases.size()-1].size(),
std::vector<double>(params.training_data_size));

// determine size for required memory regions
size_t modelSize = calculateStructSize(Eigen_model, 'E');
size_t modelSizeReactive = calculateStructSize(Eigen_model_reactive, 'E');

@@ -723,14 +727,16 @@ void naa_training(EigenModel *Eigen_model, EigenModel *Eigen_model_reactive,
}

// create memory regions
struct naa_param_t weight_region[] = {
struct naa_param_t input_regions[] = {
{(void *)serializedModel, modelSize},
{(void *)serializedTrainingData, trainingDataSize},
{(void *)serializedTargetData, targetDataSize}};

struct naa_param_t output_regions[] = {{(void *)serializedModel, modelSize}};

printf("-- Setting Up Connection --\n");
// function code encode the used ai model
if (naa_create(1, weight_region, 1, weight_region, 0, handle)) {
if (naa_create(1, input_regions, 3, output_regions, 1, handle)) {
fprintf(stderr, "Error during naa_create. Exiting.\n");
exit(EXIT_FAILURE);
}
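For orientation between these hunks: the commit registers the serialized model, training inputs, and targets as middleware input regions, the model buffer as the single output region, and then (in the next hunk) invokes the remote training and waits for the returned weights. The condensed sketch below strings those calls together exactly as they appear in this diff; the meaning of naa_create's integer arguments (a function code selecting the AI model plus the region counts) is an assumption based on the surrounding comment, and the buffer/size parameters stand in for the serialized buffers allocated earlier in naa_training.

extern "C" {
#include <naaice.h>
}
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Sketch only: condensed naa_* call sequence as used by naa_training in this commit.
void naa_roundtrip_sketch(char *serializedModel, size_t modelSize,
                          char *serializedTrainingData, size_t trainingDataSize,
                          char *serializedTargetData, size_t targetDataSize,
                          naa_handle *handle) {
  // Input regions: model weights plus training inputs and targets.
  struct naa_param_t input_regions[] = {
      {(void *)serializedModel, modelSize},
      {(void *)serializedTrainingData, trainingDataSize},
      {(void *)serializedTargetData, targetDataSize}};
  // Output region: the retrained weights are written back into the model buffer.
  struct naa_param_t output_regions[] = {{(void *)serializedModel, modelSize}};

  // First argument assumed to be the function code selecting the used AI model.
  if (naa_create(1, input_regions, 3, output_regions, 1, handle)) {
    fprintf(stderr, "Error during naa_create. Exiting.\n");
    exit(EXIT_FAILURE);
  }
  if (naa_invoke(handle)) {           // trigger the remote training RPC
    fprintf(stderr, "Error during naa_invoke. Exiting.\n");
    exit(EXIT_FAILURE);
  }
  naa_status status;
  if (naa_wait(handle, &status)) {    // block until the new weights have arrived
    fprintf(stderr, "Error occurred during naa_wait. Exiting.\n");
    exit(EXIT_FAILURE);
  }
  printf("Bytes received: %d, RPC Return code: %d\n",
         status.bytes_received, status.naa_error);
}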

@@ -826,31 +832,101 @@ void naa_training(EigenModel *Eigen_model, EigenModel *Eigen_model_reactive,
} else {
int res = serializeModelWeights(Eigen_model, serializedModel);
}

// checksum serializeModel
double checksum_model = 0;
for(size_t i = 0; i < Eigen_model->weight_matrices.size(); i++){
checksum_model += Eigen_model->weight_matrices[i].sum();
}
for(size_t j=0; j<Eigen_model->biases.size(); j++){
checksum_model += Eigen_model->biases[j].sum();
}

fprintf(stdout, "Checksum model: %f\n", checksum_model);

int res1 = serializeTrainingData(&inputs, serializedTrainingData);
int res2 = serializeTrainingData(&targets, serializedTargetData);
// std::vector<std::vector<double>> deserializedTrainingData = deserializeTrainingData(serializedTrainingData);
// std::vector<std::vector<double>> deserializedTargetData = deserializeTrainingData(serializedTargetData);

// calculate checksum of inputs
// double checksum_inputs = 0;
// for (size_t i = 0; i < inputs.size(); i++) {
// for (size_t j = 0; j < inputs[i].size(); j++) {
// checksum_inputs += inputs[i][j];
// // fprintf(stdout, "inputs: %f\n", inputs[i][j]);
// }
// }

printf("-- RPC Invocation --\n");
if (naa_invoke(handle)) {
fprintf(stderr, "Error during naa_invoke. Exiting.\n");
// // calculate checksum of inputs
// double checksum_targets = 0;
// for (size_t i = 0; i < targets.size(); i++) {
// for (size_t j = 0; j < targets[i].size(); j++) {
// checksum_targets += targets[i][j];
// // fprintf(stdout, "inputs: %f\n", inputs[i][j]);
// }
// }

// double checksum_training = 0;
// for (size_t i = 0; i < deserializedTrainingData.size(); i++) {
// for (size_t j = 0; j < deserializedTrainingData[i].size(); j++) {
// checksum_training += deserializedTrainingData[i][j];
// // fprintf(stdout, "inputs: %f\n", deserializedTrainingData[i][j]);
// }
// }

// double checksum_testing = 0;
// for (size_t i = 0; i < deserializedTargetData.size(); i++) {
// for (size_t j = 0; j < deserializedTargetData[i].size(); j++) {
// checksum_testing += deserializedTargetData[i][j];
// // fprintf(stdout, "inputs: %f\n", deserializedTrainingData[i][j]);
// }
// }

// fprintf(stdout, "Checksum inputs: %f\n", checksum_inputs);
// fprintf(stdout, "Checksum training: %f\n", checksum_training);
// fprintf(stdout, "Checksum targets: %f\n", checksum_targets);
// fprintf(stdout, "Checksum testing: %f\n", checksum_testing);

printf("-- RPC Invocation --\n");
if (naa_invoke(handle)) {
fprintf(stderr, "Error during naa_invoke. Exiting.\n");
exit(EXIT_FAILURE);
}

// naa_wait with new weights
naa_status status;
if (naa_wait(handle, &status)) {
fprintf(stderr, "Error occurred during naa_wait. Exiting.\n");
exit(EXIT_FAILURE);
}

// TODO: naa_wait with new weights

// TODO: update model weights with received weights
printf("Bytes received: %d, RPC Return code: %d\n",
status.bytes_received, status.naa_error);

// update model weights with received weights
EigenModel deserializedModel =
deserializeModelWeights(serializedModel, modelSize);
fprintf(stdout, "After deserialization: %f\n",
deserializedModel.weight_matrices[0](0, 0));

for (int i = 0; i < Eigen_model->weight_matrices[0].rows(); i++) {
for (int j = 0; j < Eigen_model->weight_matrices[0].cols(); j++) {
fprintf(stdout, "model: %f, deserializedModel: %f\n",
Eigen_model->weight_matrices[0](i, j),
deserializedModel.weight_matrices[0](i, j));
}
}
Eigen_model_mutex->lock();

Eigen_model->weight_matrices = deserializedModel.weight_matrices;
Eigen_model->biases = deserializedModel.biases;

Eigen_model_mutex->unlock();

// for (int i = 0; i < Eigen_model->weight_matrices[0].rows(); i++) {
// for (int j = 0; j < Eigen_model->weight_matrices[0].cols(); j++) {
// fprintf(stdout, "model: %f, deserializedModel: %f\n",
// Eigen_model->weight_matrices[0](i, j),
// deserializedModel.weight_matrices[0](i, j));
// }
// }
}

printf("-- Cleaning Up --\n");

@@ -917,7 +993,7 @@ int Python_Keras_training_thread(
* @param weights Vector of model weights from keras as returned by
* Python_Keras_get_weights()
*/
// ? check if updating was succesful -> hash about values?
// ? check if updating was successful -> hash about values?
void update_weights(
EigenModel *model,
const std::vector<std::vector<std::vector<double>>> &weights) {

@@ -35,7 +35,7 @@ size_t calculateStructSize(void *struct_pointer, char type){
}
} else if (type == 'T') {
struct_size += sizeof(size_t); // number of vectors
struct_size += sizeof(size_t); // length of vector
struct_size += static_cast<std::vector<std::vector<double>>*>(struct_pointer)->size() * sizeof(size_t); // length of vector
for (const std::vector<double> &vector:
*static_cast<std::vector<std::vector<double>>*>(struct_pointer)){
struct_size += vector.size() * sizeof(double);

@@ -56,7 +56,6 @@ int serializeModelWeights(const EigenModel *model, char *memory){
size_counter += sizeof(size_t);
for (const Eigen::MatrixXd &matrix : model->weight_matrices) {
size_t rows = matrix.rows(), cols = matrix.cols();
fprintf(stdout, "rows: %zu, cols: %zu\n", rows, cols);
std::memcpy(memory, &rows, sizeof(size_t));
memory += sizeof(size_t);
size_counter += sizeof(size_t);

@@ -125,9 +124,8 @@ EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
}

// interpret memory as Eigen::MatrixXd (more efficient than memcpy?)
Eigen::MatrixXd temp = Eigen::Map<Eigen::MatrixXd>(
matrix = Eigen::Map<Eigen::MatrixXd>(
reinterpret_cast<double*>(memory), rows, cols);
matrix = temp; // copy data to matrix
memory += rows * cols * sizeof(double);
size_counter += rows * cols * sizeof(double);
}
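The change in the hunk above drops the named temporary and assigns the Eigen::Map directly to the target matrix; in both variants the mapped bytes end up copied into storage owned by the matrix. A standalone illustration of that pattern (independent of the POET structs):

#include <Eigen/Dense>
#include <iostream>

int main() {
  // Raw buffer holding a 2x3 matrix in Eigen's default column-major order.
  double raw[6] = {1, 2, 3, 4, 5, 6};

  // Map interprets the memory in place (no copy)...
  Eigen::Map<Eigen::MatrixXd> view(raw, 2, 3);

  // ...and assigning the map to a MatrixXd copies the data into owned storage,
  // which is what deserializeModelWeights relies on.
  Eigen::MatrixXd matrix = view;

  raw[0] = 42;                    // later changes to the buffer do not affect the copy
  std::cout << matrix << "\n";    // still prints the original values
  return 0;
}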

@@ -161,7 +159,6 @@ EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
}

// same procedure as for the matrices
// TODO: delete temp variable
Eigen::VectorXd temp = Eigen::Map<Eigen::VectorXd>(
reinterpret_cast<double*>(memory), size);
bias = temp; // copy the data

@@ -173,14 +170,14 @@ EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
}

int serializeTrainingData(std::vector<std::vector<double>> data, char *memory){
int serializeTrainingData(std::vector<std::vector<double>> *data, char *memory){

size_t num_vectors = data.size();
size_t num_vectors = data->size();

std::memcpy(memory, &num_vectors, sizeof(size_t));
memory += sizeof(size_t);

for (const std::vector<double> &vector : data) {
for (const std::vector<double> &vector : *data) {
size_t size = vector.size();
std::memcpy(memory, &size, sizeof(size_t));
memory += sizeof(size_t);

@@ -196,6 +193,7 @@ std::vector<std::vector<double>> deserializeTrainingData(char* data){
std::vector<std::vector<double>> deserialized_data;
size_t num_vectors;
std::memcpy(&num_vectors, data, sizeof(size_t));
fprintf(stdout, "num_vectors: %zu\n", num_vectors);
data += sizeof(size_t);

for (size_t i = 0; i < num_vectors; i++) {

@@ -205,6 +203,7 @@ std::vector<std::vector<double>> deserializeTrainingData(char* data){

std::vector<double> vector(size);
std::memcpy(vector.data(), data, size * sizeof(double));

data += size * sizeof(double);

deserialized_data.push_back(vector);
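Taken together, serializeTrainingData and deserializeTrainingData use a flat layout: one size_t vector count, then for each vector a size_t length followed by its raw doubles, which is also what the 'T' branch of calculateStructSize now accounts for. The round-trip below is a self-contained re-implementation of that layout for illustration; it is not the repository code:

#include <cassert>
#include <cstring>
#include <vector>

// Layout: [num_vectors][len_0][doubles_0...][len_1][doubles_1...]...
static void pack(const std::vector<std::vector<double>> &data, char *mem) {
  size_t n = data.size();
  std::memcpy(mem, &n, sizeof(size_t)); mem += sizeof(size_t);
  for (const auto &v : data) {
    size_t len = v.size();
    std::memcpy(mem, &len, sizeof(size_t)); mem += sizeof(size_t);
    std::memcpy(mem, v.data(), len * sizeof(double)); mem += len * sizeof(double);
  }
}

static std::vector<std::vector<double>> unpack(const char *mem) {
  size_t n;
  std::memcpy(&n, mem, sizeof(size_t)); mem += sizeof(size_t);
  std::vector<std::vector<double>> out;
  for (size_t i = 0; i < n; ++i) {
    size_t len;
    std::memcpy(&len, mem, sizeof(size_t)); mem += sizeof(size_t);
    std::vector<double> v(len);
    std::memcpy(v.data(), mem, len * sizeof(double)); mem += len * sizeof(double);
    out.push_back(std::move(v));
  }
  return out;
}

int main() {
  std::vector<std::vector<double>> data = {{1.0, 2.0}, {3.0, 4.0, 5.0}};
  // Buffer size mirrors calculateStructSize(..., 'T'): counts plus payload.
  size_t bytes = sizeof(size_t) + data.size() * sizeof(size_t) + 5 * sizeof(double);
  std::vector<char> buffer(bytes);
  pack(data, buffer.data());
  assert(unpack(buffer.data()) == data);
  return 0;
}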

src/poet.cpp (39 changed lines)
@@ -33,6 +33,7 @@
#include <Rcpp/DataFrame.h>
#include <Rcpp/Function.h>
#include <Rcpp/vector/instantiation.h>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <memory>

@@ -46,9 +47,7 @@
#include <vector>
#include <stdio.h>

extern "C"{
#include <naaice.h>
}

using namespace std;
using namespace poet;

@@ -77,6 +76,11 @@ static void init_global_functions(RInside &R) {
SaveRObj_R = DEFunc("SaveRObj");
}

void signalHandler(int signum) {
fprintf(stderr, "Terminate program by user.\n");
exit(signum);
}

// HACK: this is a step back as the order and also the count of fields is
// predefined, but it will change in the future

@@ -310,18 +314,30 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,
std::vector<int> cluster_labels;
if (params.use_ai_surrogate) {
MSG("AI: Initialize model");
struct naa_handle *handle = NULL;
if(params.use_naa){
handle = (naa_handle*) calloc(1, sizeof(struct naa_handle));
if(!handle){
throw std::runtime_error("Could not allocate memory for NAA handle");
}

// TODO: create regions here (size could be determined from run_params (size of trainings buffer))
struct naa_param_t *model_weights;
}

// Initiate two models from one file
Python_Keras_load_model(R["model_file_path"], R["model_reactive_file_path"],
params.use_clustering);

if (!params.disable_training) {
MSG("AI: Initialize training thread");
// TODO add naa_handle as optional parameter which is NULL per default

Python_Keras_training_thread(&Eigen_model, &Eigen_model_reactive,
&Eigen_model_mutex, &training_data_buffer,
&training_data_buffer_mutex,
&training_data_buffer_full, &start_training,
&end_training, params);
&end_training, params, handle);
}
if (!params.use_Keras_predictions) {
// Initialize Eigen model for custom inference function
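The hunk above allocates the naa_handle only when params.use_naa is set and otherwise leaves it NULL, in line with the TODO about making the handle an optional, NULL-by-default parameter of the training thread. A small sketch of that pattern, assuming <naaice.h> declares a complete struct naa_handle (as the calloc in the hunk implies):

extern "C" {
#include <naaice.h>
}
#include <cstdlib>
#include <stdexcept>

// Sketch: allocate the middleware handle only when NAA offload is requested;
// callers pass the returned pointer (possibly NULL) on to the training thread.
naa_handle *make_naa_handle(bool use_naa) {
  if (!use_naa) {
    return nullptr;  // no handle: the training thread is expected to skip the NAA path
  }
  naa_handle *handle =
      static_cast<naa_handle *>(std::calloc(1, sizeof(struct naa_handle)));
  if (!handle) {
    throw std::runtime_error("Could not allocate memory for NAA handle");
  }
  return handle;
}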

@@ -390,12 +406,11 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,
R["TMP"] = Rcpp::wrap(chem.getField().AsVector());
R.parseEval(std::string("predictors <- ") +
"set_field(TMP, TMP_PROPS, field_nrow, ai_surrogate_species)");

// Apply preprocessing
MSG("AI Preprocessing");
R.parseEval("predictors_scaled <- preprocess(predictors)");
std::vector<std::vector<double>> predictors_scaled = R["predictors_scaled"];

// Get K-Means cluster assignements based on the preprocessed data
if (params.use_clustering) {
R.parseEval("cluster_labels <- assign_clusters(predictors_scaled)");

@@ -425,6 +440,7 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,
// Validate prediction and write valid predictions to chem field
MSG("AI: Validate");
R.parseEval("validity_vector <- validate_predictions(predictors, predictions)");
// R.parseEval("print(head(predictors, 30))");

MSG("AI: Marking valid");
chem.set_ai_surrogate_validity_vector(R.parseEval("validity_vector"));

@@ -470,8 +486,9 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,

std::vector<std::vector<double>> invalid_x =
R.parseEval("get_invalid_values(predictors_scaled, validity_vector)");

R.parseEval("target_scaled <- preprocess(state_C[ai_surrogate_species])");
// R.parseEval("print(head(state_C[ai_surrogate_species], 30))");
std::vector<std::vector<double>> invalid_y =
R.parseEval("get_invalid_values(target_scaled, validity_vector)");

@@ -599,6 +616,8 @@ std::vector<std::string> getSpeciesNames(const Field &&field, int root,
}

int main(int argc, char *argv[]) {
signal(SIGINT, signalHandler);

int world_size;

// Threadsafe MPI is necessary for the AI surrogate

@@ -656,9 +675,11 @@ int main(int argc, char *argv[]) {
run_params.interp_bucket_entries,
run_params.interp_size,
run_params.interp_min_entries,
run_params.use_ai_surrogate};
run_params.use_ai_surrogate,
run_params.use_naa};
// TODO add option for naa training

chemistry.masterEnableSurrogates(surr_setup);

if (MY_RANK > 0) {

@@ -685,6 +706,7 @@ int main(int argc, char *argv[]) {
chemistry.masterSetField(init_list.getInitialGrid());

if (run_params.use_ai_surrogate) {

// Load default function implementations
R.parseEvalQ(ai_surrogate_r_library);
/* Use dht species for model input and output */

@@ -704,7 +726,6 @@ int main(int argc, char *argv[]) {
variable of the same name in one of the the R input scripts)*/
run_params.training_data_size = init_list.getDiffusionInit().n_rows *
init_list.getDiffusionInit().n_cols; // Default value is number of cells in field
// ? error handling for all if statements
if (Rcpp::as<bool>(R.parseEval("exists(\"batch_size\")"))) {
run_params.batch_size = R["batch_size"];
}

src/poet.hpp (new file, 412 lines)
@@ -0,0 +1,412 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2023 Marco De Lucia, Max Luebke (GFZ Potsdam)
**
** Copyright (C) 2023-2024 Max Luebke (University of Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#pragma once

#include <cstdint>
#include <set>
#include <string>
#include <vector>

#include <Rcpp.h>

#define SRC_DIR "/mnt/scratch/signer/poet"
#define CUDA_SRC_DIR ""
#define USE_NAA "USE_NAA;ON"

// target and source IP address for NAA support
// #ifdef USE_NAA
#define SOURCE_IP "10.3.10.41"
#define TARGET_IP "10.3.10.42"
// #endif

static const char *poet_version = "naaice/v0.3-105-g13ad41d";

// using the Raw string literal to avoid escaping the quotes
static const inline std::string kin_r_library = R"(### Copyright (C) 2018-2023 Marco De Lucia, Max Luebke (GFZ Potsdam)
###
### POET is free software; you can redistribute it and/or modify it under the
### terms of the GNU General Public License as published by the Free Software
### Foundation; either version 2 of the License, or (at your option) any later
### version.
###
### POET is distributed in the hope that it will be useful, but WITHOUT ANY
### WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
### A PARTICULAR PURPOSE. See the GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License along with
### this program; if not, write to the Free Software Foundation, Inc., 51
### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

master_init <- function(setup, out_dir, init_field) {
## Setup the directory where we will store the results
if (!dir.exists(out_dir)) {
dir.create(out_dir)
msgm("created directory ", out_dir)
} else {
msgm("dir ", out_dir, " already exists, I will overwrite!")
}
if (is.null(setup$store_result)) {
msgm("store_result doesn't exist!")
} else {
msgm("store_result is ", setup$store_result)
}

setup$iter <- 1
setup$timesteps <- setup$timesteps
setup$maxiter <- length(setup$timesteps)
setup$iterations <- setup$maxiter
setup$simulation_time <- 0

dgts <- as.integer(ceiling(log10(setup$maxiter)))
## string format to use in sprintf
fmt <- paste0("%0", dgts, "d")

if (is.null(setup[["store_result"]])) {
setup$store_result <- TRUE
}

if (setup$store_result) {
init_field_out <- paste0(out_dir, "/iter_", sprintf(fmt = fmt, 0), ".", setup$out_ext)
init_field <- data.frame(init_field, check.names = FALSE)
SaveRObj(x = init_field, path = init_field_out)
msgm("Stored initial field in ", init_field_out)
if (is.null(setup[["out_save"]])) {
setup$out_save <- seq(1, setup$iterations)
}
}

setup$out_dir <- out_dir

return(setup)
}

## This function, called only by master, stores on disk the last
## calculated time step if store_result is TRUE and increments the
## iteration counter
master_iteration_end <- function(setup, state_T, state_C) {
iter <- setup$iter
# print(iter)
## max digits for iterations
dgts <- as.integer(ceiling(log10(setup$maxiter + 1)))
## string format to use in sprintf
fmt <- paste0("%0", dgts, "d")

## Write on disk state_T and state_C after every iteration
## comprised in setup$out_save
if (setup$store_result) {
if (iter %in% setup$out_save) {
nameout <- paste0(setup$out_dir, "/iter_", sprintf(fmt = fmt, iter), ".", setup$out_ext)
state_T <- data.frame(state_T, check.names = FALSE)
state_C <- data.frame(state_C, check.names = FALSE)

ai_surrogate_info <- list(
prediction_time = if (exists("ai_prediction_time")) ai_prediction_time else NULL,
predictions_validity = if (exists("validity_vector")) validity_vector else NULL,
cluster_labels = if (exists("cluster_labels")) cluster_labels else NULL,
predictions = if (exists("predictions")) predictions else NULL
)

SaveRObj(x = list(
T = state_T,
C = state_C,
simtime = as.integer(setup$simulation_time),
totaltime = as.integer(totaltime),
ai_surrogate_info = ai_surrogate_info
), path = nameout)
msgm("results stored in <", nameout, ">")
}
}
## Add last time step to simulation time
setup$simulation_time <- setup$simulation_time + setup$timesteps[iter]

msgm("done iteration", iter, "/", length(setup$timesteps))
setup$iter <- setup$iter + 1
return(setup)
}

## Attach the name of the calling function to the message displayed on
## R's stdout
msgm <- function(...) {
prefix <- paste0("R: ")
cat(paste(prefix, ..., "\n"))
invisible()
}

## Function called by master R process to store on disk all relevant
## parameters for the simulation
StoreSetup <- function(setup, filesim, out_dir) {
to_store <- vector(mode = "list", length = 4)
## names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT")
names(to_store) <- c("Sim", "Transport", "DHT", "Cmdline")

## read the setup R file, which is sourced in kin.cpp
tmpbuff <- file(filesim, "r")
setupfile <- readLines(tmpbuff)
close.connection(tmpbuff)

to_store$Sim <- setupfile

## to_store$Flow <- list(
## snapshots = setup$snapshots,
## gridfile = setup$gridfile,
## phase = setup$phase,
## density = setup$density,
## dt_differ = setup$dt_differ,
## prolong = setup$prolong,
## maxiter = setup$maxiter,
## saved_iter = setup$iter_output,
## out_save = setup$out_save )

to_store$Transport <- setup$diffusion

## to_store$Chemistry <- list(
## nprocs = n_procs,
## wp_size = work_package_size,
## base = setup$base,
## first = setup$first,
## init = setup$initsim,
## db = db,
## kin = setup$kin,
## ann = setup$ann)

if (dht_enabled) {
to_store$DHT <- list(
enabled = dht_enabled,
log = dht_log
## signif = dht_final_signif,
## proptype = dht_final_proptype
)
} else {
to_store$DHT <- FALSE
}

if (dht_enabled) {
to_store$DHT <- list(
enabled = dht_enabled,
log = dht_log
# signif = dht_final_signif,
# proptype = dht_final_proptype
)
} else {
to_store$DHT <- FALSE
}

saveRDS(to_store, file = paste0(fileout, "/setup.rds"))
msgm("initialization stored in ", paste0(fileout, "/setup.rds"))
}

GetWorkPackageSizesVector <- function(n_packages, package_size, len) {
ids <- rep(1:n_packages, times = package_size, each = 1)[1:len]
return(as.integer(table(ids)))
}

## Handler to read R objs from binary files using either builtin
## readRDS() or qs::qread() based on file extension
ReadRObj <- function(path) {
## code borrowed from tools::file_ext()
pos <- regexpr("\\.([[:alnum:]]+)$", path)
extension <- ifelse(pos > -1L, substring(path, pos + 1L), "")

switch(extension,
rds = readRDS(path),
qs = qs::qread(path)
)
}

## Handler to store R objs to binary files using either builtin
## saveRDS() or qs::qsave() based on file extension
SaveRObj <- function(x, path) {
msgm("Storing to", path)
## code borrowed from tools::file_ext()
pos <- regexpr("\\.([[:alnum:]]+)$", path)
extension <- ifelse(pos > -1L, substring(path, pos + 1L), "")

switch(extension,
rds = saveRDS(object = x, file = path),
qs = qs::qsave(x = x, file = path)
)
}
)";
static const inline std::string init_r_library = R"(### Copyright (C) 2018-2024 Marco De Lucia, Max Luebke (GFZ Potsdam, University of Potsdam)
###
### POET is free software; you can redistribute it and/or modify it under the
### terms of the GNU General Public License as published by the Free Software
### Foundation; either version 2 of the License, or (at your option) any later
### version.
###
### POET is distributed in the hope that it will be useful, but WITHOUT ANY
### WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
### A PARTICULAR PURPOSE. See the GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License along with
### this program; if not, write to the Free Software Foundation, Inc., 51
### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

##' @param pqc_mat matrix, containing IDs and PHREEQC outputs
##' @param grid matrix, zonation referring to pqc_mat$ID
##' @return a data.frame
pqc_to_grid <- function(pqc_mat, grid) {
# Convert the input DataFrame to a matrix
pqc_mat <- as.matrix(pqc_mat)

# Flatten the matrix into a vector
id_vector <- as.integer(t(grid))

# Find the matching rows in the matrix
row_indices <- match(id_vector, pqc_mat[, "ID"])

# Extract the matching rows from pqc_mat to size of grid matrix
result_mat <- pqc_mat[row_indices, ]

# Convert the result matrix to a data frame
res_df <- as.data.frame(result_mat)

# Remove all columns which only contain NaN
res_df <- res_df[, colSums(is.na(res_df)) != nrow(res_df)]

# Remove row names
rownames(res_df) <- NULL

return(res_df)
}

##' @param pqc_mat matrix,
##' @param transport_spec column name of species in pqc_mat
##' @param id
##' @title
##' @return
resolve_pqc_bound <- function(pqc_mat, transport_spec, id) {
df <- as.data.frame(pqc_mat, check.names = FALSE)
value <- df[df$ID == id, transport_spec]

if (is.nan(value)) {
value <- 0
}

return(value)
}

##' @title
##' @param init_grid
##' @param new_names
##' @return
add_missing_transport_species <- function(init_grid, new_names) {
# add 'ID' to new_names front, as it is not a transport species but required
new_names <- c("ID", new_names)
sol_length <- length(new_names)

new_grid <- data.frame(matrix(0, nrow = nrow(init_grid), ncol = sol_length))
names(new_grid) <- new_names

matching_cols <- intersect(names(init_grid), new_names)

# Copy matching columns from init_grid to new_grid
new_grid[, matching_cols] <- init_grid[, matching_cols]

# Add missing columns to new_grid
append_df <- init_grid[, !(names(init_grid) %in% new_names)]
new_grid <- cbind(new_grid, append_df)

return(new_grid)
}
)";
static const inline std::string ai_surrogate_r_library =
R"(## This file contains default function implementations for the ai surrogate.
## To use pre-/postprocessing it is recommended to override these functions
## with custom implementations via the input script. The path to the R-file
## See the barite_50.R file as an example and the general README for more
## information.

preprocess <- function(df) {
return(df)
}

postprocess <- function(df) {
return(df)
}

set_valid_predictions <- function(temp_field, prediction, validity) {
temp_field[validity == 1, ] <- prediction[validity == 1, ]
return(temp_field)
}

get_invalid_values <- function(df, validity) {
return(df[validity == 0, ])
}

set_field <- function(temp_field, columns, rows, column_name_limit,
byrow = FALSE) {
temp_field <- matrix(temp_field, nrow = rows, byrow = byrow)
temp_field <- setNames(data.frame(temp_field), columns)
temp_field <- temp_field[column_name_limit]
return(temp_field)
}
)";
static const inline std::string r_runtime_parameters = "mysetup";

struct RuntimeParameters {
std::string out_dir;
std::vector<double> timesteps;

Rcpp::List init_params;

bool as_rds = false;
std::string out_ext; // MDL added to accomodate for qs::qsave/qread

bool print_progress = false;

static constexpr std::uint32_t WORK_PACKAGE_SIZE_DEFAULT = 32;
std::uint32_t work_package_size;

bool use_dht = false;
static constexpr std::uint32_t DHT_SIZE_DEFAULT = 1.5E3;
std::uint32_t dht_size;
static constexpr std::uint8_t DHT_SNAPS_DEFAULT = 0;
std::uint8_t dht_snaps;

bool use_interp = false;
static constexpr std::uint32_t INTERP_SIZE_DEFAULT = 100;
std::uint32_t interp_size;
static constexpr std::uint32_t INTERP_MIN_ENTRIES_DEFAULT = 5;
std::uint32_t interp_min_entries;
static constexpr std::uint32_t INTERP_BUCKET_ENTRIES_DEFAULT = 20;
std::uint32_t interp_bucket_entries;

/*AI surriogate configuration*/
bool use_ai_surrogate = false; // Can be set with command line flag ---ai-surrogate
bool disable_training = false; // Can be set in the R input script
bool use_clustering = false; // Can be set in the R input script
bool use_Keras_predictions = false; // Can be set in the R input script
bool train_only_invalid = false; // Can be set in the R input script
int batch_size = 2560; // default value determined in test on the UP Turing cluster
int training_epochs = 20;; // Can be set in the R input script
int training_data_size; // Can be set in the R input script
bool use_naa = true;
std::string save_model_path = ""; // Can be set in the R input script
std::string cuda_src_dir = CUDA_SRC_DIR; // From CMake
};

@@ -31,6 +31,13 @@

#define SRC_DIR "@CMAKE_SOURCE_DIR@"
#define CUDA_SRC_DIR "@CUDA_TOOLKIT_ROOT_DIR@"
#define USE_NAA "@USE_NAA@"

// target and source IP address for NAA support
// #ifdef USE_NAA
#define SOURCE_IP "@SOURCE_IP@"
#define TARGET_IP "@TARGET_IP@"
// #endif

static const char *poet_version = "@POET_VERSION@";

@@ -78,6 +85,7 @@ struct RuntimeParameters {
int batch_size = 2560; // default value determined in test on the UP Turing cluster
int training_epochs = 20;; // Can be set in the R input script
int training_data_size; // Can be set in the R input script
bool use_naa = false;
std::string save_model_path = ""; // Can be set in the R input script
std::string cuda_src_dir = CUDA_SRC_DIR; // From CMake
};