Validate predictions

This commit is contained in:
straile 2024-10-09 17:23:14 +02:00
parent e2d96ca9b6
commit d839ae4d5e
5 changed files with 59 additions and 32 deletions

View File

@ -4,8 +4,6 @@ project(POET
LANGUAGES CXX C
DESCRIPTION "A coupled reactive transport simulator")
set(CMAKE_CXX_FLAGS "-O3 -march=native -mtune=native")
# specify the C++ standard
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED True)

View File

@ -0,0 +1,18 @@
## This file contains default function implementations for the AI surrogate.
## To use pre-/postprocessing it is recommended to override these functions
## with custom implementations in a separate R file whose path is given in
## the input script. See the barite_50.R file as an example and the general
## README for more information.
## Default preprocessing hook for the AI surrogate.
##
## This default is a no-op: the input is handed back unchanged.
##   df            data to preprocess
##   backtransform accepted for interface compatibility; unused here
##   outputs       accepted for interface compatibility; unused here
## Returns `df` as passed in.
preprocess <- function(df, backtransform = FALSE, outputs = FALSE) {
  ## Identity transform; custom implementations replace this body.
  df
}
## Default postprocessing hook for the AI surrogate.
##
## This default is a no-op: the input is handed back unchanged.
##   df            data to postprocess
##   backtransform accepted for interface compatibility; unused here
##   outputs       accepted for interface compatibility; unused here
## Returns `df` as passed in.
postprocess <- function(df, backtransform = TRUE, outputs = TRUE) {
  ## Identity transform; custom implementations replace this body.
  df
}
## Copy the rows of `prediction` that were marked valid into `temp_field`.
##
##   temp_field  table (data.frame or matrix) that receives the valid rows
##   prediction  table with the same row layout as `temp_field`
##   validity    vector with one entry per row; a row is taken from
##               `prediction` only where the entry equals 1
## Returns `temp_field` with the valid rows replaced.
##
## Rows whose validity is NA are treated as not valid. Indexing with the
## raw logical mask would abort the whole assignment, because R does not
## allow NA subscripts in subscripted assignments.
set_valid_predictions <- function(temp_field, prediction, validity) {
  ## which() drops NA comparisons and yields plain row indices.
  valid_rows <- which(validity == 1)
  if (length(valid_rows) == 0L) {
    ## Nothing was accepted; leave the field untouched.
    return(temp_field)
  }
  temp_field[valid_rows, ] <- prediction[valid_rows, ]
  temp_field
}

View File

@ -17,8 +17,6 @@ def training_step(model, x, y, x_val, y_val, batch_size, epochs):
def prediction_step(model, x, batch_size):
    """Run one prediction pass and return the result as a float64 array.

    Args:
        model: Object exposing ``predict(x, batch_size)`` (presumably the
            Keras model used by the surrogate -- confirm against caller).
        x: Input samples, forwarded unchanged to ``model.predict``.
        batch_size: Batch size, forwarded unchanged to ``model.predict``.

    Returns:
        numpy.ndarray: The prediction converted to ``np.float64``.
    """
    # No debug printing here: dumping the full prediction array on every
    # call floods stdout and slows the coupled simulation loop.
    prediction = model.predict(x, batch_size)
    return np.array(prediction, dtype=np.float64)
def get_weights(model):

View File

@ -180,24 +180,29 @@ EigenModel Python_Keras_get_weights_as_Eigen() {
PyObject* py_weights_list = PyObject_CallObject(py_get_weights_function, args);
if (!py_weights_list) {
PyErr_Print(); // Print Python error
PyErr_Print();
throw std::runtime_error("Failed to get weights from Keras model");
}
Py_ssize_t num_layers = PyList_Size(py_weights_list);
for (Py_ssize_t i = 0; i < num_layers; i += 2) {
// Get weight matrix
PyErr_Print();
PyObject* weight_array = PyList_GetItem(py_weights_list, i);
if (!PyArray_Check(weight_array)) throw std::runtime_error("Weight array is not a NumPy array");
PyErr_Print();
if (!weight_array) {
PyErr_Print();
throw std::runtime_error("Failed to get layer from Keras weights");
}
PyArrayObject* weight_np = reinterpret_cast<PyArrayObject*>(weight_array);
if (PyArray_NDIM(weight_np) != 2) throw std::runtime_error("Weight array is not 2-dimensional");
PyErr_Print();
// Check weight data type (corresponding to the model's precision settings)
int dtype = PyArray_TYPE(weight_np);
if (dtype != NPY_FLOAT32 && dtype != NPY_DOUBLE) {
throw std::runtime_error("Unsupported data type for model weights. Must be NPY_FLOAT32 or");
}
int num_rows = PyArray_DIM(weight_np, 0);
int num_cols = PyArray_DIM(weight_np, 1);
@ -214,7 +219,6 @@ EigenModel Python_Keras_get_weights_as_Eigen() {
weight_matrix = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
weight_data_double, num_rows, num_cols).transpose();
}
// Get bias vector
PyObject* bias_array = PyList_GetItem(py_weights_list, i + 1);
PyArrayObject* bias_np = reinterpret_cast<PyArrayObject*>(bias_array);
@ -269,7 +273,7 @@ std::vector<double> Eigen_predict(const EigenModel& model, std::vector<std::vect
}
std::vector<double> result;
result.reserve(num_samples * model.biases.back().size());
result.reserve(num_samples * num_features);
int num_batches = std::ceil(static_cast<double>(num_samples) / batch_size);
for (int batch = 0; batch < num_batches; ++batch) {

View File

@ -314,52 +314,61 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,
double ai_start_t = MPI_Wtime();
// Get current values from the tug field for the ai predictions
R["TMP"] = Rcpp::wrap(chem.getField().AsVector());
R.parseEval(
std::string("predictors <- setNames(data.frame(matrix(TMP, nrow=" +
std::to_string(chem.getField().GetRequestedVecSize()) +
")), TMP_PROPS)"));
R.parseEval("predictors <- matrix(TMP, nrow=" +
std::to_string(chem.getField().GetRequestedVecSize()) + ")");
R.parseEval("predictors <- setNames(data.frame(predictors), TMP_PROPS)");
R.parseEval("predictors <- predictors[ai_surrogate_species]");
// Apply preprocessing
MSG("AI Preprocessing");
R.parseEval("predictors_scaled <- preprocess(predictors)");
R.parseEval("print(head(predictors_scaled))");
// Predict
MSG("AI: Predict");
//R["TMP"] = Python_keras_predict(R["predictors_scaled"], params.batch_size);
//R.parseEval("predictions_scaled <- matrix(TMP, nrow=nrow(predictors), byrow = TRUE)");
//R.parseEval("predictions_scaled <- setNames(data.frame(predictions_scaled), colnames(predictors))");
R["TMP"] = Python_keras_predict(R["predictors_scaled"], params.batch_size);
R.parseEval(std:string("predictions_scaled <-" +
"matrix(TMP, nrow=nrow(predictors), byrow = TRUE)"));
R.parseEval(std::string("predictions_scaled <- " +
"setNames(data.frame(predictions_scaled), colnames(predictors))"));
MSG("Keras predictions:")
R.parseEval("print(head(predictions_scaled))");
EigenModel Eigen_model = Python_Keras_get_weights_as_Eigen();
R["TMP"] = Eigen_predict(Eigen_model, R["predictors_scaled"], params.batch_size);
R.parseEval("predictions_scaled <- matrix(TMP, nrow=nrow(predictors), byrow = TRUE)");
R.parseEval("predictions_scaled <- setNames(data.frame(predictions_scaled), colnames(predictors))");
R.parseEval(std::string("predictions_scaled <- matrix(TMP, " +
"nrow=nrow(predictors), byrow = TRUE)"));
R.parseEval(std::string("predictions_scaled <- " +
"setNames(data.frame(predictions_scaled), colnames(predictors))"));
MSG("Eigen predictions:")
R.parseEval("print(head(predictions_scaled))");
// after this comes old R code!
// Apply postprocessing
MSG("AI Postprocesing");
R.parseEval("predictions <- postprocess(predictions_scaled)");
// Validate prediction and write valid predictions to chem field
MSG("AI Validate");
R.parseEval(
"validity_vector <- validate_predictions(predictors, predictions)");
R.parseEval("validity_vector <- validate_predictions(predictors, predictions)");
MSG("AI Marking accepted");
MSG("AI Marking valid");
chem.set_ai_surrogate_validity_vector(R.parseEval("validity_vector"));
MSG("AI TempField");
std::vector<std::vector<double>> RTempField =
R.parseEval("set_valid_predictions(predictors,\
aipreds,\
validity_vector)");
R.parseEval("set_valid_predictions(predictors, predictions, validity_vector)");
std::vector<std::vector<double>> RTempField =
R.parseEval("set_valid_predictions(predictors, predictions, validity_vector)");
MSG("AI Set Field");
Field predictions_field =
Field(R.parseEval("nrow(predictors)"), RTempField,
R.parseEval("colnames(predictors)"));
MSG("AI Update");
MSG("AI Update field with AI predictions");
chem.getField().update(predictions_field);
double ai_end_t = MPI_Wtime();
R["ai_prediction_time"] = ai_end_t - ai_start_t;