Validate predictions

2025-12-16 12:54:50 +01:00 · 2024-10-09 17:23:14 +02:00 · 2024-10-09 17:23:14 +02:00 · d839ae4d5e
commit d839ae4d5e
parent e2d96ca9b6
5 changed files with 59 additions and 32 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -4,8 +4,6 @@ project(POET
  LANGUAGES CXX C
  DESCRIPTION "A coupled reactive transport simulator")

-set(CMAKE_CXX_FLAGS "-O3 -march=native -mtune=native")
-
 # specify the C++ standard
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
--- a/R_lib/ai_surrogate_model_functions.R
+++ b/R_lib/ai_surrogate_model_functions.R
@ -0,0 +1,18 @@
+## This file contains default function implementations for the AI surrogate.
+## To use pre-/postprocessing, it is recommended to override these functions
+## with custom implementations from an R file referenced by the input script.
+## See the barite_50.R file as an example and the general README for more
+## information.
+
+preprocess <- function(df, backtransform = FALSE, outputs = FALSE) {
+  return(df)
+}
+
+postprocess <- function(df, backtransform = TRUE, outputs = TRUE) {
+  return(df)
+}
+
+set_valid_predictions <- function(temp_field, prediction, validity) {
+  temp_field[validity == 1, ] <- prediction[validity == 1, ]
+  return(temp_field)
+}
--- a/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py
+++ b/src/Chemistry/SurrogateModels/AI_Python_functions/keras_AI_surrogate.py
@ -17,9 +17,7 @@ def training_step(model, x, y, x_val, y_val, batch_size, epochs):

 def prediction_step(model, x, batch_size):
    prediction = model.predict(x, batch_size)
-    print("Prediction from Python", flush=True)
-    print(prediction, flush=True)
    return np.array(prediction, dtype=np.float64)

 def get_weights(model):
-    return model.get_weights()
+    return  model.get_weights()
--- a/src/Chemistry/SurrogateModels/AI_functions.cpp
+++ b/src/Chemistry/SurrogateModels/AI_functions.cpp
@ -180,24 +180,29 @@ EigenModel Python_Keras_get_weights_as_Eigen() {
  PyObject* py_weights_list = PyObject_CallObject(py_get_weights_function, args);
  
  if (!py_weights_list) {
-    PyErr_Print(); // Print Python error
+    PyErr_Print();
    throw std::runtime_error("Failed to get weights from Keras model");
  }

  Py_ssize_t num_layers = PyList_Size(py_weights_list);
  for (Py_ssize_t i = 0; i < num_layers; i += 2) {
    // Get weight matrix
+    PyErr_Print();
    PyObject* weight_array = PyList_GetItem(py_weights_list, i);
-    if (!PyArray_Check(weight_array)) throw std::runtime_error("Weight array is not a NumPy array");
+    PyErr_Print();
+    if (!weight_array) {
+      PyErr_Print();
+      throw std::runtime_error("Failed to get layer from Keras weights");
+    }
+
    PyArrayObject* weight_np = reinterpret_cast<PyArrayObject*>(weight_array);
    if (PyArray_NDIM(weight_np) != 2) throw std::runtime_error("Weight array is not 2-dimensional");
-
+    PyErr_Print();
    // Check weight data type (corresponding to the model's precision settings)
    int dtype = PyArray_TYPE(weight_np);
    if (dtype != NPY_FLOAT32 && dtype != NPY_DOUBLE) {
        throw std::runtime_error("Unsupported data type for model weights. Must be NPY_FLOAT32 or");
    }
-
    int num_rows = PyArray_DIM(weight_np, 0);
    int num_cols = PyArray_DIM(weight_np, 1);
    
@ -214,7 +219,6 @@ EigenModel Python_Keras_get_weights_as_Eigen() {
      weight_matrix = Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
        weight_data_double, num_rows, num_cols).transpose();
    }
-
    // Get bias vector
    PyObject* bias_array = PyList_GetItem(py_weights_list, i + 1);
    PyArrayObject* bias_np = reinterpret_cast<PyArrayObject*>(bias_array);
@ -269,7 +273,7 @@ std::vector<double> Eigen_predict(const EigenModel& model, std::vector<std::vect
  }

  std::vector<double> result;
-  result.reserve(num_samples * model.biases.back().size());
+  result.reserve(num_samples * num_features);
  
  int num_batches = std::ceil(static_cast<double>(num_samples) / batch_size);
  for (int batch = 0; batch < num_batches; ++batch) {
--- a/src/poet.cpp
+++ b/src/poet.cpp
@ -314,52 +314,61 @@ static Rcpp::List RunMasterLoop(RInsidePOET &R, const RuntimeParameters &params,
      double ai_start_t = MPI_Wtime();
      // Get current values from the tug field for the ai predictions
      R["TMP"] = Rcpp::wrap(chem.getField().AsVector());
-      R.parseEval(
-          std::string("predictors <- setNames(data.frame(matrix(TMP, nrow=" +
-                      std::to_string(chem.getField().GetRequestedVecSize()) +
-                      ")), TMP_PROPS)"));
+      
+      R.parseEval("predictors <- matrix(TMP, nrow=" + 
+                  std::to_string(chem.getField().GetRequestedVecSize()) + ")");
+      R.parseEval("predictors <- setNames(data.frame(predictors), TMP_PROPS)");
      R.parseEval("predictors <- predictors[ai_surrogate_species]");

      // Apply preprocessing
      MSG("AI Preprocessing");
      R.parseEval("predictors_scaled <- preprocess(predictors)");
-
+      R.parseEval("print(head(predictors_scaled))");
      // Predict
      MSG("AI: Predict");
-      //R["TMP"] = Python_keras_predict(R["predictors_scaled"], params.batch_size);
-      //R.parseEval("predictions_scaled <- matrix(TMP, nrow=nrow(predictors), byrow = TRUE)");
-      //R.parseEval("predictions_scaled <- setNames(data.frame(predictions_scaled), colnames(predictors))");
+      R["TMP"] = Python_keras_predict(R["predictors_scaled"], params.batch_size);
+      R.parseEval(std:string("predictions_scaled <-" + 
+                              "matrix(TMP, nrow=nrow(predictors), byrow = TRUE)"));
+      R.parseEval(std::string("predictions_scaled <- " +
+                  "setNames(data.frame(predictions_scaled), colnames(predictors))"));
+      
+      MSG("Keras predictions:")
+      R.parseEval("print(head(predictions_scaled))");
+

      EigenModel Eigen_model = Python_Keras_get_weights_as_Eigen();
      R["TMP"] = Eigen_predict(Eigen_model, R["predictors_scaled"], params.batch_size);
-      R.parseEval("predictions_scaled <- matrix(TMP, nrow=nrow(predictors), byrow = TRUE)");
-      R.parseEval("predictions_scaled <- setNames(data.frame(predictions_scaled), colnames(predictors))");
+      R.parseEval(std::string("predictions_scaled <- matrix(TMP, " +
+                  "nrow=nrow(predictors), byrow = TRUE)"));
+      R.parseEval(std::string("predictions_scaled <- " +
+                  "setNames(data.frame(predictions_scaled), colnames(predictors))"));
+      
+      MSG("Eigen predictions:")
+      R.parseEval("print(head(predictions_scaled))");
+      
      
-      // after this comes old R code!
      // Apply postprocessing
      MSG("AI Postprocesing");
      R.parseEval("predictions <- postprocess(predictions_scaled)");

      // Validate prediction and write valid predictions to chem field
      MSG("AI Validate");
-      R.parseEval(
-          "validity_vector <- validate_predictions(predictors, predictions)");
+      R.parseEval("validity_vector <- validate_predictions(predictors, predictions)");

-      MSG("AI Marking accepted");
+      MSG("AI Marking valid");
      chem.set_ai_surrogate_validity_vector(R.parseEval("validity_vector"));

-      MSG("AI TempField");
      std::vector<std::vector<double>> RTempField =
-          R.parseEval("set_valid_predictions(predictors,\
-                                                                                       aipreds,\
-                                                                                       validity_vector)");
+      R.parseEval("set_valid_predictions(predictors, predictions, validity_vector)");
+
+      std::vector<std::vector<double>> RTempField =
+      R.parseEval("set_valid_predictions(predictors, predictions, validity_vector)");

-      MSG("AI Set Field");
      Field predictions_field =
          Field(R.parseEval("nrow(predictors)"), RTempField,
                R.parseEval("colnames(predictors)"));

-      MSG("AI Update");
+      MSG("AI Update field with AI predictions");
      chem.getField().update(predictions_field);
      double ai_end_t = MPI_Wtime();
      R["ai_prediction_time"] = ai_end_t - ai_start_t;