add training weight serializer functions

2025-12-16 04:48:23 +01:00 · 2024-12-21 12:15:38 +01:00 · 2024-12-21 12:15:38 +01:00 · f76e438c30
commit f76e438c30
parent 13ad41d302
4 changed files with 833 additions and 341 deletions
--- a/src/Chemistry/SurrogateModels/AI_functions.cpp
+++ b/src/Chemistry/SurrogateModels/AI_functions.cpp
--- a/src/Chemistry/SurrogateModels/AI_functions.hpp
+++ b/src/Chemistry/SurrogateModels/AI_functions.hpp
@ -17,9 +17,14 @@
 #ifndef AI_FUNCTIONS_H
 #define AI_FUNCTIONS_H
 #include <condition_variable>
 #include <mutex>
 #include <string>
 #include <vector>
 #include "poet.hpp"
 extern "C"{
 #include <naaice_ap2.h>
 }
 // PhreeqC definition of pi clashes with Eigen macros
 // so we have to temporarily undef it 
@ -44,7 +49,7 @@ struct TrainingData {
  int n_training_runs = 0;
 };
-// Ony declare the actual functions if flag is set 
+// Only declare the actual functions if flag is set 
 #ifdef USE_AI_SURROGATE
 int Python_Keras_setup(std::string functions_file_path, std::string cuda_src_dir);
@ -71,7 +76,7 @@ int Python_Keras_training_thread(EigenModel* Eigen_model, EigenModel* Eigen_mode
                                 std::mutex* training_data_buffer_mutex,
                                 std::condition_variable* training_data_buffer_full,
                                 bool* start_training, bool* end_training,
-                                 const RuntimeParameters& params);
+                                 const RuntimeParameters& params, naa_handle *handle);
 void update_weights(EigenModel* model, const std::vector<std::vector<std::vector<double>>>& weights);
@ -84,7 +89,6 @@ std::vector<double> Eigen_predict_clustered(const EigenModel& model, const Eigen
 std::vector<double> Eigen_predict(const EigenModel& model, std::vector<std::vector<double>> x, int batch_size,
                                  std::mutex* Eigen_model_mutex);
 // Otherwise, define the necessary stubs
 #else
 // Stub used when USE_AI_SURROGATE is not defined. It returns int (value 0) so
 // that it matches the return type of the real Python_Keras_setup declaration
 // and callers that inspect the result compile in both configurations.
 inline int Python_Keras_setup(std::string, std::string){return {};}
@ -97,7 +101,7 @@ inline void training_data_buffer_append(std::vector<std::vector<double>>&,
 inline void cluster_labels_append(std::vector<int>&, std::vector<int>&, std::vector<int>){}
 inline int Python_Keras_training_thread(EigenModel*, EigenModel*, std::mutex*, 
                                        TrainingData*, std::mutex*, std::condition_variable*,
-                                        bool*, bool*, const RuntimeParameters&){return {};}
+                                        bool*, bool*, const RuntimeParameters&, naa_handle*){return {};}
 inline void update_weights(EigenModel*, const std::vector<std::vector<std::vector<double>>>&){}
 inline std::vector<std::vector<std::vector<double>>> Python_Keras_get_weights(std::string){return {};}
--- a/src/Chemistry/SurrogateModels/serializer.cpp
+++ b/src/Chemistry/SurrogateModels/serializer.cpp
@ -0,0 +1,221 @@
 #include "serializer.hpp"
 #include "AI_functions.hpp"
 #include <Eigen/src/Core/Matrix.h>
 #include <cstddef>
 #include <cstdio>
 #include <cstring>
 #include <limits>
 using namespace std;
 namespace poet{
 size_t calculateStructSize(void *struct_pointer, char type){
    size_t struct_size = 0;
    if (type == 'E') {
      struct_size += sizeof(size_t); // number of matrices
      struct_size +=
          static_cast<EigenModel *>(struct_pointer)->weight_matrices.size() *
          2 * sizeof(size_t);        // dimensions of matrices
      struct_size += sizeof(size_t); // number of vectors
      struct_size += static_cast<EigenModel *>(struct_pointer)->biases.size() *
                     sizeof(size_t); // length of vectors
      for (const Eigen::MatrixXd &matrix :
           static_cast<EigenModel *>(struct_pointer)->weight_matrices) {
        // fprintf(stderr, "matrix size: rows:%td, cols: %td\n", matrix.rows(), matrix.cols());
        struct_size += matrix.size() * sizeof(double);
        // fprintf(stderr, "matrix size %td\n", matrix.size());
      }
      for (const Eigen::VectorXd &bias :
           static_cast<EigenModel *>(struct_pointer)->biases) {
        struct_size += bias.size() * sizeof(double);
        // fprintf(stderr, "matrix size %td\n", bias.size());
      }
    } else if (type == 'T') {
    }
 return struct_size;
 }
 int serializeModelWeights(const EigenModel *model, char *memory){
    size_t num_matrices = model->weight_matrices.size();
    size_t size_counter = 0;
    std::memcpy(memory, &num_matrices, sizeof(size_t));
    memory += sizeof(size_t);
    size_counter += sizeof(size_t);
    for (const Eigen::MatrixXd &matrix : model->weight_matrices) {
      size_t rows = matrix.rows(), cols = matrix.cols();
      fprintf(stdout, "rows: %zu, cols: %zu\n", rows, cols);
      std::memcpy(memory, &rows, sizeof(size_t));
      memory += sizeof(size_t);
      size_counter += sizeof(size_t);
      std::memcpy(memory, &cols, sizeof(size_t));
      memory += sizeof(size_t);
      size_counter += sizeof(size_t);
      std::memcpy(memory, matrix.data(), rows * cols * sizeof(double));
      memory += rows * cols * sizeof(double);
      size_counter += rows * cols * sizeof(double);
    }
    // Serialisierung der Bias-Vektoren
    size_t num_biases = model->biases.size();
    std::memcpy(memory, &num_biases, sizeof(size_t));
    memory += sizeof(size_t);
    size_counter += sizeof(size_t);
    for (const Eigen::VectorXd &bias : model->biases) {
      size_t size = bias.size();
      std::memcpy(memory, &size, sizeof(size_t));
      memory += sizeof(size_t);
      size_counter += sizeof(size_t);
      std::memcpy(memory, bias.data(), size * sizeof(double));
      memory += size * sizeof(double);
      size_counter += size * sizeof(double);
    }
    fprintf(stdout, "serializer size: %zu\n", size_counter);
    return 0;
 }
 // EigenModel deserializeModelWeights(char *memory, size_t buffer_size){
 //     EigenModel deserializedModel;
 //     size_t num_matrices; 
 //     size_t size_counter = 0;
 //     std::memcpy(&num_matrices, memory, sizeof(size_t));
 //     fprintf(stdout, "number of matrices: %zu\n", num_matrices);
 //     memory += sizeof(size_t);
 //     size_counter += sizeof(size_t);
 //     deserializedModel.weight_matrices.resize(num_matrices);
 //     for (Eigen::MatrixXd &matrix : deserializedModel.weight_matrices) {
 //         size_t rows, cols;
 //         std::memcpy(&rows, memory, sizeof(size_t));
 //         memory += sizeof(size_t);
 //         size_counter += sizeof(size_t);
 //         std::memcpy(&cols, memory, sizeof(size_t));
 //         fprintf(stdout, "rows: %zu, cols: %zu\n", rows, cols);
 //         memory += sizeof(size_t);
 //         size_counter += sizeof(size_t);
 //         fprintf(stdout, "rows before: %td, cols before: %td\n", matrix.rows(), matrix.cols());
 //         matrix.resize(rows, cols);
 //         std::memcpy(matrix.data(), memory, rows * cols * sizeof(double));
 //         memory += rows * cols * sizeof(double);
 //         size_counter += rows * cols * sizeof(double);
 //     }
 //     fprintf(stdout, "deserialized size of matrices: %zu\n", size_counter);
 //     size_t num_biases;
 //     std::memcpy(&num_biases, memory, sizeof(size_t));
 //     memory += sizeof(size_t);
 //     size_counter += sizeof(size_t);
 //     fprintf(stdout, "number of biases: %zu\n", num_biases);
 //     deserializedModel.biases.resize(num_biases);
 //     for (Eigen::VectorXd &bias : deserializedModel.biases) {
 //         size_t size;
 //         std::memcpy(&size, memory, sizeof(size_t));
 //         fprintf(stdout, "bias length: %zu\n", size);
 //         memory += sizeof(size_t);
 //         size_counter += sizeof(size_t);
 //         bias.resize(size);
 //         std::memcpy(bias.data(), memory, size * sizeof(double));
 //         memory += size * sizeof(double);
 //         size_counter += size * sizeof(double);
 //     }
 //     fprintf(stdout, "deserialized size: %zu\n", size_counter);
 //     if(size_counter > buffer_size){
 //       fprintf(stderr, "buffer corrupted!\n");
 //     }
 //     return deserializedModel;
 // }
 /**
 * Reads one size_t header field from the stream.
 *
 * On success the field is copied into `value`, `cursor` is advanced and
 * `remaining` is reduced by sizeof(size_t). Nothing is modified on failure.
 *
 * @return true if at least sizeof(size_t) bytes were available
 */
 static bool readSizeField(const char *&cursor, size_t &remaining, size_t &value) {
    if (remaining < sizeof(size_t)) {
        return false;
    }
    std::memcpy(&value, cursor, sizeof(size_t));
    cursor += sizeof(size_t);
    remaining -= sizeof(size_t);
    return true;
 }

 /**
 * Rebuilds an EigenModel from the byte stream written by serializeModelWeights.
 *
 * Every count and dimension read from the buffer is validated against the
 * number of bytes still available before it is used for an allocation or a
 * copy. The checks are written as divisions on the remaining byte count so a
 * corrupt header cannot make them wrap around. Payloads are copied with
 * memcpy straight into the resized Eigen objects, so the buffer does not need
 * to be aligned for double.
 *
 * @param memory      start of the serialized data
 * @param buffer_size number of readable bytes at `memory`
 * @return the deserialized model; if the buffer is null, truncated or
 *         inconsistent, a message is printed to stderr and the model built up
 *         to that point (possibly empty) is returned
 */
 EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
    EigenModel deserializedModel;

    if (memory == nullptr) {
        fprintf(stderr, "Buffer is null.\n");
        return deserializedModel;
    }

    const char *cursor = memory;
    size_t remaining = buffer_size;
    // Largest extent an Eigen object can be resized to.
    const size_t max_dim =
        static_cast<size_t>(std::numeric_limits<Eigen::Index>::max());

    // Number of matrices
    size_t num_matrices = 0;
    if (!readSizeField(cursor, remaining, num_matrices)) {
        fprintf(stderr, "Buffer too small.\n");
        return deserializedModel;
    }
    // Each matrix needs at least its two dimension fields, so a larger count
    // cannot be satisfied; reject it before allocating anything.
    if (num_matrices > remaining / (2 * sizeof(size_t))) {
        fprintf(stderr, "Buffer too small for matrix dimensions.\n");
        return deserializedModel;
    }
    deserializedModel.weight_matrices.resize(num_matrices);

    // Deserialize the matrices
    for (Eigen::MatrixXd &matrix : deserializedModel.weight_matrices) {
        size_t rows = 0, cols = 0;
        if (remaining < 2 * sizeof(size_t)) {
            fprintf(stderr, "Buffer too small for matrix dimensions.\n");
            return deserializedModel;
        }
        readSizeField(cursor, remaining, rows);
        readSizeField(cursor, remaining, cols);

        if (rows > max_dim || cols > max_dim) {
            fprintf(stderr, "Invalid matrix dimensions.\n");
            return deserializedModel;
        }
        // Overflow-free form of rows * cols * sizeof(double) > remaining.
        const size_t max_elements = remaining / sizeof(double);
        if (rows != 0 && cols > max_elements / rows) {
            fprintf(stderr, "Buffer too small for matrix data.\n");
            return deserializedModel;
        }

        const size_t n_bytes = rows * cols * sizeof(double);
        matrix.resize(static_cast<Eigen::Index>(rows),
                      static_cast<Eigen::Index>(cols));
        if (n_bytes != 0) {
            std::memcpy(matrix.data(), cursor, n_bytes);
        }
        cursor += n_bytes;
        remaining -= n_bytes;
    }

    // Number of bias vectors
    size_t num_biases = 0;
    if (!readSizeField(cursor, remaining, num_biases)) {
        fprintf(stderr, "Buffer too small for biases.\n");
        return deserializedModel;
    }
    // Each bias vector needs at least its length field.
    if (num_biases > remaining / sizeof(size_t)) {
        fprintf(stderr, "Buffer too small for bias size.\n");
        return deserializedModel;
    }
    deserializedModel.biases.resize(num_biases);

    // Deserialize the bias vectors
    for (Eigen::VectorXd &bias : deserializedModel.biases) {
        size_t size = 0;
        if (!readSizeField(cursor, remaining, size)) {
            fprintf(stderr, "Buffer too small for bias size.\n");
            return deserializedModel;
        }
        // Overflow-free form of size * sizeof(double) > remaining.
        if (size > remaining / sizeof(double)) {
            fprintf(stderr, "Buffer too small for bias data.\n");
            return deserializedModel;
        }

        const size_t n_bytes = size * sizeof(double);
        bias.resize(static_cast<Eigen::Index>(size));
        if (n_bytes != 0) {
            std::memcpy(bias.data(), cursor, n_bytes);
        }
        cursor += n_bytes;
        remaining -= n_bytes;
    }

    return deserializedModel;
 }
 }
--- a/src/Chemistry/SurrogateModels/serializer.hpp
+++ b/src/Chemistry/SurrogateModels/serializer.hpp
@ -0,0 +1,56 @@
 #ifndef SERIALIZER_H
 #define SERIALIZER_H
 #include "AI_functions.hpp"
 #include <cstddef>
 namespace poet{
 /**
 * @brief Serialize the weights and biases of the model into a memory location
 * to send them via RDMA
 *
 * The data is written as: number of matrices, then for each matrix its rows,
 * its cols and its coefficients; followed by the number of bias vectors, then
 * for each vector its length and its coefficients. Counts and dimensions are
 * stored as size_t, coefficients as double.
 *
 * @param model: Struct of EigenModel containing the weights and biases of the
 * model
 * @param memory: Pointer to the memory location where the serialized data will
 * be stored. No buffer length is passed, so the caller has to provide enough
 * space; calculateStructSize(model, 'E') yields the required number of bytes.
 * @return int: 0 if the serialization was successful, -1 otherwise
 */
 int serializeModelWeights(const EigenModel *model, char *memory);
 /**
 * @brief Deserialize the weights and biases of the model from a memory location
 * 
 * Expects the layout produced by serializeModelWeights.
 *
 * @param memory Pointer to the memory location where the serialized data is stored
 * @param buffer_size Number of bytes available at memory; reads are checked
 * against this limit
 * @return EigenModel struct containing the weights and biases of the model. If
 * the buffer is too small, a message is printed to stderr and the model as
 * filled up to that point is returned.
 */
 EigenModel deserializeModelWeights(char* memory, size_t buffer_size);
 /**
 * @brief Serialize the training data into a memory location to send it via RDMA
 * 
 * @param data Struct of TrainingData containing the training data
 * @param memory Pointer to the memory location where the serialized data will
 * be stored
 * @return int status code
 * NOTE(review): presumably 0 on success and -1 otherwise, like
 * serializeModelWeights; no definition is visible next to this declaration,
 * confirm once it is implemented.
 */
 int serializeTrainingData(const TrainingData& data, void *memory);
 /**
 * @brief Deserialize the training data from a memory location
 * 
 * @param data Pointer to the memory location where the serialized data is stored
 * @return TrainingData struct containing the training data
 * NOTE(review): unlike deserializeModelWeights, no buffer size is passed here,
 * so reads cannot be bounds-checked by this function; confirm this is intended.
 */
 TrainingData deserializeTrainingData(void* data);
 /**
 * @brief Calculates the size of stored elements in the EigenModel and
 * TrainingData structs
 *
 * For an EigenModel the result is the number of bytes written by
 * serializeModelWeights (count and dimension fields plus all coefficients).
 *
 * @param struct_pointer: pointer to the struct; it is cast to EigenModel* when
 * type is 'E'
 * @param type: determines the struct type given: E for EigenModel and T for
 * TrainingData. The implementation in serializer.cpp only computes a size for
 * 'E'; for 'T' and any other value it returns 0.
 * @return size_t: size of stored elements in bytes
 */
 size_t calculateStructSize(void* struct_pointer, char type);
 }
 #endif