mirror of
https://git.gfz-potsdam.de/naaice/poet.git
synced 2025-12-16 04:48:23 +01:00
add serializer/deserializer functionality for training and target data
This commit is contained in:
parent
f76e438c30
commit
05e8f11d82
@ -3,6 +3,8 @@
|
||||
#include <Eigen/src/Core/Matrix.h>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
namespace poet{
|
||||
@ -32,6 +34,12 @@ size_t calculateStructSize(void *struct_pointer, char type){
|
||||
|
||||
}
|
||||
} else if (type == 'T') {
|
||||
struct_size += sizeof(size_t); // number of vectors
|
||||
struct_size += sizeof(size_t); // length of vector
|
||||
for (const std::vector<double> &vector:
|
||||
*static_cast<std::vector<std::vector<double>>*>(struct_pointer)){
|
||||
struct_size += vector.size() * sizeof(double);
|
||||
}
|
||||
}
|
||||
|
||||
return struct_size;
|
||||
@ -78,60 +86,6 @@ int serializeModelWeights(const EigenModel *model, char *memory){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// EigenModel deserializeModelWeights(char *memory, size_t buffer_size){
|
||||
|
||||
// EigenModel deserializedModel;
|
||||
|
||||
|
||||
// size_t num_matrices;
|
||||
// size_t size_counter = 0;
|
||||
// std::memcpy(&num_matrices, memory, sizeof(size_t));
|
||||
// fprintf(stdout, "number of matrices: %zu\n", num_matrices);
|
||||
|
||||
// memory += sizeof(size_t);
|
||||
// size_counter += sizeof(size_t);
|
||||
// deserializedModel.weight_matrices.resize(num_matrices);
|
||||
// for (Eigen::MatrixXd &matrix : deserializedModel.weight_matrices) {
|
||||
// size_t rows, cols;
|
||||
// std::memcpy(&rows, memory, sizeof(size_t));
|
||||
// memory += sizeof(size_t);
|
||||
// size_counter += sizeof(size_t);
|
||||
// std::memcpy(&cols, memory, sizeof(size_t));
|
||||
// fprintf(stdout, "rows: %zu, cols: %zu\n", rows, cols);
|
||||
// memory += sizeof(size_t);
|
||||
// size_counter += sizeof(size_t);
|
||||
// fprintf(stdout, "rows before: %td, cols before: %td\n", matrix.rows(), matrix.cols());
|
||||
// matrix.resize(rows, cols);
|
||||
// std::memcpy(matrix.data(), memory, rows * cols * sizeof(double));
|
||||
|
||||
// memory += rows * cols * sizeof(double);
|
||||
// size_counter += rows * cols * sizeof(double);
|
||||
// }
|
||||
// fprintf(stdout, "deserialized size of matrices: %zu\n", size_counter);
|
||||
// size_t num_biases;
|
||||
// std::memcpy(&num_biases, memory, sizeof(size_t));
|
||||
// memory += sizeof(size_t);
|
||||
// size_counter += sizeof(size_t);
|
||||
|
||||
// fprintf(stdout, "number of biases: %zu\n", num_biases);
|
||||
// deserializedModel.biases.resize(num_biases);
|
||||
// for (Eigen::VectorXd &bias : deserializedModel.biases) {
|
||||
// size_t size;
|
||||
// std::memcpy(&size, memory, sizeof(size_t));
|
||||
// fprintf(stdout, "bias length: %zu\n", size);
|
||||
// memory += sizeof(size_t);
|
||||
// size_counter += sizeof(size_t);
|
||||
// bias.resize(size);
|
||||
// std::memcpy(bias.data(), memory, size * sizeof(double));
|
||||
// memory += size * sizeof(double);
|
||||
// size_counter += size * sizeof(double);
|
||||
// }
|
||||
// fprintf(stdout, "deserialized size: %zu\n", size_counter);
|
||||
// if(size_counter > buffer_size){
|
||||
// fprintf(stderr, "buffer corrupted!\n");
|
||||
// }
|
||||
// return deserializedModel;
|
||||
// }
|
||||
EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
|
||||
EigenModel deserializedModel;
|
||||
size_t size_counter = 0;
|
||||
@ -147,11 +101,11 @@ EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
|
||||
size_counter += sizeof(size_t);
|
||||
deserializedModel.weight_matrices.resize(num_matrices);
|
||||
|
||||
// Matrizen deserialisieren
|
||||
// matrix deserialization
|
||||
for (Eigen::MatrixXd &matrix : deserializedModel.weight_matrices) {
|
||||
size_t rows, cols;
|
||||
|
||||
// Buffer-Check
|
||||
// buffer check
|
||||
if (size_counter + 2 * sizeof(size_t) > buffer_size) {
|
||||
fprintf(stderr, "Buffer too small for matrix dimensions.\n");
|
||||
return deserializedModel;
|
||||
@ -170,15 +124,15 @@ EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
|
||||
return deserializedModel;
|
||||
}
|
||||
|
||||
// Kopiere Daten in neue Matrix
|
||||
// interpret memory as Eigen::MatrixXd (more efficient than memcpy?)
|
||||
Eigen::MatrixXd temp = Eigen::Map<Eigen::MatrixXd>(
|
||||
reinterpret_cast<double*>(memory), rows, cols);
|
||||
matrix = temp; // Kopieren der Daten
|
||||
matrix = temp; // copy data to matrix
|
||||
memory += rows * cols * sizeof(double);
|
||||
size_counter += rows * cols * sizeof(double);
|
||||
}
|
||||
|
||||
// Anzahl Biases
|
||||
// number of bias vectors
|
||||
size_t num_biases;
|
||||
if (size_counter + sizeof(size_t) > buffer_size) {
|
||||
fprintf(stderr, "Buffer too small for biases.\n");
|
||||
@ -189,7 +143,7 @@ EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
|
||||
size_counter += sizeof(size_t);
|
||||
deserializedModel.biases.resize(num_biases);
|
||||
|
||||
// Biases deserialisieren
|
||||
// deserialization of bias vectors
|
||||
for (Eigen::VectorXd &bias : deserializedModel.biases) {
|
||||
size_t size;
|
||||
if (size_counter + sizeof(size_t) > buffer_size) {
|
||||
@ -206,7 +160,8 @@ EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
|
||||
return deserializedModel;
|
||||
}
|
||||
|
||||
// Kopiere Daten in neuen Vektor
|
||||
// same procedure as for the matrices
|
||||
// TODO: delete temp variable
|
||||
Eigen::VectorXd temp = Eigen::Map<Eigen::VectorXd>(
|
||||
reinterpret_cast<double*>(memory), size);
|
||||
bias = temp; // Kopieren der Daten
|
||||
@ -218,4 +173,45 @@ EigenModel deserializeModelWeights(char *memory, size_t buffer_size) {
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Serialize a vector of double vectors into a flat memory buffer.
 *
 * Layout written to memory (every count is a size_t, every value a double):
 * |# of vectors|length of vector 1|vector 1 data|length of vector 2|...
 *
 * No bounds checking is performed: memory must provide room for
 * (1 + data.size()) size_t fields plus all stored doubles.
 *
 * NOTE(review): the 'T' branch of calculateStructSize visible in this commit
 * adds sizeof(size_t) for the vector length only once, whereas one length
 * field is written per vector here - confirm the buffer is sized accordingly.
 * NOTE(review): the header declaration visible in this commit takes
 * `std::vector<std::vector<double>> *data`, while this definition takes the
 * vector by value (and therefore copies it on every call) - confirm which
 * signature callers are meant to use.
 *
 * @param data vectors to serialize
 * @param memory destination buffer the serialized bytes are written to
 * @return int: 0 if the serialization was successful, -1 if memory is a null
 * pointer
 */
int serializeTrainingData(std::vector<std::vector<double>> data, char *memory){
  if (memory == nullptr) {
    fprintf(stderr, "serializeTrainingData: memory is a null pointer.\n");
    return -1;
  }

  // header: number of vectors that follow
  const std::size_t num_vectors = data.size();
  std::memcpy(memory, &num_vectors, sizeof(std::size_t));
  memory += sizeof(std::size_t);

  for (const std::vector<double> &vector : data) {
    // each vector is prefixed with its own length
    const std::size_t size = vector.size();
    std::memcpy(memory, &size, sizeof(std::size_t));
    memory += sizeof(std::size_t);

    // an empty vector may return a null data() pointer, which must not be
    // handed to memcpy even for a zero byte count
    if (size > 0) {
      const std::size_t num_bytes = size * sizeof(double);
      std::memcpy(memory, vector.data(), num_bytes);
      memory += num_bytes;
    }
  }

  return 0;
}
|
||||
|
||||
/**
 * @brief Rebuild a vector of double vectors from a serialized memory buffer.
 *
 * Expected layout (every count is a size_t, every value a double):
 * |# of vectors|length of vector 1|vector 1 data|length of vector 2|...
 *
 * The buffer length is not known to this function, so the counts stored in
 * the buffer are trusted and no bounds checking is performed.
 *
 * @param data Pointer to the memory location where the serialized data is
 * stored
 * @return std::vector<std::vector<double>> holding the deserialized vectors;
 * empty if data is a null pointer
 */
std::vector<std::vector<double>> deserializeTrainingData(char* data){
  std::vector<std::vector<double>> deserialized_data;

  if (data == nullptr) {
    fprintf(stderr, "deserializeTrainingData: data is a null pointer.\n");
    return deserialized_data;
  }

  // header: number of vectors that follow
  std::size_t num_vectors = 0;
  std::memcpy(&num_vectors, data, sizeof(std::size_t));
  data += sizeof(std::size_t);

  deserialized_data.reserve(num_vectors);

  for (std::size_t i = 0; i < num_vectors; ++i) {
    // each vector is prefixed with its own length
    std::size_t size = 0;
    std::memcpy(&size, data, sizeof(std::size_t));
    data += sizeof(std::size_t);

    // construct the vector in place instead of copying a temporary
    deserialized_data.emplace_back(size);

    // an empty vector may return a null data() pointer, which must not be
    // handed to memcpy even for a zero byte count
    if (size > 0) {
      const std::size_t num_bytes = size * sizeof(double);
      std::memcpy(deserialized_data.back().data(), data, num_bytes);
      data += num_bytes;
    }
  }

  return deserialized_data;
}
|
||||
|
||||
}
|
||||
@ -6,6 +6,17 @@
|
||||
|
||||
namespace poet{
|
||||
|
||||
/**
|
||||
* @brief Calculates the size of stored elements in the EigenModel and TrainData
|
||||
* structs
|
||||
*
|
||||
* @param struct_pointer: pointer to the struct
|
||||
* @param type: determines the struct type given: E for EigenModel and T
|
||||
* for training data vector structures
|
||||
* @return size_t: size of stored elements
|
||||
*/
|
||||
size_t calculateStructSize(void* struct_pointer, char type);
|
||||
|
||||
/**
|
||||
* @brief Serialize the weights and biases of the model into a memory location
|
||||
* to send them via RDMA
|
||||
@ -14,6 +25,9 @@ namespace poet{
|
||||
* model
|
||||
* @param memory: Pointer to the memory location where the serialized data will
|
||||
* be stored
|
||||
* The serialized data looks like this:
|
||||
* |# matrices|# cols of matrix 1|# rows of matrix 1|matrix 1 data|# cols of matrix 2|...
|
||||
* |# bias vectors|length of bias vector 1|bias vector 1 data|length of bias vector 2|...
|
||||
* @return int: 0 if the serialization was successful, -1 otherwise
|
||||
*/
|
||||
int serializeModelWeights(const EigenModel *model, char *memory);
|
||||
@ -31,26 +45,23 @@ EigenModel deserializeModelWeights(char* memory, size_t buffer_size);
|
||||
*
|
||||
* @param data Vector of double vectors containing the training data
|
||||
* @param memory Pointer to the memory location where the serialized data will be stored
|
||||
* The serialized data looks like this:
|
||||
* |# of vectors|length of vector 1.1|vector 1.1 data|length of vector 1.2|...
|
||||
* |# of vectors|length of vector 2.1|vector 2.1 data|length of vector 2.2|...
|
||||
* |length of vector |vector 3 data|
|
||||
* n_training_runs
|
||||
* @return int: 0 if the serialization was successful
|
||||
*/
|
||||
int serializeTrainingData(const TrainingData& data, void *memory);
|
||||
int serializeTrainingData(std::vector<std::vector<double>> *data, char *memory);
|
||||
|
||||
/**
|
||||
* @brief Deserialize the training data from a memory location
|
||||
*
|
||||
* @param data Pointer to the memory location where the serialized data is stored
|
||||
* @return TrainingData struct containing the training data
|
||||
* @param data Pointer to the memory location where the serialized data is
|
||||
* stored
|
||||
* @return std::vector<std::vector<double>> containing n vectors for each
|
||||
* species with m training elements
|
||||
*/
|
||||
TrainingData deserializeTrainingData(void* data);
|
||||
|
||||
/**
|
||||
* @brief Calculates the size of stored elements in the EigenModel and TrainData
|
||||
* structs
|
||||
*
|
||||
* @param struct_pointer: pointer to the struct
|
||||
* @param type: determines the struct type given: E for EigenModel and T TrainData
|
||||
* @return size_t: size of stored elements
|
||||
*/
|
||||
size_t calculateStructSize(void* struct_pointer, char type);
|
||||
std::vector<std::vector<double>> deserializeTrainingData(char* data);
|
||||
}
|
||||
#endif
|
||||
Loading…
x
Reference in New Issue
Block a user