mirror of
https://git.gfz-potsdam.de/naaice/poet.git
synced 2025-12-16 04:48:23 +01:00
feat: bcast control_cell_ids to workers
This commit is contained in:
parent
7c97f29fa6
commit
1b2d942960
@ -2,19 +2,16 @@
|
|||||||
#ifndef CHEMISTRYMODULE_H_
|
#ifndef CHEMISTRYMODULE_H_
|
||||||
#define CHEMISTRYMODULE_H_
|
#define CHEMISTRYMODULE_H_
|
||||||
|
|
||||||
|
#include "ChemistryDefs.hpp"
|
||||||
|
#include "Control/ControlModule.hpp"
|
||||||
#include "DataStructures/Field.hpp"
|
#include "DataStructures/Field.hpp"
|
||||||
#include "DataStructures/NamedVector.hpp"
|
#include "DataStructures/NamedVector.hpp"
|
||||||
|
|
||||||
#include "ChemistryDefs.hpp"
|
|
||||||
|
|
||||||
#include "Init/InitialList.hpp"
|
#include "Init/InitialList.hpp"
|
||||||
#include "NameDouble.h"
|
#include "NameDouble.h"
|
||||||
|
#include "PhreeqcRunner.hpp"
|
||||||
#include "SurrogateModels/DHT_Wrapper.hpp"
|
#include "SurrogateModels/DHT_Wrapper.hpp"
|
||||||
#include "SurrogateModels/Interpolation.hpp"
|
#include "SurrogateModels/Interpolation.hpp"
|
||||||
|
|
||||||
#include "poet.hpp"
|
|
||||||
|
|
||||||
#include "PhreeqcRunner.hpp"
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <map>
|
#include <map>
|
||||||
@ -24,6 +21,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace poet {
|
namespace poet {
|
||||||
|
class ControlModule;
|
||||||
/**
|
/**
|
||||||
* \brief Wrapper around PhreeqcRM to provide POET specific parallelization with
|
* \brief Wrapper around PhreeqcRM to provide POET specific parallelization with
|
||||||
* easy access.
|
* easy access.
|
||||||
@ -211,6 +209,8 @@ public:
|
|||||||
*/
|
*/
|
||||||
std::vector<double> GetWorkerIdleTimings() const;
|
std::vector<double> GetWorkerIdleTimings() const;
|
||||||
|
|
||||||
|
std::vector<double> GetWorkerControlTimings() const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* **Master only** Collect and return DHT hits of all workers.
|
* **Master only** Collect and return DHT hits of all workers.
|
||||||
*
|
*
|
||||||
@ -257,25 +257,15 @@ public:
|
|||||||
|
|
||||||
std::vector<int> ai_surrogate_validity_vector;
|
std::vector<int> ai_surrogate_validity_vector;
|
||||||
|
|
||||||
RuntimeParameters *runtime_params = nullptr;
|
void SetControlModule(poet::ControlModule *ctrl) { control_module = ctrl; }
|
||||||
uint32_t control_iteration_counter = 0;
|
|
||||||
|
|
||||||
struct error_stats {
|
void SetDhtEnabled(bool enabled) { dht_enabled = enabled; }
|
||||||
std::vector<double> mape;
|
bool GetDhtEnabled() const { return dht_enabled; }
|
||||||
std::vector<double> rrsme;
|
|
||||||
uint32_t iteration;
|
|
||||||
|
|
||||||
error_stats(size_t species_count, size_t iter)
|
void SetInterpEnabled(bool enabled) { interp_enabled = enabled; }
|
||||||
: mape(species_count, 0.0), rrsme(species_count, 0.0), iteration(iter) {
|
bool GetInterpEnabled() const { return interp_enabled; }
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<error_stats> error_stats_history;
|
void SetWarmupEnabled(bool enabled) { warmup_enabled = enabled; }
|
||||||
|
|
||||||
static void computeStats(const std::vector<double> &pqc_vector,
|
|
||||||
const std::vector<double> &sur_vector,
|
|
||||||
uint32_t size_per_prop, uint32_t species_count,
|
|
||||||
error_stats &stats);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void initializeDHT(uint32_t size_mb,
|
void initializeDHT(uint32_t size_mb,
|
||||||
@ -290,12 +280,13 @@ protected:
|
|||||||
|
|
||||||
enum {
|
enum {
|
||||||
CHEM_FIELD_INIT,
|
CHEM_FIELD_INIT,
|
||||||
CHEM_DHT_ENABLE,
|
//CHEM_DHT_ENABLE,
|
||||||
CHEM_DHT_SIGNIF_VEC,
|
CHEM_DHT_SIGNIF_VEC,
|
||||||
CHEM_DHT_SNAPS,
|
CHEM_DHT_SNAPS,
|
||||||
CHEM_DHT_READ_FILE,
|
CHEM_DHT_READ_FILE,
|
||||||
CHEM_INTERP,
|
//CHEM_WARMUP_PHASE, // Control flag
|
||||||
CHEM_IP_ENABLE,
|
//CHEM_CTRL_ENABLE, // Control flag
|
||||||
|
//CHEM_IP_ENABLE,
|
||||||
CHEM_IP_MIN_ENTRIES,
|
CHEM_IP_MIN_ENTRIES,
|
||||||
CHEM_IP_SIGNIF_VEC,
|
CHEM_IP_SIGNIF_VEC,
|
||||||
CHEM_WORK_LOOP,
|
CHEM_WORK_LOOP,
|
||||||
@ -308,6 +299,7 @@ protected:
|
|||||||
|
|
||||||
enum {
|
enum {
|
||||||
WORKER_PHREEQC,
|
WORKER_PHREEQC,
|
||||||
|
WORKER_CTRL_ITER,
|
||||||
WORKER_DHT_GET,
|
WORKER_DHT_GET,
|
||||||
WORKER_DHT_FILL,
|
WORKER_DHT_FILL,
|
||||||
WORKER_IDLE,
|
WORKER_IDLE,
|
||||||
@ -330,6 +322,7 @@ protected:
|
|||||||
double dht_get = 0.;
|
double dht_get = 0.;
|
||||||
double dht_fill = 0.;
|
double dht_fill = 0.;
|
||||||
double idle_t = 0.;
|
double idle_t = 0.;
|
||||||
|
double ctrl_t = 0.;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct worker_info_s {
|
struct worker_info_s {
|
||||||
@ -347,7 +340,7 @@ protected:
|
|||||||
void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer,
|
void MasterSendPkgs(worker_list_t &w_list, workpointer_t &work_pointer,
|
||||||
workpointer_t &sur_pointer, int &pkg_to_send,
|
workpointer_t &sur_pointer, int &pkg_to_send,
|
||||||
int &count_pkgs, int &free_workers, double dt,
|
int &count_pkgs, int &free_workers, double dt,
|
||||||
uint32_t iteration, uint32_t control_iteration,
|
uint32_t iteration,
|
||||||
const std::vector<uint32_t> &wp_sizes_vector);
|
const std::vector<uint32_t> &wp_sizes_vector);
|
||||||
void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send,
|
void MasterRecvPkgs(worker_list_t &w_list, int &pkg_to_recv, bool to_send,
|
||||||
int &free_workers);
|
int &free_workers);
|
||||||
@ -385,6 +378,10 @@ protected:
|
|||||||
|
|
||||||
void BCastStringVec(std::vector<std::string> &io);
|
void BCastStringVec(std::vector<std::string> &io);
|
||||||
|
|
||||||
|
int packResultsIntoBuffer(std::vector<double> &mpi_buffer, int base_count,
|
||||||
|
const WorkPackage &wp,
|
||||||
|
const WorkPackage &wp_control);
|
||||||
|
|
||||||
int comm_size, comm_rank;
|
int comm_size, comm_rank;
|
||||||
MPI_Comm group_comm;
|
MPI_Comm group_comm;
|
||||||
|
|
||||||
@ -412,6 +409,7 @@ protected:
|
|||||||
inline void PropagateFunctionType(int &type) const {
|
inline void PropagateFunctionType(int &type) const {
|
||||||
ChemBCast(&type, 1, MPI_INT);
|
ChemBCast(&type, 1, MPI_INT);
|
||||||
}
|
}
|
||||||
|
|
||||||
double simtime = 0.;
|
double simtime = 0.;
|
||||||
double idle_t = 0.;
|
double idle_t = 0.;
|
||||||
double seq_t = 0.;
|
double seq_t = 0.;
|
||||||
@ -419,10 +417,9 @@ protected:
|
|||||||
|
|
||||||
double recv_ctrl_t = 0.;
|
double recv_ctrl_t = 0.;
|
||||||
double shuf_t = 0.;
|
double shuf_t = 0.;
|
||||||
double metrics_t = 0.
|
double metrics_t = 0.;
|
||||||
|
|
||||||
std::array<double, 2>
|
std::array<double, 2> base_totals{0};
|
||||||
base_totals{0};
|
|
||||||
|
|
||||||
bool print_progessbar{false};
|
bool print_progessbar{false};
|
||||||
|
|
||||||
@ -442,8 +439,12 @@ protected:
|
|||||||
|
|
||||||
poet::ControlModule *control_module = nullptr;
|
poet::ControlModule *control_module = nullptr;
|
||||||
|
|
||||||
|
std::vector<double> mpi_surr_buffer;
|
||||||
|
|
||||||
bool control_enabled{false};
|
bool control_enabled{false};
|
||||||
bool warmup_enabled{false};
|
bool warmup_enabled{false};
|
||||||
|
|
||||||
|
// std::vector<double> sur_shuffled;
|
||||||
};
|
};
|
||||||
} // namespace poet
|
} // namespace poet
|
||||||
|
|
||||||
|
|||||||
@ -3,7 +3,6 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <iomanip>
|
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -41,6 +40,12 @@ std::vector<double> poet::ChemistryModule::GetWorkerPhreeqcTimings() const {
|
|||||||
return MasterGatherWorkerTimings(WORKER_PHREEQC);
|
return MasterGatherWorkerTimings(WORKER_PHREEQC);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<double> poet::ChemistryModule::GetWorkerControlTimings() const {
|
||||||
|
int type = CHEM_PERF;
|
||||||
|
MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm);
|
||||||
|
return MasterGatherWorkerTimings(WORKER_CTRL_ITER);
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<double> poet::ChemistryModule::GetWorkerDHTGetTimings() const {
|
std::vector<double> poet::ChemistryModule::GetWorkerDHTGetTimings() const {
|
||||||
int type = CHEM_PERF;
|
int type = CHEM_PERF;
|
||||||
MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm);
|
MPI_Bcast(&type, 1, MPI_INT, 0, this->group_comm);
|
||||||
@ -252,6 +257,8 @@ inline void poet::ChemistryModule::MasterSendPkgs(
|
|||||||
/* note current processed work package in workerlist */
|
/* note current processed work package in workerlist */
|
||||||
w_list[p].send_addr = work_pointer.base();
|
w_list[p].send_addr = work_pointer.base();
|
||||||
w_list[p].surrogate_addr = sur_pointer.base();
|
w_list[p].surrogate_addr = sur_pointer.base();
|
||||||
|
// this->control_enabled ? sur_pointer.base() : w_list[p].surrogate_addr =
|
||||||
|
// nullptr;
|
||||||
|
|
||||||
/* push work pointer to next work package */
|
/* push work pointer to next work package */
|
||||||
const uint32_t end_of_wp = local_work_package_size * this->prop_count;
|
const uint32_t end_of_wp = local_work_package_size * this->prop_count;
|
||||||
@ -349,6 +356,11 @@ inline void poet::ChemistryModule::MasterRecvPkgs(worker_list_t &w_list,
|
|||||||
std::copy(recv_buffer.begin(), recv_buffer.begin() + half,
|
std::copy(recv_buffer.begin(), recv_buffer.begin() + half,
|
||||||
w_list[p - 1].send_addr);
|
w_list[p - 1].send_addr);
|
||||||
|
|
||||||
|
/*
|
||||||
|
if (w_list[p - 1].surrogate_addr == nullptr) {
|
||||||
|
throw std::runtime_error("MasterRecvPkgs: surrogate_addr is null");
|
||||||
|
}*/
|
||||||
|
|
||||||
std::copy(recv_buffer.begin() + (size / 2), recv_buffer.begin() + size,
|
std::copy(recv_buffer.begin() + (size / 2), recv_buffer.begin() + size,
|
||||||
w_list[p - 1].surrogate_addr);
|
w_list[p - 1].surrogate_addr);
|
||||||
recv_ctrl_b = MPI_Wtime();
|
recv_ctrl_b = MPI_Wtime();
|
||||||
@ -418,6 +430,7 @@ void poet::ChemistryModule::MasterRunParallel(double dt) {
|
|||||||
int free_workers;
|
int free_workers;
|
||||||
int i_pkgs;
|
int i_pkgs;
|
||||||
int ftype;
|
int ftype;
|
||||||
|
double shuf_a, shuf_b, metrics_a, metrics_b;
|
||||||
|
|
||||||
const std::vector<uint32_t> wp_sizes_vector =
|
const std::vector<uint32_t> wp_sizes_vector =
|
||||||
CalculateWPSizesVector(this->n_cells, this->wp_size);
|
CalculateWPSizesVector(this->n_cells, this->wp_size);
|
||||||
@ -435,47 +448,34 @@ void poet::ChemistryModule::MasterRunParallel(double dt) {
|
|||||||
ftype = CHEM_WORK_LOOP;
|
ftype = CHEM_WORK_LOOP;
|
||||||
PropagateFunctionType(ftype);
|
PropagateFunctionType(ftype);
|
||||||
|
|
||||||
ftype = CHEM_INTERP;
|
|
||||||
PropagateFunctionType(ftype);
|
|
||||||
|
|
||||||
if (this->runtime_params->rollback_simulation) {
|
|
||||||
this->interp_enabled = false;
|
|
||||||
int interp_flag = 0;
|
|
||||||
ChemBCast(&interp_flag, 1, MPI_INT);
|
|
||||||
} else {
|
|
||||||
this->interp_enabled = true;
|
|
||||||
int interp_flag = 1;
|
|
||||||
ChemBCast(&interp_flag, 1, MPI_INT);
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Barrier(this->group_comm);
|
MPI_Barrier(this->group_comm);
|
||||||
|
|
||||||
static uint32_t iteration = 0;
|
this->control_enabled = this->control_module->getControlIntervalEnabled();
|
||||||
uint32_t control_iteration = static_cast<uint32_t>(
|
if (this->control_enabled) {
|
||||||
this->runtime_params->control_iteration_active ? 1 : 0);
|
this->mpi_surr_buffer.assign(this->n_cells * this->prop_count, 0.0);
|
||||||
if (control_iteration) {
|
|
||||||
sur_shuffled.clear();
|
|
||||||
sur_shuffled.reserve(this->n_cells * this->prop_count);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t iteration = 0;
|
||||||
|
|
||||||
/* start time measurement of sequential part */
|
/* start time measurement of sequential part */
|
||||||
seq_a = MPI_Wtime();
|
seq_a = MPI_Wtime();
|
||||||
|
|
||||||
/* shuffle grid */
|
/* shuffle grid */
|
||||||
// grid.shuffleAndExport(mpi_buffer);
|
// grid.shuffleAndExport(mpi_buffer);
|
||||||
|
|
||||||
std::vector<double> mpi_buffer =
|
std::vector<double> mpi_buffer =
|
||||||
shuffleField(chem_field.AsVector(), this->n_cells, this->prop_count,
|
shuffleField(chem_field.AsVector(), this->n_cells, this->prop_count,
|
||||||
wp_sizes_vector.size());
|
wp_sizes_vector.size());
|
||||||
|
|
||||||
this->sur_shuffled.resize(mpi_buffer.size());
|
//this->mpi_surr_buffer.resize(mpi_buffer.size());
|
||||||
|
|
||||||
/* setup local variables */
|
/* setup local variables */
|
||||||
pkg_to_send = wp_sizes_vector.size();
|
pkg_to_send = wp_sizes_vector.size();
|
||||||
pkg_to_recv = wp_sizes_vector.size();
|
pkg_to_recv = wp_sizes_vector.size();
|
||||||
|
|
||||||
workpointer_t work_pointer = mpi_buffer.begin();
|
workpointer_t work_pointer = mpi_buffer.begin();
|
||||||
workpointer_t sur_pointer = sur_shuffled.begin();
|
workpointer_t sur_pointer = this->mpi_surr_buffer.begin();
|
||||||
|
//(this->control_enabled ? this->mpi_surr_buffer.begin()
|
||||||
|
// : mpi_buffer.end());
|
||||||
worker_list_t worker_list(this->comm_size - 1);
|
worker_list_t worker_list(this->comm_size - 1);
|
||||||
|
|
||||||
free_workers = this->comm_size - 1;
|
free_workers = this->comm_size - 1;
|
||||||
@ -499,8 +499,7 @@ void poet::ChemistryModule::MasterRunParallel(double dt) {
|
|||||||
if (pkg_to_send > 0) {
|
if (pkg_to_send > 0) {
|
||||||
// send packages to all free workers ...
|
// send packages to all free workers ...
|
||||||
MasterSendPkgs(worker_list, work_pointer, sur_pointer, pkg_to_send,
|
MasterSendPkgs(worker_list, work_pointer, sur_pointer, pkg_to_send,
|
||||||
i_pkgs, free_workers, dt, iteration, control_iteration,
|
i_pkgs, free_workers, dt, iteration, wp_sizes_vector);
|
||||||
wp_sizes_vector);
|
|
||||||
}
|
}
|
||||||
// ... and try to receive them from workers who has finished their work
|
// ... and try to receive them from workers who has finished their work
|
||||||
MasterRecvPkgs(worker_list, pkg_to_recv, pkg_to_send > 0, free_workers);
|
MasterRecvPkgs(worker_list, pkg_to_recv, pkg_to_send > 0, free_workers);
|
||||||
@ -524,15 +523,13 @@ void poet::ChemistryModule::MasterRunParallel(double dt) {
|
|||||||
chem_field = out_vec;
|
chem_field = out_vec;
|
||||||
|
|
||||||
/* do master stuff */
|
/* do master stuff */
|
||||||
|
if (this->control_enabled) {
|
||||||
/* do master stuff */
|
|
||||||
if (control_enabled) {
|
|
||||||
std::cout << "[Master] Control logic enabled for this iteration."
|
std::cout << "[Master] Control logic enabled for this iteration."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
std::vector<double> sur_unshuffled{mpi_surr_buffer};
|
std::vector<double> sur_unshuffled{mpi_surr_buffer};
|
||||||
|
|
||||||
shuf_a = MPI_Wtime();
|
shuf_a = MPI_Wtime();
|
||||||
unshuffleField(mpi_surr_buffer, this->n_cells, this->prop_count,
|
unshuffleField(this->mpi_surr_buffer, this->n_cells, this->prop_count,
|
||||||
wp_sizes_vector.size(), sur_unshuffled);
|
wp_sizes_vector.size(), sur_unshuffled);
|
||||||
shuf_b = MPI_Wtime();
|
shuf_b = MPI_Wtime();
|
||||||
this->shuf_t += shuf_b - shuf_a;
|
this->shuf_t += shuf_b - shuf_a;
|
||||||
@ -550,7 +547,6 @@ void poet::ChemistryModule::MasterRunParallel(double dt) {
|
|||||||
this->metrics_t += metrics_b - metrics_a;
|
this->metrics_t += metrics_b - metrics_a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* start time measurement of master chemistry */
|
/* start time measurement of master chemistry */
|
||||||
sim_e_chemistry = MPI_Wtime();
|
sim_e_chemistry = MPI_Wtime();
|
||||||
|
|
||||||
|
|||||||
@ -9,17 +9,15 @@
|
|||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <mpi.h>
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <mpi.h>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace poet
|
namespace poet {
|
||||||
{
|
|
||||||
|
|
||||||
inline std::string get_string(int root, MPI_Comm communicator)
|
inline std::string get_string(int root, MPI_Comm communicator) {
|
||||||
{
|
|
||||||
int count;
|
int count;
|
||||||
MPI_Bcast(&count, 1, MPI_INT, root, communicator);
|
MPI_Bcast(&count, 1, MPI_INT, root, communicator);
|
||||||
|
|
||||||
@ -34,8 +32,7 @@ namespace poet
|
|||||||
return ret_str;
|
return ret_str;
|
||||||
}
|
}
|
||||||
|
|
||||||
void poet::ChemistryModule::WorkerLoop()
|
void poet::ChemistryModule::WorkerLoop() {
|
||||||
{
|
|
||||||
struct worker_s timings;
|
struct worker_s timings;
|
||||||
|
|
||||||
// HACK: defining the worker iteration count here, which will increment after
|
// HACK: defining the worker iteration count here, which will increment after
|
||||||
@ -43,62 +40,76 @@ namespace poet
|
|||||||
uint32_t iteration = 1;
|
uint32_t iteration = 1;
|
||||||
bool loop = true;
|
bool loop = true;
|
||||||
|
|
||||||
while (loop)
|
while (loop) {
|
||||||
{
|
|
||||||
int func_type;
|
int func_type;
|
||||||
PropagateFunctionType(func_type);
|
PropagateFunctionType(func_type);
|
||||||
|
|
||||||
switch (func_type)
|
switch (func_type) {
|
||||||
{
|
case CHEM_FIELD_INIT: {
|
||||||
case CHEM_FIELD_INIT:
|
|
||||||
{
|
|
||||||
ChemBCast(&this->prop_count, 1, MPI_UINT32_T);
|
ChemBCast(&this->prop_count, 1, MPI_UINT32_T);
|
||||||
if (this->ai_surrogate_enabled)
|
if (this->ai_surrogate_enabled) {
|
||||||
{
|
|
||||||
this->ai_surrogate_validity_vector.resize(
|
this->ai_surrogate_validity_vector.resize(
|
||||||
this->n_cells); // resize statt reserve?
|
this->n_cells); // resize statt reserve?
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CHEM_AI_BCAST_VALIDITY:
|
case CHEM_AI_BCAST_VALIDITY: {
|
||||||
{
|
|
||||||
// Receive the index vector of valid ai surrogate predictions
|
// Receive the index vector of valid ai surrogate predictions
|
||||||
MPI_Bcast(&this->ai_surrogate_validity_vector.front(), this->n_cells,
|
MPI_Bcast(&this->ai_surrogate_validity_vector.front(), this->n_cells,
|
||||||
MPI_INT, 0, this->group_comm);
|
MPI_INT, 0, this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CHEM_INTERP:
|
/*
|
||||||
{
|
case CHEM_WARMUP_PHASE: {
|
||||||
int interp_flag;
|
int warmup_flag = 0;
|
||||||
ChemBCast(&interp_flag, 1, MPI_INT);
|
ChemBCast(&warmup_flag, 1, MPI_INT);
|
||||||
this->interp_enabled = (interp_flag == 1);
|
this->warmup_enabled = (warmup_flag == 1);
|
||||||
|
//std::cout << "Warmup phase is " << this->warmup_enabled << std::endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CHEM_WORK_LOOP:
|
case CHEM_DHT_ENABLE: {
|
||||||
{
|
int dht_flag = 0;
|
||||||
|
ChemBCast(&dht_flag, 1, MPI_INT);
|
||||||
|
this->dht_enabled = (dht_flag == 1);
|
||||||
|
//std::cout << "DHT_enabled is " << this->dht_enabled << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case CHEM_IP_ENABLE: {
|
||||||
|
int interp_flag = 0;
|
||||||
|
ChemBCast(&interp_flag, 1, MPI_INT);
|
||||||
|
this->interp_enabled = (interp_flag == 1);
|
||||||
|
;
|
||||||
|
std::cout << "Interp_enabled is " << this->interp_enabled << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case CHEM_CTRL_ENABLE: {
|
||||||
|
int control_flag = 0;
|
||||||
|
ChemBCast(&control_flag, 1, MPI_INT);
|
||||||
|
this->control_enabled = (control_flag == 1);
|
||||||
|
std::cout << "Control_enabled is " << this->control_enabled << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
case CHEM_WORK_LOOP: {
|
||||||
WorkerProcessPkgs(timings, iteration);
|
WorkerProcessPkgs(timings, iteration);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CHEM_PERF:
|
case CHEM_PERF: {
|
||||||
{
|
|
||||||
int type;
|
int type;
|
||||||
ChemBCast(&type, 1, MPI_INT);
|
ChemBCast(&type, 1, MPI_INT);
|
||||||
if (type < WORKER_DHT_HITS)
|
if (type < WORKER_DHT_HITS) {
|
||||||
{
|
|
||||||
WorkerPerfToMaster(type, timings);
|
WorkerPerfToMaster(type, timings);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
WorkerMetricsToMaster(type);
|
WorkerMetricsToMaster(type);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CHEM_BREAK_MAIN_LOOP:
|
case CHEM_BREAK_MAIN_LOOP: {
|
||||||
{
|
|
||||||
WorkerPostSim(iteration);
|
WorkerPostSim(iteration);
|
||||||
loop = false;
|
loop = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default: {
|
||||||
{
|
|
||||||
throw std::runtime_error("Worker received unknown tag from master.");
|
throw std::runtime_error("Worker received unknown tag from master.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -106,23 +117,19 @@ namespace poet
|
|||||||
}
|
}
|
||||||
|
|
||||||
void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings,
|
void poet::ChemistryModule::WorkerProcessPkgs(struct worker_s &timings,
|
||||||
uint32_t &iteration)
|
uint32_t &iteration) {
|
||||||
{
|
|
||||||
MPI_Status probe_status;
|
MPI_Status probe_status;
|
||||||
bool loop = true;
|
bool loop = true;
|
||||||
|
|
||||||
MPI_Barrier(this->group_comm);
|
MPI_Barrier(this->group_comm);
|
||||||
|
|
||||||
while (loop)
|
while (loop) {
|
||||||
{
|
|
||||||
double idle_a = MPI_Wtime();
|
double idle_a = MPI_Wtime();
|
||||||
MPI_Probe(0, MPI_ANY_TAG, this->group_comm, &probe_status);
|
MPI_Probe(0, MPI_ANY_TAG, this->group_comm, &probe_status);
|
||||||
double idle_b = MPI_Wtime();
|
double idle_b = MPI_Wtime();
|
||||||
|
|
||||||
switch (probe_status.MPI_TAG)
|
switch (probe_status.MPI_TAG) {
|
||||||
{
|
case LOOP_WORK: {
|
||||||
case LOOP_WORK:
|
|
||||||
{
|
|
||||||
timings.idle_t += idle_b - idle_a;
|
timings.idle_t += idle_b - idle_a;
|
||||||
int count;
|
int count;
|
||||||
MPI_Get_count(&probe_status, MPI_DOUBLE, &count);
|
MPI_Get_count(&probe_status, MPI_DOUBLE, &count);
|
||||||
@ -130,8 +137,7 @@ namespace poet
|
|||||||
WorkerDoWork(probe_status, count, timings);
|
WorkerDoWork(probe_status, count, timings);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case LOOP_END:
|
case LOOP_END: {
|
||||||
{
|
|
||||||
WorkerPostIter(probe_status, iteration);
|
WorkerPostIter(probe_status, iteration);
|
||||||
iteration++;
|
iteration++;
|
||||||
loop = false;
|
loop = false;
|
||||||
@ -143,20 +149,20 @@ namespace poet
|
|||||||
|
|
||||||
void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status,
|
void poet::ChemistryModule::WorkerDoWork(MPI_Status &probe_status,
|
||||||
int double_count,
|
int double_count,
|
||||||
struct worker_s &timings)
|
struct worker_s &timings) {
|
||||||
{
|
|
||||||
static int counter = 1;
|
static int counter = 1;
|
||||||
|
|
||||||
double dht_get_start, dht_get_end;
|
double dht_get_start, dht_get_end;
|
||||||
double phreeqc_time_start, phreeqc_time_end;
|
double phreeqc_time_start, phreeqc_time_end;
|
||||||
double dht_fill_start, dht_fill_end;
|
double dht_fill_start, dht_fill_end;
|
||||||
|
double ctrl_cp_start, ctrl_cp_end, ctrl_start, ctrl_end;
|
||||||
|
|
||||||
uint32_t iteration;
|
uint32_t iteration;
|
||||||
double dt;
|
double dt;
|
||||||
double current_sim_time;
|
double current_sim_time;
|
||||||
uint32_t wp_start_index;
|
uint32_t wp_start_index;
|
||||||
int count = double_count;
|
int count = double_count;
|
||||||
bool control_iteration_active = false;
|
int flags;
|
||||||
std::vector<double> mpi_buffer(count);
|
std::vector<double> mpi_buffer(count);
|
||||||
|
|
||||||
/* receive */
|
/* receive */
|
||||||
@ -183,23 +189,31 @@ namespace poet
|
|||||||
// current work package start location in field
|
// current work package start location in field
|
||||||
wp_start_index = mpi_buffer[count + 4];
|
wp_start_index = mpi_buffer[count + 4];
|
||||||
|
|
||||||
control_iteration_active = (mpi_buffer[count + 5] == 1);
|
// read packed control flags
|
||||||
|
flags = static_cast<int>(mpi_buffer[count + 5]);
|
||||||
|
this->interp_enabled = (flags & 1) != 0;
|
||||||
|
this->dht_enabled = (flags & 2) != 0;
|
||||||
|
this->warmup_enabled = (flags & 4) != 0;
|
||||||
|
this->control_enabled = (flags & 8) != 0;
|
||||||
|
|
||||||
for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++)
|
/*std::cout << "warmup_enabled is " << warmup_enabled << ", control_enabled is
|
||||||
{
|
"
|
||||||
|
<< control_enabled << ", dht_enabled is "
|
||||||
|
<< dht_enabled << ", interp_enabled is " << interp_enabled
|
||||||
|
<< std::endl;*/
|
||||||
|
|
||||||
|
for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) {
|
||||||
s_curr_wp.input[wp_i] =
|
s_curr_wp.input[wp_i] =
|
||||||
std::vector<double>(mpi_buffer.begin() + this->prop_count * wp_i,
|
std::vector<double>(mpi_buffer.begin() + this->prop_count * wp_i,
|
||||||
mpi_buffer.begin() + this->prop_count * (wp_i + 1));
|
mpi_buffer.begin() + this->prop_count * (wp_i + 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
// std::cout << this->comm_rank << ":" << counter++ << std::endl;
|
// std::cout << this->comm_rank << ":" << counter++ << std::endl;
|
||||||
if (dht_enabled || interp_enabled)
|
if (dht_enabled || interp_enabled || warmup_enabled) {
|
||||||
{
|
|
||||||
dht->prepareKeys(s_curr_wp.input, dt);
|
dht->prepareKeys(s_curr_wp.input, dt);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dht_enabled)
|
if (dht_enabled) {
|
||||||
{
|
|
||||||
/* check for values in DHT */
|
/* check for values in DHT */
|
||||||
dht_get_start = MPI_Wtime();
|
dht_get_start = MPI_Wtime();
|
||||||
dht->checkDHT(s_curr_wp);
|
dht->checkDHT(s_curr_wp);
|
||||||
@ -207,135 +221,132 @@ namespace poet
|
|||||||
timings.dht_get += dht_get_end - dht_get_start;
|
timings.dht_get += dht_get_end - dht_get_start;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (interp_enabled)
|
if (interp_enabled) {
|
||||||
{
|
|
||||||
interp->tryInterpolation(s_curr_wp);
|
interp->tryInterpolation(s_curr_wp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->ai_surrogate_enabled)
|
if (this->ai_surrogate_enabled) {
|
||||||
{
|
|
||||||
// Map valid predictions from the ai surrogate in the workpackage
|
// Map valid predictions from the ai surrogate in the workpackage
|
||||||
for (int i = 0; i < s_curr_wp.size; i++)
|
for (int i = 0; i < s_curr_wp.size; i++) {
|
||||||
{
|
if (this->ai_surrogate_validity_vector[wp_start_index + i] == 1) {
|
||||||
if (this->ai_surrogate_validity_vector[wp_start_index + i] == 1)
|
|
||||||
{
|
|
||||||
s_curr_wp.mapping[i] = CHEM_AISURR;
|
s_curr_wp.mapping[i] = CHEM_AISURR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if control iteration: create copy surrogate results (output and mappings) and then set them to zero,
|
/* if control iteration: create copy surrogate results (output and mappings)
|
||||||
give this to phreeqc */
|
and then set them to zero, give this to phreeqc */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
poet::WorkPackage s_curr_wp_control = s_curr_wp;
|
poet::WorkPackage s_curr_wp_control = s_curr_wp;
|
||||||
|
|
||||||
if (control_iteration_active)
|
|
||||||
{
|
|
||||||
for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++)
|
/*
|
||||||
{
|
if (control_enabled) {
|
||||||
|
ctrl_cp_start = MPI_Wtime();
|
||||||
|
for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++) {
|
||||||
s_curr_wp_control.output[wp_i] = std::vector<double>(this->prop_count, 0.0);
|
s_curr_wp_control.output[wp_i] = std::vector<double>(this->prop_count, 0.0);
|
||||||
s_curr_wp_control.mapping[wp_i] = 0;
|
s_curr_wp_control.mapping[wp_i] = CHEM_PQC;
|
||||||
}
|
}
|
||||||
|
ctrl_cp_end = MPI_Wtime();
|
||||||
|
timings.ctrl_t += ctrl_cp_end - ctrl_cp_start;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
phreeqc_time_start = MPI_Wtime();
|
phreeqc_time_start = MPI_Wtime();
|
||||||
|
|
||||||
WorkerRunWorkPackage(control_iteration_active ? s_curr_wp_control : s_curr_wp, current_sim_time, dt);
|
WorkerRunWorkPackage(control_enabled ? s_curr_wp_control : s_curr_wp,
|
||||||
|
current_sim_time, dt);
|
||||||
|
|
||||||
phreeqc_time_end = MPI_Wtime();
|
phreeqc_time_end = MPI_Wtime();
|
||||||
|
|
||||||
if (control_iteration_active)
|
for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) {
|
||||||
{
|
std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(),
|
||||||
|
mpi_buffer.begin() + this->prop_count * wp_i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
if (control_enabled) {
|
||||||
|
ctrl_start = MPI_Wtime();
|
||||||
std::size_t sur_wp_offset = s_curr_wp.size * this->prop_count;
|
std::size_t sur_wp_offset = s_curr_wp.size * this->prop_count;
|
||||||
|
|
||||||
mpi_buffer.resize(count + sur_wp_offset);
|
mpi_buffer.resize(count + sur_wp_offset);
|
||||||
|
|
||||||
for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++)
|
for (std::size_t wp_i = 0; wp_i < s_curr_wp_control.size; wp_i++) {
|
||||||
{
|
std::copy(s_curr_wp_control.output[wp_i].begin(),
|
||||||
std::copy(s_curr_wp_control.output[wp_i].begin(), s_curr_wp_control.output[wp_i].end(),
|
s_curr_wp_control.output[wp_i].end(),
|
||||||
mpi_buffer.begin() + this->prop_count * wp_i);
|
mpi_buffer.begin() + this->prop_count * wp_i);
|
||||||
}
|
}
|
||||||
|
|
||||||
// s_curr_wp only contains the interpolated data
|
// s_curr_wp only contains the interpolated data
|
||||||
// copy surrogate output after the the pqc output, mpi_buffer[pqc][interp]
|
// copy surrogate output after the the pqc output, mpi_buffer[pqc][interp]
|
||||||
|
|
||||||
for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++)
|
for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++) {
|
||||||
{
|
// only copy if surrogate was used
|
||||||
if (s_curr_wp.mapping[wp_i] != CHEM_PQC) // only copy if surrogate was used
|
if (s_curr_wp.mapping[wp_i] != CHEM_PQC) {
|
||||||
{
|
|
||||||
std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(),
|
std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(),
|
||||||
mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i);
|
mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i);
|
||||||
} else
|
} else {
|
||||||
{
|
|
||||||
// if pqc was used, copy pqc results again
|
// if pqc was used, copy pqc results again
|
||||||
std::copy(s_curr_wp_control.output[wp_i].begin(), s_curr_wp_control.output[wp_i].end(),
|
std::copy(s_curr_wp_control.output[wp_i].begin(),
|
||||||
|
s_curr_wp_control.output[wp_i].end(),
|
||||||
mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i);
|
mpi_buffer.begin() + sur_wp_offset + this->prop_count * wp_i);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
count += sur_wp_offset;
|
count += sur_wp_offset;
|
||||||
|
ctrl_end = MPI_Wtime();
|
||||||
|
timings.ctrl_t += ctrl_end - ctrl_start;
|
||||||
|
} else {
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
*/
|
||||||
{
|
|
||||||
for (std::size_t wp_i = 0; wp_i < s_curr_wp.size; wp_i++)
|
|
||||||
{
|
|
||||||
std::copy(s_curr_wp.output[wp_i].begin(), s_curr_wp.output[wp_i].end(),
|
|
||||||
mpi_buffer.begin() + this->prop_count * wp_i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* send results to master */
|
/* send results to master */
|
||||||
MPI_Request send_req;
|
MPI_Request send_req;
|
||||||
|
|
||||||
int mpi_tag = control_iteration_active ? LOOP_CTRL : LOOP_WORK;
|
int mpi_tag = control_enabled ? LOOP_CTRL : LOOP_WORK;
|
||||||
MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, mpi_tag, MPI_COMM_WORLD, &send_req);
|
MPI_Isend(mpi_buffer.data(), count, MPI_DOUBLE, 0, mpi_tag, MPI_COMM_WORLD,
|
||||||
|
&send_req);
|
||||||
|
|
||||||
if (dht_enabled || interp_enabled)
|
if (dht_enabled || interp_enabled || warmup_enabled) {
|
||||||
{
|
|
||||||
/* write results to DHT */
|
/* write results to DHT */
|
||||||
dht_fill_start = MPI_Wtime();
|
dht_fill_start = MPI_Wtime();
|
||||||
dht->fillDHT(control_iteration_active ? s_curr_wp_control : s_curr_wp);
|
dht->fillDHT(control_enabled ? s_curr_wp_control : s_curr_wp);
|
||||||
dht_fill_end = MPI_Wtime();
|
dht_fill_end = MPI_Wtime();
|
||||||
|
|
||||||
if (interp_enabled)
|
if (interp_enabled || warmup_enabled) {
|
||||||
{
|
|
||||||
interp->writePairs();
|
interp->writePairs();
|
||||||
}
|
}
|
||||||
timings.dht_fill += dht_fill_end - dht_fill_start;
|
timings.dht_fill += dht_fill_end - dht_fill_start;
|
||||||
}
|
}
|
||||||
|
|
||||||
timings.phreeqc_t += phreeqc_time_end - phreeqc_time_start;
|
timings.phreeqc_t += phreeqc_time_end - phreeqc_time_start;
|
||||||
|
|
||||||
MPI_Wait(&send_req, MPI_STATUS_IGNORE);
|
MPI_Wait(&send_req, MPI_STATUS_IGNORE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status,
|
void poet::ChemistryModule::WorkerPostIter(MPI_Status &prope_status,
|
||||||
uint32_t iteration)
|
uint32_t iteration) {
|
||||||
{
|
|
||||||
MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm,
|
MPI_Recv(NULL, 0, MPI_DOUBLE, 0, LOOP_END, this->group_comm,
|
||||||
MPI_STATUS_IGNORE);
|
MPI_STATUS_IGNORE);
|
||||||
|
|
||||||
if (this->dht_enabled)
|
if (this->dht_enabled) {
|
||||||
{
|
|
||||||
dht_hits.push_back(dht->getHits());
|
dht_hits.push_back(dht->getHits());
|
||||||
dht_evictions.push_back(dht->getEvictions());
|
dht_evictions.push_back(dht->getEvictions());
|
||||||
dht->resetCounter();
|
dht->resetCounter();
|
||||||
|
|
||||||
if (this->dht_snaps_type == DHT_SNAPS_ITEREND)
|
if (this->dht_snaps_type == DHT_SNAPS_ITEREND) {
|
||||||
{
|
|
||||||
WorkerWriteDHTDump(iteration);
|
WorkerWriteDHTDump(iteration);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->interp_enabled)
|
if (this->interp_enabled) {
|
||||||
{
|
|
||||||
std::stringstream out;
|
std::stringstream out;
|
||||||
interp_calls.push_back(interp->getInterpolationCount());
|
interp_calls.push_back(interp->getInterpolationCount());
|
||||||
interp->resetCounter();
|
interp->resetCounter();
|
||||||
interp->writePHTStats();
|
interp->writePHTStats();
|
||||||
if (this->dht_snaps_type == DHT_SNAPS_ITEREND)
|
if (this->dht_snaps_type == DHT_SNAPS_ITEREND) {
|
||||||
{
|
|
||||||
out << this->dht_file_out_dir << "/iter_" << std::setfill('0')
|
out << this->dht_file_out_dir << "/iter_" << std::setfill('0')
|
||||||
<< std::setw(this->file_pad) << iteration << ".pht";
|
<< std::setw(this->file_pad) << iteration << ".pht";
|
||||||
interp->dumpPHTState(out.str());
|
interp->dumpPHTState(out.str());
|
||||||
@ -344,12 +355,10 @@ namespace poet
|
|||||||
const auto max_mean_idx =
|
const auto max_mean_idx =
|
||||||
DHT_get_used_idx_factor(this->interp->getDHTObject(), 1);
|
DHT_get_used_idx_factor(this->interp->getDHTObject(), 1);
|
||||||
|
|
||||||
if (max_mean_idx >= 2)
|
if (max_mean_idx >= 2) {
|
||||||
{
|
|
||||||
DHT_flush(this->interp->getDHTObject());
|
DHT_flush(this->interp->getDHTObject());
|
||||||
DHT_flush(this->dht->getDHT());
|
DHT_flush(this->dht->getDHT());
|
||||||
if (this->comm_rank == 2)
|
if (this->comm_rank == 2) {
|
||||||
{
|
|
||||||
std::cout << "Flushed both DHT and PHT!\n\n";
|
std::cout << "Flushed both DHT and PHT!\n\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -358,14 +367,11 @@ namespace poet
|
|||||||
RInsidePOET::getInstance().parseEvalQ("gc()");
|
RInsidePOET::getInstance().parseEvalQ("gc()");
|
||||||
}
|
}
|
||||||
|
|
||||||
void poet::ChemistryModule::WorkerPostSim(uint32_t iteration)
|
void poet::ChemistryModule::WorkerPostSim(uint32_t iteration) {
|
||||||
{
|
if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) {
|
||||||
if (this->dht_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND)
|
|
||||||
{
|
|
||||||
WorkerWriteDHTDump(iteration);
|
WorkerWriteDHTDump(iteration);
|
||||||
}
|
}
|
||||||
if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND)
|
if (this->interp_enabled && this->dht_snaps_type >= DHT_SNAPS_ITEREND) {
|
||||||
{
|
|
||||||
std::stringstream out;
|
std::stringstream out;
|
||||||
out << this->dht_file_out_dir << "/iter_" << std::setfill('0')
|
out << this->dht_file_out_dir << "/iter_" << std::setfill('0')
|
||||||
<< std::setw(this->file_pad) << iteration << ".pht";
|
<< std::setw(this->file_pad) << iteration << ".pht";
|
||||||
@ -373,8 +379,7 @@ namespace poet
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration)
|
void poet::ChemistryModule::WorkerWriteDHTDump(uint32_t iteration) {
|
||||||
{
|
|
||||||
std::stringstream out;
|
std::stringstream out;
|
||||||
out << this->dht_file_out_dir << "/iter_" << std::setfill('0')
|
out << this->dht_file_out_dir << "/iter_" << std::setfill('0')
|
||||||
<< std::setw(this->file_pad) << iteration << ".dht";
|
<< std::setw(this->file_pad) << iteration << ".dht";
|
||||||
@ -388,26 +393,19 @@ namespace poet
|
|||||||
}
|
}
|
||||||
|
|
||||||
void poet::ChemistryModule::WorkerReadDHTDump(
|
void poet::ChemistryModule::WorkerReadDHTDump(
|
||||||
const std::string &dht_input_file)
|
const std::string &dht_input_file) {
|
||||||
{
|
|
||||||
int res = dht->fileToTable((char *)dht_input_file.c_str());
|
int res = dht->fileToTable((char *)dht_input_file.c_str());
|
||||||
if (res != DHT_SUCCESS)
|
if (res != DHT_SUCCESS) {
|
||||||
{
|
if (res == DHT_WRONG_FILE) {
|
||||||
if (res == DHT_WRONG_FILE)
|
|
||||||
{
|
|
||||||
if (this->comm_rank == 1)
|
if (this->comm_rank == 1)
|
||||||
std::cerr
|
std::cerr
|
||||||
<< "CPP: Worker: Wrong file layout! Continue with empty DHT ...\n";
|
<< "CPP: Worker: Wrong file layout! Continue with empty DHT ...\n";
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
{
|
|
||||||
if (this->comm_rank == 1)
|
if (this->comm_rank == 1)
|
||||||
std::cerr << "CPP: Worker: Error in loading current state of DHT from "
|
std::cerr << "CPP: Worker: Error in loading current state of DHT from "
|
||||||
"file. Continue with empty DHT ...\n";
|
"file. Continue with empty DHT ...\n";
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
{
|
|
||||||
if (this->comm_rank == 2)
|
if (this->comm_rank == 2)
|
||||||
std::cout << "CPP: Worker: Successfully loaded state of DHT from file "
|
std::cout << "CPP: Worker: Successfully loaded state of DHT from file "
|
||||||
<< dht_input_file << "\n";
|
<< dht_input_file << "\n";
|
||||||
@ -416,32 +414,27 @@ namespace poet
|
|||||||
|
|
||||||
void poet::ChemistryModule::WorkerRunWorkPackage(WorkPackage &work_package,
|
void poet::ChemistryModule::WorkerRunWorkPackage(WorkPackage &work_package,
|
||||||
double dSimTime,
|
double dSimTime,
|
||||||
double dTimestep)
|
double dTimestep) {
|
||||||
{
|
|
||||||
|
|
||||||
std::vector<std::vector<double>> inout_chem = work_package.input;
|
std::vector<std::vector<double>> inout_chem = work_package.input;
|
||||||
std::vector<std::size_t> to_ignore;
|
std::vector<std::size_t> to_ignore;
|
||||||
|
|
||||||
for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++)
|
for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) {
|
||||||
{
|
if (work_package.mapping[wp_id] != CHEM_PQC) {
|
||||||
if (work_package.mapping[wp_id] != CHEM_PQC)
|
|
||||||
{
|
|
||||||
to_ignore.push_back(wp_id);
|
to_ignore.push_back(wp_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
// HACK: remove the first element (cell_id) before sending to phreeqc
|
// HACK: remove the first element (cell_id) before sending to phreeqc
|
||||||
inout_chem[wp_id].erase(
|
inout_chem[wp_id].erase(inout_chem[wp_id].begin(),
|
||||||
inout_chem[wp_id].begin(), inout_chem[wp_id].begin() + 1);
|
inout_chem[wp_id].begin() + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->pqc_runner->run(inout_chem, dTimestep, to_ignore);
|
this->pqc_runner->run(inout_chem, dTimestep, to_ignore);
|
||||||
|
|
||||||
for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++)
|
for (std::size_t wp_id = 0; wp_id < work_package.size; wp_id++) {
|
||||||
{
|
if (work_package.mapping[wp_id] == CHEM_PQC) {
|
||||||
if (work_package.mapping[wp_id] == CHEM_PQC)
|
// HACK: as we removed the first element (cell_id) before sending to
|
||||||
{
|
// phreeqc, copy back with an offset of 1
|
||||||
// HACK: as we removed the first element (cell_id) before sending to phreeqc,
|
|
||||||
// copy back with an offset of 1
|
|
||||||
work_package.output[wp_id] = work_package.input[wp_id];
|
work_package.output[wp_id] = work_package.input[wp_id];
|
||||||
std::copy(inout_chem[wp_id].begin(), inout_chem[wp_id].end(),
|
std::copy(inout_chem[wp_id].begin(), inout_chem[wp_id].end(),
|
||||||
work_package.output[wp_id].begin() + 1);
|
work_package.output[wp_id].begin() + 1);
|
||||||
@ -450,67 +443,60 @@ namespace poet
|
|||||||
}
|
}
|
||||||
|
|
||||||
void poet::ChemistryModule::WorkerPerfToMaster(int type,
|
void poet::ChemistryModule::WorkerPerfToMaster(int type,
|
||||||
const struct worker_s &timings)
|
const struct worker_s &timings) {
|
||||||
{
|
switch (type) {
|
||||||
switch (type)
|
case WORKER_PHREEQC: {
|
||||||
{
|
|
||||||
case WORKER_PHREEQC:
|
|
||||||
{
|
|
||||||
MPI_Gather(&timings.phreeqc_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
MPI_Gather(&timings.phreeqc_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
||||||
this->group_comm);
|
this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_DHT_GET:
|
case WORKER_CTRL_ITER: {
|
||||||
{
|
MPI_Gather(&timings.ctrl_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
||||||
|
this->group_comm);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case WORKER_DHT_GET: {
|
||||||
MPI_Gather(&timings.dht_get, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
MPI_Gather(&timings.dht_get, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
||||||
this->group_comm);
|
this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_DHT_FILL:
|
case WORKER_DHT_FILL: {
|
||||||
{
|
|
||||||
MPI_Gather(&timings.dht_fill, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
MPI_Gather(&timings.dht_fill, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
||||||
this->group_comm);
|
this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_IDLE:
|
case WORKER_IDLE: {
|
||||||
{
|
|
||||||
MPI_Gather(&timings.idle_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
MPI_Gather(&timings.idle_t, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0,
|
||||||
this->group_comm);
|
this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_IP_WRITE:
|
case WORKER_IP_WRITE: {
|
||||||
{
|
|
||||||
double val = interp->getPHTWriteTime();
|
double val = interp->getPHTWriteTime();
|
||||||
MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm);
|
MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_IP_READ:
|
case WORKER_IP_READ: {
|
||||||
{
|
|
||||||
double val = interp->getPHTReadTime();
|
double val = interp->getPHTReadTime();
|
||||||
MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm);
|
MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_IP_GATHER:
|
case WORKER_IP_GATHER: {
|
||||||
{
|
|
||||||
double val = interp->getDHTGatherTime();
|
double val = interp->getDHTGatherTime();
|
||||||
MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm);
|
MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_IP_FC:
|
case WORKER_IP_FC: {
|
||||||
{
|
|
||||||
double val = interp->getInterpolationTime();
|
double val = interp->getInterpolationTime();
|
||||||
MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm);
|
MPI_Gather(&val, 1, MPI_DOUBLE, NULL, 1, MPI_DOUBLE, 0, this->group_comm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default: {
|
||||||
{
|
|
||||||
throw std::runtime_error("Unknown perf type in master's message.");
|
throw std::runtime_error("Unknown perf type in master's message.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void poet::ChemistryModule::WorkerMetricsToMaster(int type)
|
void poet::ChemistryModule::WorkerMetricsToMaster(int type) {
|
||||||
{
|
|
||||||
MPI_Comm worker_comm = dht->getCommunicator();
|
MPI_Comm worker_comm = dht->getCommunicator();
|
||||||
int worker_rank;
|
int worker_rank;
|
||||||
MPI_Comm_rank(worker_comm, &worker_rank);
|
MPI_Comm_rank(worker_comm, &worker_rank);
|
||||||
@ -518,44 +504,36 @@ namespace poet
|
|||||||
MPI_Comm &group_comm = this->group_comm;
|
MPI_Comm &group_comm = this->group_comm;
|
||||||
|
|
||||||
auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm](
|
auto reduce_and_send = [&worker_rank, &worker_comm, &group_comm](
|
||||||
std::vector<std::uint32_t> &send_buffer, int tag)
|
std::vector<std::uint32_t> &send_buffer, int tag) {
|
||||||
{
|
|
||||||
std::vector<uint32_t> to_master(send_buffer.size());
|
std::vector<uint32_t> to_master(send_buffer.size());
|
||||||
MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(),
|
MPI_Reduce(send_buffer.data(), to_master.data(), send_buffer.size(),
|
||||||
MPI_UINT32_T, MPI_SUM, 0, worker_comm);
|
MPI_UINT32_T, MPI_SUM, 0, worker_comm);
|
||||||
|
|
||||||
if (worker_rank == 0)
|
if (worker_rank == 0) {
|
||||||
{
|
|
||||||
MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag,
|
MPI_Send(to_master.data(), to_master.size(), MPI_UINT32_T, 0, tag,
|
||||||
group_comm);
|
group_comm);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
switch (type)
|
switch (type) {
|
||||||
{
|
case WORKER_DHT_HITS: {
|
||||||
case WORKER_DHT_HITS:
|
|
||||||
{
|
|
||||||
reduce_and_send(dht_hits, WORKER_DHT_HITS);
|
reduce_and_send(dht_hits, WORKER_DHT_HITS);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_DHT_EVICTIONS:
|
case WORKER_DHT_EVICTIONS: {
|
||||||
{
|
|
||||||
reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS);
|
reduce_and_send(dht_evictions, WORKER_DHT_EVICTIONS);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case WORKER_IP_CALLS:
|
case WORKER_IP_CALLS: {
|
||||||
{
|
|
||||||
reduce_and_send(interp_calls, WORKER_IP_CALLS);
|
reduce_and_send(interp_calls, WORKER_IP_CALLS);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case WORKER_PHT_CACHE_HITS:
|
case WORKER_PHT_CACHE_HITS: {
|
||||||
{
|
|
||||||
std::vector<std::uint32_t> input = this->interp->getPHTLocalCacheHits();
|
std::vector<std::uint32_t> input = this->interp->getPHTLocalCacheHits();
|
||||||
reduce_and_send(input, WORKER_PHT_CACHE_HITS);
|
reduce_and_send(input, WORKER_PHT_CACHE_HITS);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
default:
|
default: {
|
||||||
{
|
|
||||||
throw std::runtime_error("Unknown perf type in master's message.");
|
throw std::runtime_error("Unknown perf type in master's message.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -52,7 +52,7 @@ public:
|
|||||||
std::uint32_t control_interval;
|
std::uint32_t control_interval;
|
||||||
std::vector<std::string> species_names;
|
std::vector<std::string> species_names;
|
||||||
std::vector<double> mape_threshold;
|
std::vector<double> mape_threshold;
|
||||||
std::vector<double> ctrl_cell_ids;
|
std::vector<uint32_t> ctrl_cell_ids;
|
||||||
};
|
};
|
||||||
|
|
||||||
void enableControlLogic(const ControlSetup &setup) {
|
void enableControlLogic(const ControlSetup &setup) {
|
||||||
|
|||||||
27
src/poet.cpp
27
src/poet.cpp
@ -250,7 +250,8 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) {
|
|||||||
|
|
||||||
params.timesteps =
|
params.timesteps =
|
||||||
Rcpp::as<std::vector<double>>(global_rt_setup->operator[]("timesteps"));
|
Rcpp::as<std::vector<double>>(global_rt_setup->operator[]("timesteps"));
|
||||||
params.checkpoint_interval = Rcpp::as<uint32_t>(global_rt_setup->operator[]("checkpoint_interval"));
|
params.checkpoint_interval =
|
||||||
|
Rcpp::as<uint32_t>(global_rt_setup->operator[]("checkpoint_interval"));
|
||||||
params.control_interval =
|
params.control_interval =
|
||||||
Rcpp::as<uint32_t>(global_rt_setup->operator[]("control_interval"));
|
Rcpp::as<uint32_t>(global_rt_setup->operator[]("control_interval"));
|
||||||
params.mape_threshold = Rcpp::as<std::vector<double>>(
|
params.mape_threshold = Rcpp::as<std::vector<double>>(
|
||||||
@ -465,6 +466,30 @@ int parseInitValues(int argc, char **argv, RuntimeParameters ¶ms) {
|
|||||||
return profiling;
|
return profiling;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void getControlCellIds(const vector<uint32_t> &ids, int root,
|
||||||
|
MPI_Comm comm) {
|
||||||
|
std::uint32_t n_ids = 0;
|
||||||
|
int rank;
|
||||||
|
MPI_Comm_rank(comm, &rank);
|
||||||
|
bool is_master = root == rank;
|
||||||
|
|
||||||
|
if (is_master) {
|
||||||
|
n_ids = ids.size();
|
||||||
|
}
|
||||||
|
// broadcast size of id vector
|
||||||
|
MPI_Bcast(n_ids, 1, MPI_UINT32_T, root, comm);
|
||||||
|
|
||||||
|
// worker
|
||||||
|
if (!is_master) {
|
||||||
|
ids.resize(n_ids);
|
||||||
|
}
|
||||||
|
// broadcast control cell ids
|
||||||
|
if (n_ids > 0) {
|
||||||
|
MPI_Bcast(ids.data(), n_ids, MPI_UINT32_T, root, comm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<std::string> getSpeciesNames(const Field &&field, int root,
|
std::vector<std::string> getSpeciesNames(const Field &&field, int root,
|
||||||
MPI_Comm comm) {
|
MPI_Comm comm) {
|
||||||
std::uint32_t n_elements;
|
std::uint32_t n_elements;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user