Merge branch 'dev'

This commit is contained in:
Max Lübke 2021-02-26 19:33:04 +01:00
commit 90e14eb0fd
No known key found for this signature in database
GPG Key ID: D3201E51647D1199
54 changed files with 7066 additions and 2019 deletions

2
.gitignore vendored
View File

@ -138,3 +138,5 @@ vignettes/*.pdf
/*_files/
# End of https://www.toptal.com/developers/gitignore/api/c,c++,r,cmake
build/

25
CMake/FindR.cmake Normal file
View File

@ -0,0 +1,25 @@
# Find module for the R runtime (prerequisite for the Rcpp and RInside modules,
# which use R_EXE).
#
# Variables set by this module:
#   R_EXE          - path to the R executable
#   R_ROOT_DIR     - output of `R RHOME`, used as search hint
#   R_INCLUDE_DIR  - directory containing R.h
#   R_LIBRARY      - the R library
#   R_LIBRARIES    - list of libraries to link (initialised with R_LIBRARY)
#   R_INCLUDE_DIRS - list of include directories (initialised with R_INCLUDE_DIR)
#   R_FOUND        - set by find_package_handle_standard_args()
find_program(R_EXE "R")

# search for R executable, R header file and library path
if(R_EXE)
  # Ask R itself where it is installed; the answer is used as a search hint.
  execute_process(COMMAND ${R_EXE} RHOME
    OUTPUT_VARIABLE R_ROOT_DIR
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  # `include` is listed as a suffix as well so that <R_ROOT_DIR>/include/R.h
  # is found in addition to the include/R and R/include layouts.
  find_path(R_INCLUDE_DIR R.h
    HINTS ${R_ROOT_DIR}
    PATHS /usr/include /usr/local/include /usr/share
    PATH_SUFFIXES include include/R R/include
  )

  find_library(R_LIBRARY R
    HINTS ${R_ROOT_DIR}/lib
  )
else()
  message(FATAL_ERROR "No R runtime found!")
endif()

# Validate the results so that a missing header or library is reported at
# configure time instead of surfacing later as a *-NOTFOUND path.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(R
  REQUIRED_VARS R_EXE R_INCLUDE_DIR R_LIBRARY
)

set(R_LIBRARIES ${R_LIBRARY})
set(R_INCLUDE_DIRS ${R_INCLUDE_DIR})

23
CMake/FindRInside.cmake Normal file
View File

@ -0,0 +1,23 @@
# Find module for the RInside R package: locates its library and include path
# and appends them to R_LIBRARIES / R_INCLUDE_DIRS.
#
# The package directory is obtained by piping an R expression into the R
# executable, so R_EXE must already be set when this module is loaded.
execute_process(COMMAND echo "cat(find.package('RInside'))"
  COMMAND ${R_EXE} --vanilla --slave
  RESULT_VARIABLE RINSIDE_NOT_FOUND
  ERROR_QUIET
  OUTPUT_VARIABLE RINSIDE_PATH
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# RESULT_VARIABLE holds the result of the last process of the pipeline (R).
# The variable name must match the one used above exactly: CMake variable
# names are case-sensitive.
if(RINSIDE_NOT_FOUND)
  message(FATAL_ERROR "RInside not found!")
endif()

find_library(R_RInside_LIBRARY libRInside.so
  HINTS ${RINSIDE_PATH}/lib)
list(APPEND R_LIBRARIES ${R_RInside_LIBRARY})

find_path(R_RInside_INCLUDE_DIR RInside.h
  HINTS ${RINSIDE_PATH}
  PATH_SUFFIXES include)
list(APPEND R_INCLUDE_DIRS ${R_RInside_INCLUDE_DIR})

23
CMake/FindRcpp.cmake Normal file
View File

@ -0,0 +1,23 @@
# Locate the Rcpp R package and add its header directory to R_INCLUDE_DIRS.
#
# The package directory is queried from R via find.package(); R_EXE has to be
# set before this module is loaded.
execute_process(
  COMMAND echo "cat(find.package('Rcpp'))"
  COMMAND ${R_EXE} --vanilla --slave
  OUTPUT_VARIABLE RCPP_PATH
  OUTPUT_STRIP_TRAILING_WHITESPACE
  RESULT_VARIABLE RCPP_NOT_FOUND
  ERROR_QUIET
)

# A failing query aborts the configuration.
if(RCPP_NOT_FOUND)
  message(FATAL_ERROR "Rcpp not found!")
endif()

# Only the include directory is located; no Rcpp library is appended to
# R_LIBRARIES.
find_path(
  R_Rcpp_INCLUDE_DIR
  Rcpp.h
  HINTS ${RCPP_PATH}
  PATH_SUFFIXES include
)
list(APPEND R_INCLUDE_DIRS ${R_Rcpp_INCLUDE_DIR})

28
CMake/POET_Scripts.cmake Normal file
View File

@ -0,0 +1,28 @@
# get_POET_version()
#
# Determines the POET version string and stores it in POET_VERSION in the
# caller's scope (this is a macro on purpose, so no PARENT_SCOPE is needed).
#
#   * git checkout (.git exists): "<branch>/<git describe>", or only
#     "<git describe>" on branch "master". If git is not available or
#     returns nothing, the default version "0.1" is used.
#   * .svn exists: the version is read from the file .gitversion.
#   * otherwise: "0.1".
#
# Side effects: also sets POET_GIT_BRANCH and POET_GIT_VERSION in the caller's
# scope when a .git directory exists.
macro(get_POET_version)
  if(EXISTS ${PROJECT_SOURCE_DIR}/.git)
    find_program(GIT_EXECUTABLE git DOC "git executable")
    mark_as_advanced(GIT_EXECUTABLE)

    # Start from a clean state so that stale values cannot leak into the
    # version string when git is missing or fails.
    set(POET_GIT_BRANCH "")
    set(POET_GIT_VERSION "")

    if(GIT_EXECUTABLE)
      execute_process(
        COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
        WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
        OUTPUT_VARIABLE POET_GIT_BRANCH
        OUTPUT_STRIP_TRAILING_WHITESPACE)
      execute_process(
        COMMAND ${GIT_EXECUTABLE} describe --always
        WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
        OUTPUT_VARIABLE POET_GIT_VERSION
        OUTPUT_STRIP_TRAILING_WHITESPACE)
    endif()

    if("${POET_GIT_VERSION}" STREQUAL "")
      # git missing or not usable: fall back to the default version
      set(POET_VERSION "0.1")
    elseif("${POET_GIT_BRANCH}" STREQUAL "master"
           OR "${POET_GIT_BRANCH}" STREQUAL "")
      set(POET_VERSION "${POET_GIT_VERSION}")
    else()
      set(POET_VERSION "${POET_GIT_BRANCH}/${POET_GIT_VERSION}")
    endif()
  elseif(EXISTS ${PROJECT_SOURCE_DIR}/.svn)
    # NOTE(review): the relative path is resolved against the directory that
    # invokes this macro - confirm that .gitversion is expected there.
    file(STRINGS .gitversion POET_VERSION)
  else()
    set(POET_VERSION "0.1")
  endif()

  message(STATUS "Configuring POET version ${POET_VERSION}")
endmacro()

31
CMakeLists.txt Normal file
View File

@ -0,0 +1,31 @@
# Top-level build description of POET.
#
# CMake 3.9 or newer is required because of its MPI package variables.
cmake_minimum_required(VERSION 3.9)

project(POET LANGUAGES CXX C)

# C++14 is mandatory for all C++ sources.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)

# Project helper macros and the Find modules shipped in CMake/.
include("CMake/POET_Scripts.cmake")
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMake")

# set(GCC_CXX_FLAGS "-D STRICT_R_HEADERS") add_definitions(${GCC_CXX_FLAGS})

# External dependencies; R has to be located before Rcpp and RInside.
foreach(poet_dependency MPI R Rcpp RInside)
  find_package(${poet_dependency} REQUIRED)
endforeach()

# Project parts.
foreach(poet_subdir src R_lib data)
  add_subdirectory(${poet_subdir})
endforeach()

# The documentation is optional but enabled by default.
option(BUILD_DOC "Build documentation with doxygen" ON)
if(BUILD_DOC)
  add_subdirectory(docs)
endif()

212
README.md
View File

@ -1,63 +1,201 @@

<!--
Time-stamp: "Last modified 2020-02-01 18:14:13 delucia"
Time-stamp: "Last modified 2021-02-08 13:46:00 mluebke"
-->
# install libraries from MDL
# POET
library(devtools)
devtools::install_gitlab("delucia/RedModRphree", host="https://git.gfz-potsdam.de")
devtools::install_gitlab("delucia/Rmufits", host="https://git.gfz-potsdam.de")
# USAGE
POET is a coupled reactive transport simulator implementing a parallel
architecture and a fast, original MPI-based Distributed Hash Table.
mpirun ./kin <OPTIONS> <simfile.R> <DIRECTORY>
## External Libraries
OPTIONS:
The following external header library is shipped with POET:
--work-package-size=<1-n> ... size of work packages (default 5)
- **argh** - https://github.com/adishavit/argh (BSD license)
--ignore-result ... disables store of simulation resuls
## Installation
DHT:
### Requirements
--dht ... enable dht (default is off)
To compile POET, the following software must be installed:
--dht-log ... enable logarithm application before rounding (default is off)
- C/C++ compiler (tested with GCC)
- MPI-Implementation (tested with OpenMPI and MVAPICH)
- R language and environment
- CMake 3.9+
--dht-signif=<1-n> ... set rounding to number of significant digits (default 5)
(only used if no vector is given in setup file)
(for individual values per column use R vector "signif_vector" in setup file)
If you want to build the documentation during compilation, `doxygen` and `graphviz` must be provided too.
--dht-strategy=<0-1> ... change dht strategy, not implemented yet (default 0, dht on workers)
The following R libraries must then be installed, which will get the needed dependencies automatically:
--dht-size=<1-n> ... size of dht per process involved (see dht-strategy) in byte (default 1GiB)
- [devtools](https://www.r-project.org/nosvn/pandoc/devtools.html)
- [Rcpp](https://cran.r-project.org/web/packages/Rcpp/index.html)
- [RInside](https://cran.r-project.org/web/packages/RInside/index.html)
- [RedModRphree](https://git.gfz-potsdam.de/delucia/RedModRphree)
- [Rmufits](https://git.gfz-potsdam.de/delucia/Rmufits)
--dht-snaps=<0-2> ... enable or disable storage of DHT snapshots
0 = snapshots are disabled
1 = only stores snapshot at the end of the simulation with name <DIRECTORY>.dht
2 = stores snapshot at the end and after each iteration
iteration snapshot files are stored in <DIRECTORY>/iter<n>.dht
### Compiling source code
--dht-file=<snapshot> ... initializes DHT with the given snapshot file
The generation of makefiles is done with CMake. If you obtained POET from git, you should be able to generate Makefiles by running
###############################################################################
```sh
mkdir build && cd build
cmake ..
```
This will create the directory `build`, process the CMake files and generate Makefiles from them. You're now able to run `make` to start the build
process.
# about the usage of MPI_Wtime()
From the OpenMPI Man Page:
If POET was obtained from the official SVN repository or the redmine at <https://redmine.cs.uni-potsdam.de/projects/poet>, the branch or tag to be used has to be set via
For example, on platforms that support it, the clock_gettime() function will be used
to obtain a monotonic clock value with whatever precision is supported on that platform (e.g., nanoseconds).
```sh
mkdir build && cd build
cmake -D POET_SET_BRANCH="<BRANCH>" ..
```
# External Libraries
Cmdline Parsing -> https://github.com/adishavit/argh
where currently available branches/tags are:
- dev
# Examples included (more to come)
1) SimDol2D.R ... simple chemistry (Calcite/Dolomite) on a 50x50 2D grid, 20 time steps
2) SimDolKtz.R ... simple chemistry (Calcite/Dolomite) on Ketzin grid (~650k elements), 20 time steps
The flow snapshots are NOT INCLUDED in svn but must be provided separately
If everything went well you'll find the executable at `build/src/poet`, but it is recommended to install the POET project structure to a desired `CMAKE_INSTALL_PREFIX` with `make install`.
During the generation of Makefiles, various options can be specified via `cmake -D <option>=<value> [...]`. Currently there are the following available options:
- **DHT_Debug**=_boolean_ - toggles the output of detailed statistics about DHT
usage (`cmake -D DHT_Debug=ON`). Defaults to _OFF_.
- **BUILD_DOC**=_boolean_ - toggles the generation of documentation during
the compilation process. Defaults to _ON_.
- _only from svn version:_ **POET_SET_BRANCH**=_string_ - set branch or tag whose code is used
### Example: Build from scratch
Assuming that only the C/C++ compiler, MPI libraries, R runtime environment and
CMake have been installed, POET can be installed as follows:
```sh
# start R environment
$ R
# install R dependencies
> install.packages(c("devtools", "Rcpp", "RInside"))
> devtools::install_gitlab("delucia/RedModRphree", host="https://git.gfz-potsdam.de")
> devtools::install_gitlab("delucia/Rmufits", host="https://git.gfz-potsdam.de")
> q(save="no")
# cd into POET project root
$ cd <POET_dir>
# Build process
$ mkdir build && cd build
$ cmake -DCMAKE_INSTALL_PREFIX=/home/<user>/poet ..
$ make -j<max_numprocs>
$ make install
```
This will install a POET project structure into `/home/<user>/poet` which is
called hereinafter `<POET_INSTALL_DIR>`. With this version of POET we **do not
recommend** to install to hierarchies like `/usr/local/` etc.
The corresponding directory tree would look like this:
```sh
.
└── poet/
├── bin/
│ └── poet
├── data/
│ └── SimDol2D.R
├── docs/
│ └── html/
│ ├── index.html
│ └── ...
└── R_lib/
├── kin_r_library.R
└── parallel_r_library.R
```
The R libraries will be loaded at runtime and the paths are hardcoded
absolute paths inside `poet.cpp`. So, if you consider to move `bin/poet` either
change paths of the R source files and recompile POET or also move `R_lib/*`
according to the binary.
To display the generated html documentation just open `docs/html/index.html`
with the browser of your choice.
## Running
Before POET is ready to run, a working directory must be created. In this
directory you should find the executable file, the R scripts
`<POET_ROOT>/R_lib/kin_r_library.R` and `<POET_ROOT>/R_lib/parallel_r_library.R`
and the simulation description e.g. `<POET_ROOT>/data/chem_problems/SimDol2D.R`.
Run POET by `mpirun ./poet <OPTIONS> <SIMFILE> <OUTPUT_DIRECTORY>` where:
- **OPTIONS** - runtime parameters (explained below)
- **SIMFILE** - simulation described as R script (currently supported:
`<POET_INSTALL_DIR>/data/SimDol2D.R`)
- **OUTPUT_DIRECTORY** - path, where all output of POET should be stored
### Runtime options
The following parameters can be set:
| Option | Value | Description |
| ------------------------ | ------------ | -------------------------------------------------------------- |
| **--work-package-size=** | _1..n_ | size of work packages (defaults to _5_) |
| **--ignore-result** | | disables storing of simulation results |
| **--dht** | | enabling DHT usage (defaults to _OFF_) |
| **--dht-nolog** | | disabling applying of logarithm before rounding |
| **--dht-signif=** | _1..n_ | set rounding to number of significant digits (defaults to _5_) |
| **--dht-strategy=** | _0-1_ | change DHT strategy. **NOT IMPLEMENTED YET** (Defaults to _0_) |
| **--dht-size=** | _1-n_ | size of DHT per process involved in byte (defaults to _1 GiB_) |
| **--dht-snaps=** | _0-2_ | disable or enable storage of DHT snapshots |
| **--dht-file=** | `<SNAPSHOT>` | initializes DHT with the given snapshot file |
#### Additions to `dht-signif`
Only used if no vector is given in setup file. For individual values per column
use R vector `signif_vector` in `SIMFILE`.
#### Additions to `dht-snaps`
Following values can be set:
- _0_ = snapshots are disabled
- _1_ = only stores snapshot at the end of the simulation with name
`<OUTPUT_DIRECTORY>.dht`
- _2_ = stores snapshot at the end and after each iteration; iteration
snapshot files are stored in `<DIRECTORY>/iter<n>.dht`
### Example: Running from scratch
We will continue the above example and start a simulation with `SimDol2D.R`,
which is the only simulation supported at this moment. The required flow velocities
snapshots are included in the R package Rmufits. It's a 2D, 50x50 grid, with 20 time
steps. To start the simulation with 4 processes `cd` into your previously installed
POET-dir `<POET_INSTALL_DIR>/bin` and run:
```sh
mpirun -n 4 ./poet ../data/SimDol2D.R output
```
After a finished simulation all data generated by POET will be found in the
directory `output`.
You might want to use the DHT to cache previously simulated data and
reuse them in further time-steps. Just append `--dht` to the options of POET to
activate the usage of the DHT. The resulting call would look like this:
```sh
mpirun -n 4 ./poet --dht SimDol2D.R output
```
## About the usage of MPI_Wtime()
The implemented time measurement functions use `MPI_Wtime()`. Some important
information from the OpenMPI man page:
For example, on platforms that support it, the clock_gettime() function will be
used to obtain a monotonic clock value with whatever precision is supported on
that platform (e.g., nanoseconds).

1
R_lib/CMakeLists.txt Normal file
View File

@ -0,0 +1 @@
# Install the R scripts of this directory into <prefix>/R_lib.
install(
  FILES
    kin_r_library.R
    parallel_r_library.R
  DESTINATION R_lib
)

View File

@ -1,3 +1,18 @@
### Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
###
### POET is free software; you can redistribute it and/or modify it under the
### terms of the GNU General Public License as published by the Free Software
### Foundation; either version 2 of the License, or (at your option) any later
### version.
###
### POET is distributed in the hope that it will be useful, but WITHOUT ANY
### WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
### A PARTICULAR PURPOSE. See the GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License along with
### this program; if not, write to the Free Software Foundation, Inc., 51
### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
## Simple function to check file extension. It is needed to check if
## the GridFile is SUM (MUFITS format) or rds/RData
FileExt <- function (x)

View File

@ -1,3 +1,18 @@
### Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
###
### POET is free software; you can redistribute it and/or modify it under the
### terms of the GNU General Public License as published by the Free Software
### Foundation; either version 2 of the License, or (at your option) any later
### version.
###
### POET is distributed in the hope that it will be useful, but WITHOUT ANY
### WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
### A PARTICULAR PURPOSE. See the GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License along with
### this program; if not, write to the Free Software Foundation, Inc., 51
### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
## Simple function to check file extension. It is needed to check if
## the GridFile is SUM (MUFITS format) or rds/RData
FileExt <- function (x)

View File

@ -1,3 +1,18 @@
### Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
###
### POET is free software; you can redistribute it and/or modify it under the
### terms of the GNU General Public License as published by the Free Software
### Foundation; either version 2 of the License, or (at your option) any later
### version.
###
### POET is distributed in the hope that it will be useful, but WITHOUT ANY
### WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
### A PARTICULAR PURPOSE. See the GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License along with
### this program; if not, write to the Free Software Foundation, Inc., 51
### Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
distribute_work_packages <- function(len, package_size)
{
## Check if work_package is a divisor of grid length and act

1
data/CMakeLists.txt Normal file
View File

@ -0,0 +1 @@
# Install the example simulation description into <prefix>/data.
install(
  FILES
    SimDol2D.R
  DESTINATION data
)

18
docs/CMakeLists.txt Normal file
View File

@ -0,0 +1,18 @@
# Generates the API documentation with Doxygen (when available) as part of the
# default build and installs the result into <prefix>/docs.
find_package(Doxygen)

if(DOXYGEN_FOUND)
  # Doxyfile template and its configured copy in the top-level build directory.
  set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in)
  set(DOXYGEN_OUT ${CMAKE_BINARY_DIR}/Doxyfile)

  # @ONLY: only @VAR@ references of the template are substituted.
  configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)

  add_custom_target(doc_doxygen ALL
    COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating documentation with doxygen"
    VERBATIM)

  # NOTE(review): assumes the Doxyfile writes its output to
  # <current binary dir>/doxygen - confirm against Doxyfile.in.
  install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/doxygen/ DESTINATION docs)
else()
  message("Doxygen not found. Please install.")
endif()

2633
docs/Doxyfile.in Normal file

File diff suppressed because it is too large Load Diff

66
docs/Output.md Normal file
View File

@ -0,0 +1,66 @@
# Output Files
POET will place all simulation data and other files inside the given
`<OUTPUT_DIRECTORY>`. The directory will look like this:
``` {.example}
.
└── <OUTPUT_DIRECTORY>/
├── iter000.rds
├── iter000.dht
├── ...
├── iter<n>.rds
├── iter<n>.dht
├── setup.rds
└── timings.rds
```
## Description
All `.rds` files can be read into an R runtime using e.g.
`readRDS("<FILE>")`. The following description can be given to the
files:
| File | Description |
|---------------|-------------------------------------------------------------------------------------------------------------------|
| iter<*n*>.rds | Defines the state of the grid after *n* iteration, especially the state after transport (`T`) and chemistry (`C`) |
| iter<*n*>.dht | DHT-snapshot of the *n* th iteration |
| setup.rds | Summary of all simulation parameters given at startup |
| timings.rds | Various measured timings by POET |
## Timings
POET provides built-in time measurements of (sub) routines. The
corresponding values can be found in `<OUTPUT_DIRECTORY>/timings.rds`
and can be read within an R runtime with
`readRDS("timings.rds")`. There you will find the following values:
| Value | Description |
|--------------------|----------------------------------------------------------------------------|
| simtime | time spent in whole simulation loop without any initialization and cleanup |
| simtime\_transport | measured time in *transport* subroutine |
| simtime\_chemistry | measured time in *chemistry* subroutine (actual parallelized part) |
### chemistry subsetting
If running parallel there are also measured timings which are subsets of
*simtime\_chemistry*.
| Value | Description |
|----------------------------|----------------------------------------------------|
| simtime\_workers | time spent in send/recv loop of master |
| simtime\_chemistry\_master | sequential part of master chemistry |
| phreeqc | measured time of each worker in PHREEQC subroutine |
### DHT usage {#DHT-usage}
If running in parallel and with activated DHT, two more timings and also
some profiling about the DHT usage are given:
| Value | Description |
|-----------------|---------------------------------------------------------|
| dht\_fill\_time | time to write data to DHT |
| dht\_get\_time | time to retrieve data from DHT |
| dh\_hits | count of data points retrieved from DHT |
| dht\_miss | count of misses/count of data points written to DHT |
| dht\_evictions | count of data points evicted by another write operation |

18
src/CMakeLists.txt Normal file
View File

@ -0,0 +1,18 @@
# Build description of the `poet` executable.

# Cache the directory the sources are taken from.
set(SRC_CODE_DIR
    ${CMAKE_CURRENT_SOURCE_DIR}
    CACHE INTERNAL "directory indicating which source code version is used")

# Determine the version, then generate poet.h from poet.h.in in the build tree.
get_POET_version()
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/poet.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/poet.h
)

add_executable(poet poet.cpp)

# The binary directory is added so that the generated poet.h is found.
target_include_directories(poet
  PUBLIC
    "${CMAKE_CURRENT_BINARY_DIR}"
)
target_link_libraries(poet
  PUBLIC
    POET_Model
    POET_Util
    MPI::MPI_C
)
target_compile_definitions(poet
  PRIVATE
    OMPI_SKIP_MPICXX
)

install(TARGETS poet DESTINATION bin)

# Sub-components of the source tree.
add_subdirectory(DHT)
add_subdirectory(model)
add_subdirectory(util)

View File

@ -1,423 +0,0 @@
#include "DHT.h"
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
/*
 * Derive the target rank and the candidate bucket indices from the hash of a
 * key.
 *
 * index_count candidate indices are produced: for candidate k, a window of
 * (9 - index_count) bytes starting at byte k of the hash is copied into a
 * zeroed 64 bit integer and reduced modulo table_size. The target rank is the
 * complete hash modulo comm_size.
 *
 * Both results are returned by reference: *dest_rank receives the rank and
 * index[0 .. index_count-1] the candidate indices.
 */
static void determine_dest(uint64_t hash, int comm_size, unsigned int table_size, unsigned int *dest_rank, unsigned int *index, unsigned int index_count) {
    const unsigned char *hash_bytes = (const unsigned char *) &hash;
    int window_len = 9 - index_count;
    unsigned int slot;

    for (slot = 0; slot < index_count; slot++) {
        uint64_t window = 0;

        memcpy(&window, hash_bytes + slot, window_len);
        index[slot] = (unsigned int) (window % table_size);
    }

    *dest_rank = (unsigned int) (hash % comm_size);
}
/**
 * Mark a bucket as written: the flag byte is overwritten so that only the
 * write flag (bit 0) is set.
 */
static void set_flag(char* flag_byte) {
    *flag_byte = (char) (1 << 0);
}
/**
 * Test the write flag (bit 0) of a flag byte.
 *
 * Returns 1 if the flag is set, otherwise 0.
 */
static int read_flag(char flag_byte) {
    return (flag_byte & 0x01) != 0;
}
/*
 * Allocates and initialises a DHT object.
 *
 * - allocates the local bucket memory with MPI_Alloc_mem (size buckets, each
 *   consisting of 1 flag byte + key_size + data_size bytes) and zeroes it
 * - creates the MPI window for one-sided communication on that memory
 * - allocates the send/receive entry buffers, the index array and the
 *   statistics record
 *
 * Parameters:
 *   comm      - communicator of all participating processes
 *   size      - number of buckets per process (must be > 0)
 *   data_size - size of a data entry in bytes
 *   key_size  - size of a key in bytes
 *   hash_func - hash function called with (key_size, key)
 *
 * Returns the new DHT object or NULL on any error. On error everything that
 * was allocated so far is released again.
 */
DHT* DHT_create(MPI_Comm comm, unsigned int size, int data_size, int key_size, uint64_t(*hash_func) (int, void*)) {
    DHT *object = NULL;
    DHT_stats *stats = NULL;
    MPI_Win window;
    void* mem_alloc = NULL;
    int comm_size, tmp, index_count;
    /* every bucket has 1 additional byte for flags etc. */
    const int bucket_size = 1 + data_size + key_size;
    /* computed in MPI_Aint so that tables of 4 GiB and more do not wrap */
    const MPI_Aint table_bytes = (MPI_Aint) size * bucket_size;

    /* log2(0) is -inf, which can not be converted to int */
    if (size == 0) return NULL;

    /* bits needed to address a bucket, rounded up to whole bytes */
    tmp = (int) ceil(log2(size));
    if (tmp%8 != 0) tmp = tmp + (8-(tmp%8));
    index_count = 9-(tmp/8);

    if (MPI_Comm_size(comm, &comm_size) != 0) return NULL;

    object = (DHT*) malloc(sizeof(DHT));
    if (object == NULL) return NULL;

    object->recv_entry = malloc(bucket_size);
    object->send_entry = malloc(bucket_size);
    object->index = (unsigned int*) malloc(index_count*sizeof(int));
    stats = (DHT_stats*) malloc(sizeof(DHT_stats));
    if (stats != NULL) stats->writes_local = (int*) calloc(comm_size, sizeof(int));

    if (object->recv_entry == NULL || object->send_entry == NULL ||
        object->index == NULL || stats == NULL || stats->writes_local == NULL) {
        goto fail;
    }

    if (MPI_Alloc_mem(table_bytes, MPI_INFO_NULL, &mem_alloc) != 0) {
        mem_alloc = NULL;
        goto fail;
    }
    memset(mem_alloc, '\0', (size_t) table_bytes);

    if (MPI_Win_create(mem_alloc, table_bytes, bucket_size, MPI_INFO_NULL, comm, &window) != 0) goto fail;

    object->data_size = data_size;
    object->key_size = key_size;
    object->table_size = size;
    object->window = window;
    object->hash_func = hash_func;
    object->comm_size = comm_size;
    object->communicator = comm;
    object->read_misses = 0;
    object->collisions = 0;
    object->index_count = index_count;
    object->mem_alloc = mem_alloc;

    stats->old_writes = 0;
    stats->read_misses = 0;
    stats->collisions = 0;
    stats->w_access = 0;
    stats->r_access = 0;
    object->stats = stats;

    return object;

fail:
    /* release everything acquired so far; free(NULL) is a no-op */
    if (mem_alloc != NULL) MPI_Free_mem(mem_alloc);
    if (stats != NULL) free(stats->writes_local);
    free(stats);
    free(object->index);
    free(object->send_entry);
    free(object->recv_entry);
    free(object);
    return NULL;
}
/*
* Writes the passed data under the passed key into the DHT.
*
* The hash of the key selects the target rank and a list of candidate bucket
* indices. While holding an exclusive lock on the window of the target rank,
* the candidates are fetched one after another until either an unused bucket
* or a bucket already holding the same key is found. If every candidate is
* occupied by another key, the last candidate is overwritten and the collision
* counters are incremented.
*
* table     - DHT object
* send_key  - key (table->key_size bytes)
* send_data - data (table->data_size bytes)
*
* returns DHT_MPI_ERROR = -1 if an MPI error occurred,
* DHT_WRITE_SUCCESS_WITH_COLLISION = 1 if another entry was overwritten,
* else DHT_SUCCESS = 0
*
* NOTE(review): the error returns between MPI_Win_lock and MPI_Win_unlock
* leave the function without unlocking the window.
*/
int DHT_write(DHT *table, void* send_key, void* send_data) {
unsigned int dest_rank, i;
int result = DHT_SUCCESS;
//statistics: count every call of this function
table->stats->w_access++;
//determine destination rank and index by hash of key
determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, table->table_size, &dest_rank, table->index, table->index_count);
//concatenating key with data to write entry to DHT
//entry layout: [flag byte][key][data]
set_flag((char *) table->send_entry);
memcpy((char *) table->send_entry + 1, (char *) send_key, table->key_size);
memcpy((char *) table->send_entry + table->key_size + 1, (char *) send_data, table->data_size);
//locking window of target rank with exclusive lock
if (MPI_Win_lock(MPI_LOCK_EXCLUSIVE, dest_rank, 0, table->window) != 0)
return DHT_MPI_ERROR;
//probe the candidate buckets; i keeps the index of the bucket to write to
for (i = 0; i < table->index_count; i++)
{
if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
//flush so that recv_entry is filled before it is inspected
if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
//increment collision counter if receiving key doesn't match sending key
//,entry has write flag + last index is reached
if (read_flag(*(char *)table->recv_entry)) {
if (memcmp(send_key, (char *) table->recv_entry + 1, table->key_size) != 0) {
if (i == (table->index_count)-1) {
table->collisions += 1;
table->stats->collisions += 1;
result = DHT_WRITE_SUCCESS_WITH_COLLISION;
break;
}
//bucket holds the same key: overwrite it
} else break;
} else {
//bucket was never written: count it as newly occupied on dest_rank
table->stats->writes_local[dest_rank]++;
break;
}
}
//put data to DHT
if (MPI_Put(table->send_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
//unlock window of target rank
if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
return result;
}
/*
* Gets data from the DHT by key.
*
* The hash of the key selects the target rank and a list of candidate bucket
* indices. While holding a shared lock on the window of the target rank, the
* candidates are fetched one after another until a bucket with a matching key
* is found. An unused bucket or a key mismatch at the last candidate counts as
* a read miss.
*
* table       - DHT object
* send_key    - key to look up (table->key_size bytes)
* destination - receives table->data_size bytes on success
*
* return DHT_SUCCESS = 0 if success
* DHT_MPI_ERROR = -1 if MPI error occurred
* DHT_READ_ERROR = -2 if receiving key doesn't match sending key
*
* NOTE(review): the error returns after MPI_Get/MPI_Win_flush leave the
* function without unlocking the window.
*/
int DHT_read(DHT *table, void* send_key, void* destination) {
unsigned int dest_rank, i;
//statistics: count every call of this function
table->stats->r_access++;
//determine destination rank and index by hash of key
determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, table->table_size, &dest_rank, table->index, table->index_count);
//locking window of target rank with shared lock
if (MPI_Win_lock(MPI_LOCK_SHARED, dest_rank, 0, table->window) != 0) return DHT_MPI_ERROR;
//receive data
for (i = 0; i < table->index_count; i++) {
if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
//flush so that recv_entry is filled before it is inspected
if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
//increment read error counter if write flag isn't set or key doesn't match passed by key + last index reached
//else copy data to dereference of passed by destination pointer
if ((read_flag(*(char *) table->recv_entry)) == 0) {
table->read_misses += 1;
table->stats->read_misses += 1;
if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
return DHT_READ_ERROR;
}
if (memcmp(((char*)table->recv_entry) + 1, send_key, table->key_size) != 0) {
if (i == (table->index_count)-1) {
table->read_misses += 1;
table->stats->read_misses += 1;
if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
return DHT_READ_ERROR;
}
//matching key found: recv_entry holds the requested entry
} else break;
}
//unlock window of target rank
if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
//skip flag byte and key, copy only the data part
memcpy((char *) destination, (char *) table->recv_entry + table->key_size + 1, table->data_size);
return DHT_SUCCESS;
}
/*
 * Writes a snapshot of the DHT to a file.
 *
 * File layout: a header of DHT_HEADER_SIZE bytes (key_size and data_size as
 * int, written by rank 0) followed by the key/data pairs of all written
 * buckets of all processes. The flag byte of a bucket is not stored.
 *
 * table    - DHT object
 * filename - name of the snapshot file (opened by all processes of the
 *            communicator of the table)
 *
 * returns DHT_SUCCESS = 0 on success,
 * DHT_FILE_IO_ERROR if the file could not be opened, positioned or closed,
 * DHT_FILE_WRITE_ERROR if writing failed
 */
int DHT_to_file(DHT* table, char* filename) {
    MPI_File file;
    int rank;
    char* ptr;
    const int bucket_size = table->key_size + table->data_size + 1;

    //open file
    if (MPI_File_open(table->communicator, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &file) != 0) return DHT_FILE_IO_ERROR;

    MPI_Comm_rank(table->communicator, &rank);

    //write header (key_size and data_size)
    if (rank == 0) {
        if (MPI_File_write(file, &table->key_size, 1, MPI_INT, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
        if (MPI_File_write(file, &table->data_size, 1, MPI_INT, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
    }

    //place the shared file pointer behind the header
    if (MPI_File_seek_shared(file, DHT_HEADER_SIZE, MPI_SEEK_SET) != 0) return DHT_FILE_IO_ERROR;

    //iterate over local memory
    for (unsigned int i = 0; i < table->table_size; i++) {
        //offset computed in size_t: i * bucket_size in unsigned int would
        //wrap for local tables of 4 GiB and more
        ptr = (char *) table->mem_alloc + ((size_t) i * (size_t) bucket_size);

        //if bucket has been written to (checked by written_flag)...
        if (read_flag(*ptr)) {
            //write key and data to file
            if (MPI_File_write_shared(file, ptr + 1, bucket_size - 1, MPI_BYTE, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
        }
    }

    //close file
    if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR;

    return DHT_SUCCESS;
}
/*
 * Fills the DHT with the entries of a snapshot file.
 *
 * The header of the file (key_size and data_size as int) must match the
 * sizes of the table. The entries behind the header are distributed round
 * robin over the processes: the process with rank r reads the entries
 * r, r + comm_size, r + 2*comm_size, ... and inserts them with DHT_write.
 *
 * table    - DHT object
 * filename - name of the snapshot file
 *
 * returns DHT_SUCCESS = 0 on success,
 * DHT_FILE_IO_ERROR if the file could not be opened, positioned or closed,
 * DHT_FILE_READ_ERROR if reading failed or no read buffer could be allocated,
 * DHT_WRONG_FILE if the header does not match the table,
 * DHT_MPI_ERROR if DHT_write reported an MPI error
 *
 * NOTE(review): as before, the file handle is not closed on error paths;
 * MPI_File_close is collective and is therefore only called on success.
 */
int DHT_from_file(DHT* table, char* filename) {
    MPI_File file;
    /* MPI_Offset instead of int: snapshots may be larger than 2 GiB */
    MPI_Offset f_size, cur_pos, stride;
    int e_size, m_size, rank;
    int file_key_size, file_data_size;
    int result;
    char* buffer;

    if (MPI_File_open(table->communicator, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &file) != 0) return DHT_FILE_IO_ERROR;
    if (MPI_File_get_size(file, &f_size) != 0) return DHT_FILE_IO_ERROR;

    MPI_Comm_rank(table->communicator, &rank);

    e_size = table->key_size + table->data_size;
    /* one buffer is used for the header as well as for the entries */
    m_size = e_size > DHT_HEADER_SIZE ? e_size : DHT_HEADER_SIZE;

    buffer = (char *) malloc(m_size);
    if (buffer == NULL) return DHT_FILE_READ_ERROR;

    /* a file shorter than the header can not be a snapshot */
    if (f_size < DHT_HEADER_SIZE) {
        result = DHT_WRONG_FILE;
        goto fail;
    }

    if (MPI_File_read(file, buffer, DHT_HEADER_SIZE, MPI_BYTE, MPI_STATUS_IGNORE) != 0) {
        result = DHT_FILE_READ_ERROR;
        goto fail;
    }

    /* header: key_size followed by data_size, both stored as int */
    memcpy(&file_key_size, buffer, sizeof(int));
    memcpy(&file_data_size, buffer + sizeof(int), sizeof(int));
    if (file_key_size != table->key_size || file_data_size != table->data_size) {
        result = DHT_WRONG_FILE;
        goto fail;
    }

    stride = (MPI_Offset) e_size * table->comm_size;
    cur_pos = DHT_HEADER_SIZE + ((MPI_Offset) rank * e_size);

    /* only complete entries are read; a truncated tail is ignored */
    while (cur_pos + e_size <= f_size) {
        if (MPI_File_seek(file, cur_pos, MPI_SEEK_SET) != 0) {
            result = DHT_FILE_IO_ERROR;
            goto fail;
        }
        if (MPI_File_read(file, buffer, e_size, MPI_BYTE, MPI_STATUS_IGNORE) != 0) {
            result = DHT_FILE_READ_ERROR;
            goto fail;
        }

        /* entry layout in the file: [key][data] */
        if (DHT_write(table, buffer, buffer + table->key_size) == DHT_MPI_ERROR) {
            result = DHT_MPI_ERROR;
            goto fail;
        }

        cur_pos += stride;
    }

    free (buffer);
    if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR;

    return DHT_SUCCESS;

fail:
    free (buffer);
    return result;
}
/*
 * Releases a DHT object and optionally reports the accumulated counters.
 *
 * If collision_counter / readerror_counter are not NULL, the collisions /
 * read misses of all processes are summed up with MPI_Reduce (root is rank 0)
 * and stored there. Afterwards the window, the bucket memory and all helper
 * buffers of the table are freed.
 *
 * returns DHT_SUCCESS = 0 on success, DHT_MPI_ERROR = -1 if an MPI call
 * failed
 */
int DHT_free(DHT* table, int* collision_counter, int* readerror_counter) {
    int total;

    if (collision_counter != NULL) {
        total = 0;
        if (MPI_Reduce(&table->collisions, &total, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) {
            return DHT_MPI_ERROR;
        }
        *collision_counter = total;
    }

    if (readerror_counter != NULL) {
        total = 0;
        if (MPI_Reduce(&table->read_misses, &total, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) {
            return DHT_MPI_ERROR;
        }
        *readerror_counter = total;
    }

    /* the window has to be freed before the memory it exposes */
    if (MPI_Win_free(&(table->window)) != 0) {
        return DHT_MPI_ERROR;
    }
    if (MPI_Free_mem(table->mem_alloc) != 0) {
        return DHT_MPI_ERROR;
    }

    free(table->recv_entry);
    free(table->send_entry);
    free(table->index);

    free(table->stats->writes_local);
    free(table->stats);

    free(table);

    return DHT_SUCCESS;
}
/*
 * prints a table with statistics about current use of DHT
 * for each participating process and summed up results containing:
 * 1. occupied buckets (in respect to the memory of this process)
 * 2. free buckets (in respect to the memory of this process)
 * 3. calls of DHT_write (w_access)
 * 4. calls of DHT_read (r_access)
 * 5. read misses (see DHT_READ_ERROR)
 * 6. collisions (see DHT_WRITE_SUCCESS_WITH_COLLISION)
 * 3-6 will reset with every call of this function
 * finally the amount of new written entries is printed out (in relation to
 * last call of this function)
 *
 * Has to be called by all processes of the communicator of the table; only
 * rank 0 prints. The temporary buffers allocated on rank 0 are released
 * before returning.
 *
 * returns DHT_SUCCESS = 0 on success, DHT_MPI_ERROR = -1 if an MPI call
 * failed
 */
int DHT_print_statistics(DHT *table) {
    int *written_buckets = NULL;
    int *read_misses = NULL, sum_read_misses;
    int *collisions = NULL, sum_collisions;
    int sum_w_access, sum_r_access, *w_access = NULL, *r_access = NULL;
    int rank;
    int result = DHT_MPI_ERROR;

    MPI_Comm_rank(table->communicator, &rank);

    //disable possible warning of uninitialized variable, which is not the case
    //since we're using MPI_Gather to obtain all values only on rank 0
    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"

    //obtaining all values from all processes in the communicator
    if (rank == 0) read_misses = (int*) malloc(table->comm_size*sizeof(int));
    if (MPI_Gather(&table->stats->read_misses, 1, MPI_INT, read_misses, 1, MPI_INT, 0, table->communicator) != 0) goto cleanup;
    if (MPI_Reduce(&table->stats->read_misses, &sum_read_misses, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) goto cleanup;
    table->stats->read_misses = 0;

    if (rank == 0) collisions = (int*) malloc(table->comm_size*sizeof(int));
    if (MPI_Gather(&table->stats->collisions, 1, MPI_INT, collisions, 1, MPI_INT, 0, table->communicator) != 0) goto cleanup;
    if (MPI_Reduce(&table->stats->collisions, &sum_collisions, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) goto cleanup;
    table->stats->collisions = 0;

    if (rank == 0) w_access = (int*) malloc(table->comm_size*sizeof(int));
    if (MPI_Gather(&table->stats->w_access, 1, MPI_INT, w_access, 1, MPI_INT, 0, table->communicator) != 0) goto cleanup;
    if (MPI_Reduce(&table->stats->w_access, &sum_w_access, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) goto cleanup;
    table->stats->w_access = 0;

    if (rank == 0) r_access = (int*) malloc(table->comm_size*sizeof(int));
    if (MPI_Gather(&table->stats->r_access, 1, MPI_INT, r_access, 1, MPI_INT, 0, table->communicator) != 0) goto cleanup;
    if (MPI_Reduce(&table->stats->r_access, &sum_r_access, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) goto cleanup;
    table->stats->r_access = 0;

    //writes_local is not reset, so the reduced values are totals per rank
    if (rank == 0) written_buckets = (int*) calloc(table->comm_size, sizeof(int));
    if (MPI_Reduce(table->stats->writes_local, written_buckets, table->comm_size, MPI_INT, MPI_SUM, 0, table->communicator) != 0) goto cleanup;

    if (rank == 0) { //only process with rank 0 will print out results as a table
        //7 columns of 11 characters plus separators
        enum { members = 7, padsize = (members*12)+1 };
        char pad[padsize+1];
        int sum_written_buckets = 0;

        for (int i=0; i < table->comm_size; i++) {
            sum_written_buckets += written_buckets[i];
        }

        memset(pad, '-', padsize*sizeof(char));
        pad[padsize]= '\0';

        printf("\n");
        printf("%-35s||resets with every call of this function\n", " ");
        printf("%-11s|%-11s|%-11s||%-11s|%-11s|%-11s|%-11s\n",
               "rank",
               "occupied",
               "free",
               "w_access",
               "r_access",
               "read misses",
               "collisions");
        printf("%s\n", pad);
        for (int i = 0; i < table->comm_size; i++) {
            printf("%-11d|%-11d|%-11d||%-11d|%-11d|%-11d|%-11d\n",
                   i,
                   written_buckets[i],
                   table->table_size-written_buckets[i],
                   w_access[i],
                   r_access[i],
                   read_misses[i],
                   collisions[i]);
        }
        printf("%s\n", pad);
        printf("%-11s|%-11d|%-11d||%-11d|%-11d|%-11d|%-11d\n",
               "sum",
               sum_written_buckets,
               (table->table_size*table->comm_size)-sum_written_buckets,
               sum_w_access,
               sum_r_access,
               sum_read_misses,
               sum_collisions);
        printf("%s\n", pad);
        printf("%s %d\n",
               "new entries:",
               sum_written_buckets - table->stats->old_writes);
        printf("\n");
        fflush(stdout);

        //remember the total for the "new entries" line of the next call
        table->stats->old_writes = sum_written_buckets;
    }

    result = DHT_SUCCESS;

cleanup:
    //the buffers only exist on rank 0; free(NULL) is a no-op elsewhere
    free(written_buckets);
    free(r_access);
    free(w_access);
    free(collisions);
    free(read_misses);

    //enable warning again
    #pragma GCC diagnostic pop

    if (result == DHT_SUCCESS) MPI_Barrier(table->communicator);
    return result;
}

112
src/DHT.h
View File

@ -1,112 +0,0 @@
/*
* File: DHT.h
* Author: max luebke
*
* Created on 16. November 2017, 09:14
*/
#ifndef DHT_H
#define DHT_H
#include <mpi.h>
#include <stdint.h>
/* Status codes used by the DHT API declared in this header. */
/* An MPI routine reported an error. */
#define DHT_MPI_ERROR -1
/* NOTE(review): presumably returned by DHT_read when no entry was found -- confirm in DHT.c. */
#define DHT_READ_ERROR -2
/* The call completed without error. */
#define DHT_SUCCESS 0
/* NOTE(review): presumably returned by DHT_write when an existing bucket was overwritten -- confirm in DHT.c. */
#define DHT_WRITE_SUCCESS_WITH_COLLISION 1
/* Codes for the file functions (DHT_to_file / DHT_from_file); exact usage is not visible in this header. */
#define DHT_WRONG_FILE 11
#define DHT_FILE_IO_ERROR 12
#define DHT_FILE_READ_ERROR 13
#define DHT_FILE_WRITE_ERROR 14
/* NOTE(review): looks like the size in bytes of the dump file header -- confirm in DHT.c. */
#define DHT_HEADER_SIZE 8
/*
 * Per-process usage counters that DHT_print_statistics gathers on rank 0.
 *
 * read_misses, collisions, w_access and r_access are set back to zero by
 * every call of DHT_print_statistics. writes_local is an array with one
 * element per rank of the communicator; old_writes stores the total number of
 * written buckets seen by the previous call (only updated on rank 0).
 */
typedef struct {
    int *writes_local, old_writes;
    int read_misses, collisions;
    int w_access, r_access;
} DHT_stats;
/*
 * Handle describing one DHT instance; created by DHT_create and passed to
 * every other DHT_* function.
 */
typedef struct {
/* MPI window of this DHT */
MPI_Win window;
/* size of the data part of an entry in bytes */
int data_size;
/* size of the key part of an entry in bytes */
int key_size;
/* number of buckets held by each process */
unsigned int table_size;
/* communicator of the processes holding the DHT */
MPI_Comm communicator;
/* number of processes in the communicator */
int comm_size;
/* hash function; NOTE(review): presumably called as hash_func(key_size, key) -- confirm in DHT.c */
uint64_t(*hash_func) (int, void*);
/* NOTE(review): recv_entry/send_entry look like scratch buffers for one bucket -- confirm in DHT.c */
void* recv_entry;
void* send_entry;
/* NOTE(review): presumably the memory backing the MPI window -- confirm in DHT.c */
void* mem_alloc;
/* counters reported through DHT_free */
int read_misses;
int collisions;
/* NOTE(review): presumably the candidate bucket indices of the current key and their count -- confirm in DHT.c */
unsigned int *index;
unsigned int index_count;
/* detailed counters printed by DHT_print_statistics */
DHT_stats *stats;
} DHT;
/*
* Create a DHT handle.
*
* parameters:
* MPI_Comm comm - communicator of processes that are holding the DHT
* unsigned int size_per_process - number of buckets each process will create
* int data_size - size of data in bytes
* int key_size - size of key in bytes
* *hash_func - pointer to hash function
*
* return:
* NULL if error during initialization
* DHT* if success
*/
extern DHT* DHT_create(MPI_Comm comm, unsigned int size_per_process, int data_size, int key_size, uint64_t(*hash_func)(int, void*));
/*
* Write one key/data pair into the DHT.
*
* parameters:
* DHT *table - DHT_object created by DHT_create
* void* key - pointer to key
* void* data - pointer to data
*
* return:
* error value (see above)
*/
extern int DHT_write(DHT *table, void* key, void* data);
/*
* Read the data stored for a key from the DHT.
*
* parameters:
* DHT *table - DHT_object created by DHT_create
* void* key - pointer to key
* void* destination - pointer which will hold the resulting data from DHT
*
* return:
* error value (see above)
*/
extern int DHT_read(DHT *table, void* key, void* destination);
/*
* NOTE(review): presumably dumps the content of the DHT into the file named
* by filename and returns one of the DHT_FILE_* codes on failure -- the
* implementation is not visible here, confirm in DHT.c.
*/
extern int DHT_to_file(DHT *table, char* filename);
/*
* NOTE(review): presumably loads a dump written by DHT_to_file back into the
* DHT and returns DHT_WRONG_FILE / DHT_FILE_* codes on failure -- the
* implementation is not visible here, confirm in DHT.c.
*/
extern int DHT_from_file(DHT *table, char* filename);
/*
* Release a DHT handle and report its counters.
*
* parameters:
* DHT *table - DHT_object created by DHT_create
* int* collision_counter - pointer which will hold the total count of collisions
* int* readerror_counter - pointer which will hold the total count of read errors
*
* return:
* error value (see above)
*/
extern int DHT_free(DHT *table, int* collision_counter, int* readerror_counter);
/*
* Gather the usage counters of all processes and print them as a table on
* rank 0. The per-call counters (w_access, r_access, read misses,
* collisions) are reset by every call.
*
* parameters:
* DHT *table - DHT_object created by DHT_create
*
* return:
* error value (DHT_SUCCESS or DHT_MPI_ERROR)
*/
extern int DHT_print_statistics(DHT *table);
#endif /* DHT_H */

17
src/DHT/CMakeLists.txt Normal file
View File

@ -0,0 +1,17 @@
# Core distributed hash table (plain C, MPI one-sided communication).
add_library(DHT DHT.c DHT.h)

# DHT.h itself includes <mpi.h>, so every consumer of DHT needs the MPI usage
# requirements as well: link the imported target PUBLIC instead of wiring the
# include directories by hand.
target_link_libraries(DHT PUBLIC MPI::MPI_C)
target_compile_definitions(DHT PUBLIC OMPI_SKIP_MPICXX)

# DHT_STATISTICS changes the layout of the DHT struct (see DHT.h), therefore
# the definition has to be PUBLIC so that all users see the same struct.
option(DHT_Debug "Toggle output of statistics table for each iteration" OFF)
if(DHT_Debug)
  target_compile_definitions(DHT PUBLIC DHT_STATISTICS)
endif()

find_library(MATH_LIBRARY m)
find_library(CRYPTO_LIBRARY crypto)

# DHT_Wrapper.cpp uses the MD5 routines of libcrypto; fail with a clear message
# instead of the generic "-NOTFOUND" generate-time error.
if(NOT CRYPTO_LIBRARY)
  message(FATAL_ERROR "libcrypto (OpenSSL) not found, required by DHT_Wrapper")
endif()

# DHT.c calls ceil()/log2(), so the math library belongs to DHT itself. It is
# only linked when a separate libm exists on this platform.
if(MATH_LIBRARY)
  target_link_libraries(DHT PRIVATE ${MATH_LIBRARY})
endif()

add_library(DHT_Wrapper DHT_Wrapper.cpp DHT_Wrapper.h)
target_include_directories(DHT_Wrapper PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(DHT_Wrapper PRIVATE DHT ${CRYPTO_LIBRARY} POET_Util)
if(MATH_LIBRARY)
  target_link_libraries(DHT_Wrapper PRIVATE ${MATH_LIBRARY})
endif()

497
src/DHT/DHT.c Normal file
View File

@ -0,0 +1,497 @@
/*
** Copyright (C) 2017-2021 Max Luebke (University of Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "DHT.h"
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/**
 * Derive the destination rank and all candidate bucket indices from a hash.
 *
 * Candidate i is built from a window of bytes of the hash that starts at byte
 * offset i; the window is interpreted as an integer and reduced modulo
 * table_size. The window shrinks by one byte for every additional candidate
 * so that the last window still ends inside the hash. The byte order of the
 * host decides which bytes of the hash value end up in a window.
 *
 * @param hash 64 bit hash of the key.
 * @param comm_size Number of ranks; the destination rank is hash % comm_size.
 * @param table_size Buckets per rank; must not be 0.
 * @param dest_rank Receives the destination rank.
 * @param index Receives index_count candidate bucket indices.
 * @param index_count Number of candidates to compute.
 */
static void determine_dest(uint64_t hash, int comm_size,
                           unsigned int table_size, unsigned int *dest_rank,
                           uint64_t *index, unsigned int index_count) {
  /* bytes of the hash used for one index; stays 0 when more candidates are
   * requested than the hash has bytes, instead of becoming negative */
  size_t index_size = 0;
  if (index_count >= 1 && index_count <= sizeof(hash)) {
    index_size = sizeof(hash) - (index_count - 1);
  }

  for (unsigned int i = 0; i < index_count; i++) {
    uint64_t tmp_index = 0;
    if (index_size > 0) {
      memcpy(&tmp_index, (const char *)&hash + i, index_size);
    }
    index[i] = tmp_index % table_size;
  }

  *dest_rank = (unsigned int)(hash % comm_size);
}
/**
 * Mark a bucket as occupied: the flag byte ends up with only bit 0 set, all
 * other bits are cleared.
 */
static void set_flag(char *flag_byte) {
  const char occupied = (char)(1 << 0);
  *flag_byte = occupied;
}
/**
 * Report whether the occupied flag (bit 0) of a flag byte is set.
 *
 * @return 1 if bit 0 is set, otherwise 0.
 */
static int read_flag(char flag_byte) {
  return (flag_byte & 0x01) != 0;
}
/**
 * Create a DHT handle.
 *
 * All process-local buffers are allocated first; the window memory and the
 * MPI window (collective call on comm) come last. If any step fails,
 * everything allocated so far is released again and NULL is returned.
 *
 * @param comm Communicator of all participating processes.
 * @param size Number of buckets this process provides; must be greater than 0.
 * @param data_size Size of the data part of a bucket in bytes.
 * @param key_size Size of the key part of a bucket in bytes.
 * @param hash_func Hash function, called as hash_func(key_size, key).
 * @return The new handle, or NULL on error.
 */
DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size,
                unsigned int key_size, uint64_t (*hash_func)(int, void *)) {
  DHT *object;
  MPI_Win window;
  void *mem_alloc = NULL;
  int comm_size, index_bits;
  /* every bucket has 1 additional byte for flags in front of key and data */
  const unsigned int bucket_size = 1 + data_size + key_size;

  /* log2(0) is -infinity and an empty table cannot be addressed anyway */
  if (size == 0) return NULL;

  /* number of bits needed to address all buckets of one process, rounded up
   * to whole bytes */
  index_bits = (int)ceil(log2((double)size));
  if (index_bits % 8 != 0) index_bits = index_bits + (8 - (index_bits % 8));

  if (MPI_Comm_size(comm, &comm_size) != 0) return NULL;

  /* calloc: all counters start at zero and all pointers at NULL, so the
   * error path below may free them unconditionally */
  object = (DHT *)calloc(1, sizeof(DHT));
  if (object == NULL) return NULL;

  object->index_count = 9 - (index_bits / 8);
  object->recv_entry = malloc(bucket_size);
  object->send_entry = malloc(bucket_size);
  object->index = (uint64_t *)malloc((object->index_count) * sizeof(uint64_t));
  if (object->recv_entry == NULL || object->send_entry == NULL ||
      object->index == NULL)
    goto fail;

  // if set, initialize dht_stats
#ifdef DHT_STATISTICS
  object->stats = (DHT_stats *)calloc(1, sizeof(DHT_stats));
  if (object->stats == NULL) goto fail;
  object->stats->writes_local = (int *)calloc(comm_size, sizeof(int));
  if (object->stats->writes_local == NULL) goto fail;
#endif

  if (MPI_Alloc_mem(size * bucket_size, MPI_INFO_NULL, &mem_alloc) != 0)
    goto fail;
  // since MPI_Alloc_mem doesn't provide memory allocation with the memory set
  // to zero, we're doing this here
  memset(mem_alloc, '\0', size * bucket_size);

  // create window on previously allocated memory
  if (MPI_Win_create(mem_alloc, size * bucket_size, bucket_size, MPI_INFO_NULL,
                     comm, &window) != 0) {
    MPI_Free_mem(mem_alloc);
    goto fail;
  }

  // fill dht-object
  object->data_size = data_size;
  object->key_size = key_size;
  /* NOTE(review): table_size is an unsigned int, a size above UINT_MAX is
   * truncated here */
  object->table_size = size;
  object->window = window;
  object->hash_func = hash_func;
  object->comm_size = comm_size;
  object->communicator = comm;
  object->mem_alloc = mem_alloc;
  return object;

fail:
#ifdef DHT_STATISTICS
  if (object->stats != NULL) {
    free(object->stats->writes_local);
    free(object->stats);
  }
#endif
  free(object->index);
  free(object->send_entry);
  free(object->recv_entry);
  free(object);
  return NULL;
}
/**
 * Write one key/data pair into the DHT.
 *
 * The candidate buckets of the key are fetched one after another from the
 * destination rank under an exclusive window lock. The first bucket that is
 * empty or already holds the same key is chosen; if neither is found, the
 * last candidate is overwritten and the eviction counters are incremented.
 *
 * @return DHT_SUCCESS, DHT_WRITE_SUCCESS_WITH_EVICTION or DHT_MPI_ERROR.
 */
int DHT_write(DHT *table, void *send_key, void *send_data) {
  unsigned int dest_rank, i;
  int result = DHT_SUCCESS;
  /* one bucket: flag byte, key, data */
  const int entry_size = 1 + table->data_size + table->key_size;
  char *const outgoing = (char *)table->send_entry;
  const char *const fetched = (const char *)table->recv_entry;

#ifdef DHT_STATISTICS
  table->stats->w_access++;
#endif

  // hash of the key selects destination rank and candidate indices
  determine_dest(table->hash_func(table->key_size, send_key), table->comm_size,
                 table->table_size, &dest_rank, table->index,
                 table->index_count);

  // assemble the bucket to send: flag byte, then key, then data
  set_flag(outgoing);
  memcpy(outgoing + 1, (char *)send_key, table->key_size);
  memcpy(outgoing + table->key_size + 1, (char *)send_data, table->data_size);

  // exclusive lock on the window of the destination rank
  if (MPI_Win_lock(MPI_LOCK_EXCLUSIVE, dest_rank, 0, table->window) != 0)
    return DHT_MPI_ERROR;

  for (i = 0; i < table->index_count; i++) {
    if (MPI_Get(table->recv_entry, entry_size, MPI_BYTE, dest_rank,
                table->index[i], entry_size, MPI_BYTE, table->window) != 0)
      return DHT_MPI_ERROR;
    if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR;

    // empty bucket: take it
    if (!read_flag(fetched[0])) {
#ifdef DHT_STATISTICS
      table->stats->writes_local[dest_rank]++;
#endif
      break;
    }

    // bucket already holds this key: overwrite it
    if (memcmp(send_key, fetched + 1, table->key_size) == 0) break;

    // foreign key in the last candidate: evict it
    if (i == (table->index_count) - 1) {
      table->evictions += 1;
#ifdef DHT_STATISTICS
      table->stats->evictions += 1;
#endif
      result = DHT_WRITE_SUCCESS_WITH_EVICTION;
      break;
    }
  }

  // put the bucket at the index selected by the loop above
  if (MPI_Put(table->send_entry, entry_size, MPI_BYTE, dest_rank,
              table->index[i], entry_size, MPI_BYTE, table->window) != 0)
    return DHT_MPI_ERROR;

  // release the window of the destination rank
  if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;

  return result;
}
/**
 * Read the data stored for a key.
 *
 * The candidate buckets of the key are fetched one after another from the
 * destination rank under a shared window lock. An empty candidate, or a
 * foreign key in the last candidate, counts as a read miss.
 *
 * @param destination Receives data_size bytes on success.
 * @return DHT_SUCCESS, DHT_READ_MISS or DHT_MPI_ERROR.
 */
int DHT_read(DHT *table, void *send_key, void *destination) {
  unsigned int dest_rank, i;
  /* one bucket: flag byte, key, data */
  const int entry_size = 1 + table->data_size + table->key_size;
  const char *const fetched = (const char *)table->recv_entry;

#ifdef DHT_STATISTICS
  table->stats->r_access++;
#endif

  // hash of the key selects destination rank and candidate indices
  determine_dest(table->hash_func(table->key_size, send_key), table->comm_size,
                 table->table_size, &dest_rank, table->index,
                 table->index_count);

  // shared lock on the window of the destination rank
  if (MPI_Win_lock(MPI_LOCK_SHARED, dest_rank, 0, table->window) != 0)
    return DHT_MPI_ERROR;

  for (i = 0; i < table->index_count; i++) {
    if (MPI_Get(table->recv_entry, entry_size, MPI_BYTE, dest_rank,
                table->index[i], entry_size, MPI_BYTE, table->window) != 0)
      return DHT_MPI_ERROR;
    if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR;

    if (read_flag(fetched[0])) {
      // occupied bucket with matching key: found
      if (memcmp(fetched + 1, send_key, table->key_size) == 0) break;
      // foreign key, but further candidates are left: try the next one
      if (i != (table->index_count) - 1) continue;
    }

    // empty bucket, or foreign key in the last candidate: read miss
    table->read_misses += 1;
#ifdef DHT_STATISTICS
    table->stats->read_misses += 1;
#endif
    if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
    return DHT_READ_MISS;
  }

  // release the window of the destination rank
  if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;

  // hand the data part of the fetched bucket to the caller
  memcpy((char *)destination, fetched + table->key_size + 1, table->data_size);
  return DHT_SUCCESS;
}
/**
 * Dump all occupied buckets of the DHT into a file.
 *
 * File layout: key_size and data_size as two ints (DHT_FILEHEADER_SIZE
 * bytes), followed by key+data of every occupied bucket without the flag
 * byte. Must be called by all processes of the communicator.
 *
 * An already existing file is truncated first. Without that, a dump that is
 * shorter than the old file would keep stale records at its end, and
 * DHT_from_file would load them because it reads up to the file size.
 *
 * @return DHT_SUCCESS, DHT_FILE_IO_ERROR or DHT_FILE_WRITE_ERROR.
 */
int DHT_to_file(DHT *table, const char *filename) {
  // open file
  MPI_File file;
  if (MPI_File_open(table->communicator, filename,
                    MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL,
                    &file) != 0)
    return DHT_FILE_IO_ERROR;

  // drop the content of a previously existing file (collective call)
  if (MPI_File_set_size(file, 0) != 0) return DHT_FILE_IO_ERROR;

  int rank;
  MPI_Comm_rank(table->communicator, &rank);

  // write header (key_size and data_size)
  if (rank == 0) {
    if (MPI_File_write(file, &table->key_size, 1, MPI_INT, MPI_STATUS_IGNORE) !=
        0)
      return DHT_FILE_WRITE_ERROR;
    if (MPI_File_write(file, &table->data_size, 1, MPI_INT,
                       MPI_STATUS_IGNORE) != 0)
      return DHT_FILE_WRITE_ERROR;
  }

  // seek shared file pointer behind header for all processes
  if (MPI_File_seek_shared(file, DHT_FILEHEADER_SIZE, MPI_SEEK_SET) != 0)
    return DHT_FILE_IO_ERROR;

  char *ptr;
  int bucket_size = table->key_size + table->data_size + 1;

  // iterate over local memory
  for (unsigned int i = 0; i < table->table_size; i++) {
    ptr = (char *)table->mem_alloc + (i * bucket_size);
    // if bucket has been written to (checked by written_flag)...
    if (read_flag(*ptr)) {
      // ... append key and data (everything behind the flag byte) to the file
      if (MPI_File_write_shared(file, ptr + 1, bucket_size - 1, MPI_BYTE,
                                MPI_STATUS_IGNORE) != 0)
        return DHT_FILE_WRITE_ERROR;
    }
  }

  // NOTE(review): the error returns above leave the file open on purpose;
  // MPI_File_close is collective and the other ranks may still be inside the
  // collective calls of this function.
  // close file
  if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR;
  return DHT_SUCCESS;
}
/**
 * Load a dump written by DHT_to_file into the DHT.
 *
 * Every process checks the file header against the key and data size of the
 * table. The records are then distributed round-robin: the process with rank
 * r reads records r, r + comm_size, r + 2 * comm_size, ... and stores each of
 * them with DHT_write. An incomplete record at the end of the file is
 * ignored.
 *
 * @return DHT_SUCCESS, DHT_WRONG_FILE (header does not match or file too
 * short), DHT_FILE_IO_ERROR (open/seek/close failed or no memory for the read
 * buffer), DHT_FILE_READ_ERROR or DHT_MPI_ERROR (from DHT_write).
 */
int DHT_from_file(DHT *table, const char *filename) {
  MPI_File file;
  /* MPI_Offset instead of int: positions in dumps beyond 2 GiB must not
   * overflow */
  MPI_Offset f_size, cur_pos, stride;
  int bucket_size, buffer_size, rank;
  int stored_key_size, stored_data_size;
  int result = DHT_SUCCESS;
  char *buffer = NULL;

  // open file
  if (MPI_File_open(table->communicator, filename, MPI_MODE_RDONLY,
                    MPI_INFO_NULL, &file) != 0)
    return DHT_FILE_IO_ERROR;

  // get file size
  if (MPI_File_get_size(file, &f_size) != 0) {
    result = DHT_FILE_IO_ERROR;
    goto cleanup;
  }
  // a file without a complete header cannot be a DHT dump
  if (f_size < DHT_FILEHEADER_SIZE) {
    result = DHT_WRONG_FILE;
    goto cleanup;
  }

  MPI_Comm_rank(table->communicator, &rank);

  // a record in the file is key + data (no flag byte)
  bucket_size = table->key_size + table->data_size;
  // the buffer is used for the header as well as for the records
  buffer_size =
      bucket_size > DHT_FILEHEADER_SIZE ? bucket_size : DHT_FILEHEADER_SIZE;
  buffer = (char *)malloc(buffer_size);
  if (buffer == NULL) {
    result = DHT_FILE_IO_ERROR;
    goto cleanup;
  }

  // read file header
  if (MPI_File_read(file, buffer, DHT_FILEHEADER_SIZE, MPI_BYTE,
                    MPI_STATUS_IGNORE) != 0) {
    result = DHT_FILE_READ_ERROR;
    goto cleanup;
  }

  // header is key size followed by data size; both must match the table
  memcpy(&stored_key_size, buffer, sizeof(int));
  memcpy(&stored_data_size, buffer + sizeof(int), sizeof(int));
  if (stored_key_size != table->key_size ||
      stored_data_size != table->data_size) {
    result = DHT_WRONG_FILE;
    goto cleanup;
  }

  // distance between two records handled by the same process
  stride = (MPI_Offset)bucket_size * table->comm_size;
  // first record of this process
  cur_pos = DHT_FILEHEADER_SIZE + ((MPI_Offset)rank * bucket_size);

  // loop over all complete records of this process and write them to the DHT
  while (bucket_size > 0 && cur_pos + bucket_size <= f_size) {
    if (MPI_File_seek(file, cur_pos, MPI_SEEK_SET) != 0) {
      result = DHT_FILE_IO_ERROR;
      goto cleanup;
    }
    if (MPI_File_read(file, buffer, bucket_size, MPI_BYTE,
                      MPI_STATUS_IGNORE) != 0) {
      result = DHT_FILE_READ_ERROR;
      goto cleanup;
    }
    // record layout: key first, data directly behind it
    if (DHT_write(table, buffer, buffer + table->key_size) == DHT_MPI_ERROR) {
      result = DHT_MPI_ERROR;
      goto cleanup;
    }
    cur_pos += stride;
  }

cleanup:
  free(buffer);
  // an earlier error code wins over a failing close
  if (MPI_File_close(&file) != 0 && result == DHT_SUCCESS)
    result = DHT_FILE_IO_ERROR;
  return result;
}
/**
 * Sum one per-process counter over all processes (result on rank 0) and store
 * the locally received value in *total. *total is left untouched on error.
 */
static int dht_sum_counter(DHT *table, int *local, int *total) {
  int sum = 0;
  if (MPI_Reduce(local, &sum, 1, MPI_INT, MPI_SUM, 0, table->communicator) !=
      0)
    return DHT_MPI_ERROR;
  *total = sum;
  return DHT_SUCCESS;
}

/**
 * Release the window, its memory and all buffers of a DHT handle.
 *
 * For every counter pointer that is not NULL the corresponding counter is
 * first summed with MPI_Reduce; the sum arrives on rank 0, the other ranks
 * receive 0.
 *
 * @return DHT_SUCCESS or DHT_MPI_ERROR.
 */
int DHT_free(DHT *table, int *eviction_counter, int *readerror_counter) {
  if (eviction_counter != NULL &&
      dht_sum_counter(table, &table->evictions, eviction_counter) !=
          DHT_SUCCESS)
    return DHT_MPI_ERROR;

  if (readerror_counter != NULL &&
      dht_sum_counter(table, &table->read_misses, readerror_counter) !=
          DHT_SUCCESS)
    return DHT_MPI_ERROR;

  // the window has to go before the memory it was created on
  if (MPI_Win_free(&(table->window)) != 0) return DHT_MPI_ERROR;
  if (MPI_Free_mem(table->mem_alloc) != 0) return DHT_MPI_ERROR;

  // process-local buffers
  free(table->recv_entry);
  free(table->send_entry);
  free(table->index);
#ifdef DHT_STATISTICS
  free(table->stats->writes_local);
  free(table->stats);
#endif
  free(table);

  return DHT_SUCCESS;
}
/**
 * Gather the usage counters of all processes on rank 0 and print them as a
 * table to stdout.
 *
 * Collective over the communicator of the table. read_misses, evictions,
 * w_access and r_access of every process are reset to zero afterwards. The
 * gather buffers only exist on rank 0 and are released before returning.
 *
 * Without DHT_STATISTICS no counters exist; the function then does nothing
 * and returns DHT_SUCCESS.
 *
 * @return DHT_SUCCESS or DHT_MPI_ERROR.
 */
int DHT_print_statistics(DHT *table) {
#ifdef DHT_STATISTICS
  /* width of the separator line: 7 columns of 12 characters plus 1 */
  enum { MEMBERS = 7, PADSIZE = (MEMBERS * 12) + 1 };
  /* the per-rank arrays are only allocated on rank 0; NULL everywhere else */
  int *written_buckets = NULL;
  int *read_misses = NULL, sum_read_misses = 0;
  int *evictions = NULL, sum_evictions = 0;
  int *w_access = NULL, sum_w_access = 0;
  int *r_access = NULL, sum_r_access = 0;
  int rank;
  int result = DHT_MPI_ERROR;

  MPI_Comm_rank(table->communicator, &rank);

  // obtaining all values from all processes in the communicator
  if (rank == 0) read_misses = (int *)malloc(table->comm_size * sizeof(int));
  if (MPI_Gather(&table->stats->read_misses, 1, MPI_INT, read_misses, 1,
                 MPI_INT, 0, table->communicator) != 0)
    goto cleanup;
  if (MPI_Reduce(&table->stats->read_misses, &sum_read_misses, 1, MPI_INT,
                 MPI_SUM, 0, table->communicator) != 0)
    goto cleanup;
  table->stats->read_misses = 0;

  if (rank == 0) evictions = (int *)malloc(table->comm_size * sizeof(int));
  if (MPI_Gather(&table->stats->evictions, 1, MPI_INT, evictions, 1, MPI_INT, 0,
                 table->communicator) != 0)
    goto cleanup;
  if (MPI_Reduce(&table->stats->evictions, &sum_evictions, 1, MPI_INT, MPI_SUM,
                 0, table->communicator) != 0)
    goto cleanup;
  table->stats->evictions = 0;

  if (rank == 0) w_access = (int *)malloc(table->comm_size * sizeof(int));
  if (MPI_Gather(&table->stats->w_access, 1, MPI_INT, w_access, 1, MPI_INT, 0,
                 table->communicator) != 0)
    goto cleanup;
  if (MPI_Reduce(&table->stats->w_access, &sum_w_access, 1, MPI_INT, MPI_SUM, 0,
                 table->communicator) != 0)
    goto cleanup;
  table->stats->w_access = 0;

  if (rank == 0) r_access = (int *)malloc(table->comm_size * sizeof(int));
  if (MPI_Gather(&table->stats->r_access, 1, MPI_INT, r_access, 1, MPI_INT, 0,
                 table->communicator) != 0)
    goto cleanup;
  if (MPI_Reduce(&table->stats->r_access, &sum_r_access, 1, MPI_INT, MPI_SUM, 0,
                 table->communicator) != 0)
    goto cleanup;
  table->stats->r_access = 0;

  // element-wise sum of writes_local: occupied buckets per destination rank
  if (rank == 0) written_buckets = (int *)calloc(table->comm_size, sizeof(int));
  if (MPI_Reduce(table->stats->writes_local, written_buckets, table->comm_size,
                 MPI_INT, MPI_SUM, 0, table->communicator) != 0)
    goto cleanup;

  if (rank == 0) {  // only process with rank 0 will print out results as a
                    // table
    int sum_written_buckets = 0;
    for (int i = 0; i < table->comm_size; i++) {
      sum_written_buckets += written_buckets[i];
    }

    char pad[PADSIZE + 1];
    memset(pad, '-', PADSIZE * sizeof(char));
    pad[PADSIZE] = '\0';

    printf("\n");
    printf("%-35s||resets with every call of this function\n", " ");
    printf("%-11s|%-11s|%-11s||%-11s|%-11s|%-11s|%-11s\n", "rank", "occupied",
           "free", "w_access", "r_access", "read misses", "evictions");
    printf("%s\n", pad);
    for (int i = 0; i < table->comm_size; i++) {
      // the "free" column is an unsigned expression, hence %u
      printf("%-11d|%-11d|%-11u||%-11d|%-11d|%-11d|%-11d\n", i,
             written_buckets[i], table->table_size - written_buckets[i],
             w_access[i], r_access[i], read_misses[i], evictions[i]);
    }
    printf("%s\n", pad);
    printf("%-11s|%-11d|%-11u||%-11d|%-11d|%-11d|%-11d\n", "sum",
           sum_written_buckets,
           (table->table_size * table->comm_size) - sum_written_buckets,
           sum_w_access, sum_r_access, sum_read_misses, sum_evictions);
    printf("%s\n", pad);
    printf("%s %d\n",
           "new entries:", sum_written_buckets - table->stats->old_writes);
    printf("\n");
    fflush(stdout);

    table->stats->old_writes = sum_written_buckets;
  }

  MPI_Barrier(table->communicator);
  result = DHT_SUCCESS;

cleanup:
  // free(NULL) is a no-op, so this is safe on every rank and on every path
  free(written_buckets);
  free(read_misses);
  free(evictions);
  free(w_access);
  free(r_access);
  return result;
#else
  (void)table;
  return DHT_SUCCESS;
#endif
}

312
src/DHT/DHT.h Normal file
View File

@ -0,0 +1,312 @@
/*
** Copyright (C) 2017-2021 Max Luebke (University of Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/**
* @file DHT.h
* @author Max Lübke (mluebke@uni-potsdam.de)
* @brief API to interact with the DHT
* @version 0.1
* @date 16 Nov 2017
*
* This file implements the creation of a DHT by using the MPI
* one-sided-communication. There is also the possibility to write or read data
* from or to the DHT. In addition, the current state of the DHT can be written
* to a file and read in again later.
*/
#ifndef DHT_H
#define DHT_H
#include <mpi.h>
#include <stdint.h>
/** Returned if some error in MPI routine occurs. */
#define DHT_MPI_ERROR -1
/** Returned by a call of DHT_read if no bucket with given key was found. */
#define DHT_READ_MISS -2
/** Returned by DHT_write if a bucket was evicted. */
#define DHT_WRITE_SUCCESS_WITH_EVICTION -3
/** Returned when no errors occurred. */
#define DHT_SUCCESS 0
/** Returned by DHT_from_file if the given file does not match expected file. */
#define DHT_WRONG_FILE -11
/** Returned by DHT file operations if MPI file operation fails. */
#define DHT_FILE_IO_ERROR -12
/** Returned by DHT file operations if an error occurred in an MPI file read. */
#define DHT_FILE_READ_ERROR -13
/** Returned by DHT file operations if an error occurred in an MPI file write. */
#define DHT_FILE_WRITE_ERROR -14
/** Size of the file header in byte. */
#define DHT_FILEHEADER_SIZE 8
/**
* Internal struct to store statistics about read and write accesses and also
* read misses and evictions.
* <b>read_misses, evictions, w_access and r_access are reset to zero by every
* call of DHT_print_statistics(); writes_local keeps accumulating.</b>
* Internal use only!
*
* @todo There's maybe a better solution than DHT_print_statistics and this
* struct
*/
typedef struct {
/** Per destination rank: number of previously empty buckets this process
* filled with DHT_write(). */
int* writes_local;
/** Sum of occupied buckets at the last call of DHT_print_statistics (only
* updated on rank 0). */
int old_writes;
/** How many read misses occurred? */
int read_misses;
/** How many buckets were evicted? */
int evictions;
/** How many calls of DHT_write() did this process make? */
int w_access;
/** How many calls of DHT_read() did this process make? */
int r_access;
} DHT_stats;
/**
* Struct which serves as a handle or so-called \a DHT-object. Will
* be created by DHT_create and must be passed as a parameter to every following
* function. Stores all relevant data.
* Do not touch outside DHT functions!
*/
typedef struct {
/** Created MPI Window, which serves as the DHT memory area of the process. */
MPI_Win window;
/** Size of the data of a bucket entry in byte. */
int data_size;
/** Size of the key of a bucket entry in byte. */
int key_size;
/** Count of buckets for each process. */
unsigned int table_size;
/** MPI communicator of all participating processes. */
MPI_Comm communicator;
/** Size of the MPI communicator, i.e. the number of participating
* processes. */
int comm_size;
/** Pointer to a hash function, called with the key size and the key. */
uint64_t (*hash_func)(int, void*);
/** Pre-allocated memory where a bucket can be received. */
void* recv_entry;
/** Pre-allocated memory where a bucket to send can be stored. */
void* send_entry;
/** Allocated memory on which the MPI window was created. */
void* mem_alloc;
/** Count of read misses over all time. */
int read_misses;
/** Count of evictions over all time. */
int evictions;
/** Array of indices where a bucket can be stored. */
uint64_t* index;
/** Count of possible indices. */
unsigned int index_count;
#ifdef DHT_STATISTICS
/** Detailed statistics of the usage of the DHT. Only present when compiled
* with DHT_STATISTICS, so every translation unit using this struct has to
* agree on that macro. */
DHT_stats* stats;
#endif
} DHT;
/**
* @brief Create a DHT.
*
* When calling this function, the required memory is allocated and a
* MPI_Window is created. This allows the execution of MPI_Get and
* MPI_Put operations for one-sided communication. Then the number of
* indexes is calculated and finally all relevant data is entered into the
* \a DHT-object which is returned.
*
* @param comm MPI communicator which addresses all participating process of the
* DHT.
* @param size_per_process Number of buckets per process.
* @param data_size Size of data in byte.
* @param key_size Size of the key in byte.
* @param hash_func Pointer to a hash function. This function must take the size
* of the key and a pointer to the key as input parameters and return a 64 bit
* hash.
* @return DHT* The returned value is the \a DHT-object which serves as a handle
* for all DHT operations. If an error occurred NULL is returned.
*/
extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process,
unsigned int data_size, unsigned int key_size,
uint64_t (*hash_func)(int, void*));
/**
* @brief Write data into DHT.
*
* When DHT_write is called, the address window is locked with a
* LOCK_EXCLUSIVE for write access. Now the first bucket is received
* using MPI_Get and it is checked if the bucket is empty or if the received key
* matches the passed key. If this is the case, the data of the bucket is
* overwritten with the new value. If not, the function continues with the next
* index until no more indexes are available. When the last index is reached and
* there are no more indexes available, the last examined bucket is replaced.
* After successful writing, the memory window is released and the function
* returns.
*
* @param table Pointer to the \a DHT-object.
* @param key Pointer to the key.
* @param data Pointer to the data.
* @return int Returns DHT_SUCCESS on success,
* DHT_WRITE_SUCCESS_WITH_EVICTION if a bucket had to be replaced, or
* DHT_MPI_ERROR on an MPI error.
*/
extern int DHT_write(DHT* table, void* key, void* data);
/**
* @brief Read data from DHT.
*
* At the beginning, the target process and all possible indices are determined.
* After that a SHARED lock on the address window for read access is done
* and the first entry is retrieved. Now the received key is compared
* with the key passed to the function. If they coincide the correct data
* was found. If not it continues with the next index. If an empty bucket is
* found, or the last possible bucket is reached and the keys still do not
* match, the read miss counter is incremented. After the window has been
* released again, the function returns with a corresponding return value (read
* miss or error-free read). On success the data is written to
* the memory area of the passed pointer.
*
* @param table Pointer to the \a DHT-object.
* @param key Pointer to the key.
* @param destination Pointer to memory area where retrieved data should be
* stored.
* @return int Returns DHT_SUCCESS on success, DHT_READ_MISS if no bucket with
* the key was found, or DHT_MPI_ERROR on an MPI error.
*/
extern int DHT_read(DHT* table, void* key, void* destination);
/**
* @brief Write current state of DHT to file.
*
* All contents are written as a memory dump, so that no conversion takes place.
* First, an attempt is made to open or create a file. If this is successful the
* file header consisting of key and data size is written. Then each process
* reads its memory area of the DHT and each bucket that was marked as written
* is added to the file using MPI file operations.
*
* @param table Pointer to the \a DHT-object.
* @param filename Name of the file to write to.
* @return int Returns DHT_SUCCESS on success, DHT_FILE_IO_ERROR if file can't
* be opened/closed or DHT_FILE_WRITE_ERROR if file is not writable.
*/
extern int DHT_to_file(DHT* table, const char* filename);
/**
* @brief Read state of DHT from file.
*
* One needs a previously written DHT file (by DHT_to_file).
* First of all, an attempt is made to open the specified file. If this
* succeeded the file header is read and compared with the current values of the
* DHT. If the data and key sizes do not differ, one can continue. Each process
* reads one record (key and data) of the file at a time and writes it to the
* DHT with DHT_write. This happens until no more records are left. The writing
* is done by the implementation of DHT_write.
*
* @param table Pointer to the \a DHT-object.
* @param filename Name of the file to read from.
* @return int Returns DHT_SUCCESS on success, DHT_FILE_IO_ERROR if file can't
* be opened/closed, DHT_FILE_READ_ERROR if file is not readable, DHT_MPI_ERROR
* if DHT_write fails, or DHT_WRONG_FILE if file doesn't match expectation. This
* is possible if the data size or key size is different.
*/
extern int DHT_from_file(DHT* table, const char* filename);
/**
* @brief Free resources of DHT.
*
* Finally, to free all resources after using the DHT, the function
* DHT_free must be used. This will free the MPI\_Window, as well as the
* associated memory. Also all internal variables are released. Optionally, the
* count of evictions and read misses can also be obtained; the sums are
* computed with MPI_Reduce towards rank 0.
*
* @param table Pointer to the \a DHT-object.
* @param eviction_counter \a optional: Pointer to integer where the count of
* evictions should be stored.
* @param readerror_counter \a optional: Pointer to integer where the count of
* read errors should be stored.
* @return int Returns either DHT_SUCCESS on success or DHT_MPI_ERROR on
* internal MPI error.
*/
extern int DHT_free(DHT* table, int* eviction_counter, int* readerror_counter);
/**
* @brief Prints a table with statistics about current use of DHT.
*
* These statistics are from each participated process and also summed up over
* all processes. Detailed statistics are:
* -# occupied buckets (in respect to the memory of this process)
* -# free buckets (in respect to the memory of this process)
* -# calls of DHT_write (w_access)
* -# calls of DHT_read (r_access)
* -# read misses (see DHT_READ_MISS)
* -# evictions (see DHT_WRITE_SUCCESS_WITH_EVICTION)
*
* 3-6 will reset with every call of this function. Finally the amount of newly
* written entries is printed out (since the last call of this function).
*
* This is done by collective MPI operations with the root process with rank 0,
* which will also print a table with all information to stdout.
*
* Also, as this function was implemented for a special case (POET project) one
* needs to add DHT_STATISTICS to the compiler macros to use this
* function (eg. <em>gcc -DDHT_STATISTICS ... </em>).
* @param table Pointer to the \a DHT-object.
* @return int Returns DHT_SUCCESS on success or DHT_MPI_ERROR on internal MPI
* error.
*/
extern int DHT_print_statistics(DHT* table);
/*
* NOTE(review): the three prototypes below are declared static in a public
* header. Every translation unit that includes DHT.h therefore gets its own
* declaration of functions that are only defined in DHT.c, which compilers
* usually report as "declared static but never defined". Consider moving
* these prototypes (and their documentation) into DHT.c.
*/
/**
* @brief Determine destination rank and index.
*
* This is done by looping over all possible indices. First of all, set a
* temporary index to zero and copy count of bytes for each index into the
* memory area of the temporary index. After that the current index is
* calculated by the temporary index modulo the table size. The destination rank
* of the process is simply determined by hash modulo the communicator size.
*
* @param hash Calculated 64 bit hash.
* @param comm_size Communicator size.
* @param table_size Count of buckets per process.
* @param dest_rank Reference to the destination rank variable.
* @param index Pointer to the array index.
* @param index_count Count of possible indices.
*/
static void determine_dest(uint64_t hash, int comm_size,
unsigned int table_size, unsigned int* dest_rank,
uint64_t* index, unsigned int index_count);
/**
* @brief Set the occupied flag.
*
* This will set the first bit of the flag byte of a bucket to 1 and clear all
* other bits of that byte.
*
* @param flag_byte First byte of a bucket.
*/
static void set_flag(char* flag_byte);
/**
* @brief Get the occupied flag.
*
* This function determines whether the occupied flag of a bucket was set or
* not.
*
* @param flag_byte First byte of a bucket.
* @return int Returns 1 for true or 0 for false.
*/
static int read_flag(char flag_byte);
#endif /* DHT_H */

232
src/DHT/DHT_Wrapper.cpp Normal file
View File

@ -0,0 +1,232 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "DHT_Wrapper.h"
#include <math.h>
#include <openssl/md5.h>
#include <iostream>
using namespace poet;
using namespace std;
/**
 * Hash a key by folding its MD5 digest into 64 bits.
 *
 * The 128-bit MD5 digest of the key is split into two 64-bit halves which are
 * XORed together.
 *
 * @param key_size Size of the key in bytes.
 * @param key Pointer to the key data.
 * @return uint64_t XOR of the lower and upper half of the MD5 digest.
 */
uint64_t poet::get_md5(int key_size, void *key) {
  static_assert(MD5_DIGEST_LENGTH == 2 * sizeof(uint64_t),
                "MD5 digest must consist of exactly two 64-bit halves");

  MD5_CTX ctx;
  // The digest is written directly into two 64-bit words. Filling them through
  // an unsigned char pointer is well defined, whereas reading a byte array
  // through a uint64_t pointer (as done before) violates strict aliasing and
  // may be misaligned. The resulting byte layout, and therefore the hash, is
  // unchanged.
  uint64_t halves[2];

  MD5_Init(&ctx);
  MD5_Update(&ctx, key, key_size);
  MD5_Final(reinterpret_cast<unsigned char *>(halves), &ctx);

  return halves[0] ^ halves[1];
}
/**
 * Create the DHT, allocate the key scratch buffer and copy the DHT related
 * settings out of the simulation parameters.
 */
DHT_Wrapper::DHT_Wrapper(SimParams &params, MPI_Comm dht_comm,
                         int buckets_per_process, int data_size, int key_size) {
  // set up the table; keys are hashed with get_md5
  this->dht_object =
      DHT_create(dht_comm, buckets_per_process, data_size, key_size, &get_md5);

  // scratch space for one fuzzed key (key_size bytes)
  this->fuzzing_buffer = static_cast<double *>(malloc(key_size));

  // settings needed later by fuzzForDHT
  t_simparams num_params = params.getNumParams();
  this->dt_differ = num_params.dt_differ;
  this->dht_log = num_params.dht_log;
  this->dht_signif_vector = params.getDHTSignifVector();
  this->dht_prop_type_vector = params.getDHTPropTypeVector();
}
/**
 * Release the DHT and the key scratch buffer.
 */
DHT_Wrapper::~DHT_Wrapper() {
  // both optional output arguments of DHT_free are passed as NULL
  DHT_free(this->dht_object, NULL, NULL);
  // the buffer was obtained with malloc in the constructor
  free(this->fuzzing_buffer);
}
void DHT_Wrapper::checkDHT(int length, std::vector<bool> &out_result_index,
double *work_package, double dt) {
void *key;
int res;
// var count -> count of variables per grid cell
int var_count = dht_prop_type_vector.size();
// loop over every grid cell contained in work package
for (int i = 0; i < length; i++) {
// point to current grid cell
key = (void *)&(work_package[i * var_count]);
// fuzz data (round, logarithm etc.)
fuzzForDHT(var_count, key, dt);
// overwrite input with data from DHT, IF value is found in DHT
res = DHT_read(dht_object, fuzzing_buffer, key);
// if DHT_SUCCESS value was found ...
if (res == DHT_SUCCESS) {
// ... and grid cell will be marked as 'not to be simulating'
out_result_index[i] = false;
dht_hits++;
}
// ... otherwise ...
else if (res == DHT_READ_MISS) {
// grid cell needs to be simulated by PHREEQC
out_result_index[i] = true;
dht_miss++;
} else {
// MPI ERROR ... WHAT TO DO NOW?
// RUNNING CIRCLES WHILE SCREAMING
}
}
}
/**
 * Store the results of all simulated grid cells in the DHT.
 *
 * Cells whose entry in result_index is false are skipped. For all others the
 * input values are fuzzed into a key and the corresponding results are written
 * with DHT_write. Evictions reported by DHT_write are counted; any other
 * non-success return value is currently ignored.
 */
void DHT_Wrapper::fillDHT(int length, std::vector<bool> &result_index,
                          double *work_package, double *results, double dt) {
  // count of variables per grid cell
  const int var_count = dht_prop_type_vector.size();

  for (int cell = 0; cell < length; ++cell) {
    // only simulated cells are inserted
    if (!result_index[cell]) continue;

    void *key = (void *)&(work_package[cell * var_count]);
    void *data = (void *)&(results[cell * var_count]);

    // build the fuzzed key in fuzzing_buffer and store the results under it
    fuzzForDHT(var_count, key, dt);
    const int res = DHT_write(dht_object, fuzzing_buffer, data);

    // plain success: nothing more to do
    if (res == DHT_SUCCESS) continue;

    if (res == DHT_WRITE_SUCCESS_WITH_EVICTION) {
      // written, but an older entry had to make room
      dht_evictions++;
    } else {
      // MPI ERROR ... WHAT TO DO NOW?
      // RUNNING CIRCLES WHILE SCREAMING
    }
  }
}
/**
 * Dump the DHT into a file; returns whatever DHT_to_file returns.
 */
int DHT_Wrapper::tableToFile(const char *filename) {
  return DHT_to_file(dht_object, filename);
}
/**
 * Load a dump file into the DHT.
 *
 * Returns the result of DHT_from_file. After a successful load the DHT
 * statistics are printed if DHT_STATISTICS is defined.
 */
int DHT_Wrapper::fileToTable(const char *filename) {
  const int load_result = DHT_from_file(dht_object, filename);

  if (load_result == DHT_SUCCESS) {
#ifdef DHT_STATISTICS
    DHT_print_statistics(dht_object);
#endif
  }

  return load_result;
}
/**
 * Print the DHT statistics via DHT_print_statistics.
 *
 * The call is only compiled in when DHT_STATISTICS is defined, matching
 * fileToTable and the requirement documented for DHT_print_statistics in
 * DHT.h. Without the macro this function does nothing. A failure reported by
 * DHT_print_statistics is written to stderr instead of being dropped.
 */
void DHT_Wrapper::printStatistics() {
#ifdef DHT_STATISTICS
  const int res = DHT_print_statistics(dht_object);
  if (res != DHT_SUCCESS) {
    cerr << "dht_wrapper.cpp::printStatistics(): Warning! "
            "DHT_print_statistics failed with code "
         << res << endl;
  }
#endif
}
uint64_t DHT_Wrapper::getHits() { return this->dht_hits; }
uint64_t DHT_Wrapper::getMisses() { return this->dht_miss; }
uint64_t DHT_Wrapper::getEvictions() { return this->dht_evictions; }
/**
 * Build the fuzzed DHT key of one grid cell in fuzzing_buffer.
 *
 * Every variable is handled according to its entry in dht_prop_type_vector:
 *  - "act":    if dht_log is set, negative values (with a warning) and zero
 *              map to 0, all other values to the rounded negated log10;
 *              without dht_log the value itself is rounded.
 *  - "logact": the value itself is rounded.
 *  - "ignore": 0.
 *  - anything else: a warning is printed and the slot is set to 0. Before,
 *    the slot was left untouched, so uninitialised or stale bytes of the
 *    buffer became part of the hashed key.
 *
 * Rounding uses ROUND with the digits from dht_signif_vector. If dt_differ is
 * set, dt is stored behind the last variable.
 *
 * @param var_count Count of variables of the grid cell.
 * @param key Pointer to the var_count doubles of the grid cell.
 * @param dt Current time step.
 */
void DHT_Wrapper::fuzzForDHT(int var_count, void *key, double dt) {
  const double *values = static_cast<const double *>(key);

  for (unsigned int i = 0; i < (unsigned int)var_count; i++) {
    const std::string &type = dht_prop_type_vector[i];
    const double value = values[i];

    if (type == "act") {
      if (!dht_log) {
        // log disabled: just cut after the significant digit
        fuzzing_buffer[i] = ROUND(value, dht_signif_vector[i]);
      } else if (value < 0) {
        // a negative value would be a strange result: warn and use 0
        cerr << "dht_wrapper.cpp::fuzz_for_dht(): Warning! Negative value in "
                "key!"
             << endl;
        fuzzing_buffer[i] = 0;
      } else if (value == 0) {
        // log10(0) is not finite, use 0 instead
        fuzzing_buffer[i] = 0;
      } else {
        // negate the log10 (actual values are expected between 0 and 1) and
        // cut after the significant digit
        fuzzing_buffer[i] = ROUND(-(std::log10(value)), dht_signif_vector[i]);
      }
    } else if (type == "logact") {
      // log was already applied (e.g. pH): just cut after significant digit
      fuzzing_buffer[i] = ROUND(value, dht_signif_vector[i]);
    } else if (type == "ignore") {
      fuzzing_buffer[i] = 0;
    } else {
      cerr << "dht_wrapper.cpp::fuzz_for_dht(): Warning! Probably wrong "
              "prop_type!"
           << endl;
      // keep the key deterministic even for an unknown type
      fuzzing_buffer[i] = 0;
    }
  }

  // a varying time step has to be part of the key; it is stored behind the
  // last variable
  if (dt_differ) fuzzing_buffer[var_count] = dt;
}

300
src/DHT/DHT_Wrapper.h Normal file
View File

@ -0,0 +1,300 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef DHT_WRAPPER_H
#define DHT_WRAPPER_H
#include <SimParams.h>
#include <string>
#include <vector>
extern "C" {
#include <DHT.h>
}
#include <mpi.h>
/**
 * @brief Cut double value after signif digit
 *
 * Macro to round a double value by cutting every decimal digit after the given
 * significant digit (truncation towards zero).
 *
 * Both arguments are parenthesized, so arbitrary expressions can be passed.
 * The scaled value is truncated with trunc() instead of a cast to int, which
 * overflowed as soon as value * 10^signif exceeded the range of int.
 *
 * Relies on pow() and trunc() from <math.h>, which must be included before the
 * macro is used.
 */
#define ROUND(value, signif)                               \
  (trunc(pow(10.0, (double)(signif)) * (value)) *          \
   pow(10.0, -(double)(signif)))
namespace poet {
/**
* @brief Return user-defined md5sum
*
* This function will calculate a hashsum with the help of md5sum. Therefore the
* md5sum for a given key is calculated and divided into two 64-bit parts. These
* will be XORed and returned as the hash.
*
* @param key_size Size of key in bytes
* @param key Pointer to key
* @return uint64_t Hashsum as an unsigned 64-bit integer
*/
static uint64_t get_md5(int key_size, void *key);
/**
 * @brief C++-Wrapper around DHT implementation
 *
 * Provides an API to interact with the current DHT implementation. This class
 * is POET specific and can't be used outside the POET application.
 *
 * The wrapper owns the DHT handle and the fuzzing buffer and releases both in
 * its destructor. Copying is therefore disabled, since a copy would free the
 * same resources a second time.
 */
class DHT_Wrapper {
 public:
  /**
   * @brief Construct a new dht wrapper object
   *
   * The constructor will initialize the private dht_object of this class by
   * calling DHT_create with all given parameters. Also the fuzzing buffer will
   * be allocated and all needed parameters extracted from simparams struct.
   *
   * @param params Simulation parameter object
   * @param dht_comm Communicator which addresses all participating DHT
   * processes
   * @param buckets_per_process Count of buckets to allocate for each process
   * @param data_size Size of data in bytes
   * @param key_size Size of key in bytes
   */
  DHT_Wrapper(SimParams &params, MPI_Comm dht_comm, int buckets_per_process,
              int data_size, int key_size);

  /// Not copyable: the object owns the DHT handle and the fuzzing buffer.
  DHT_Wrapper(const DHT_Wrapper &) = delete;
  /// Not copy-assignable, see the deleted copy constructor.
  DHT_Wrapper &operator=(const DHT_Wrapper &) = delete;

  /**
   * @brief Destroy the dht wrapper object
   *
   * By destroying this object the DHT will also be freed. Since all statistics
   * are stored inside this object, no statistics will be retrieved during the
   * call of DHT_free. After freeing the DHT the fuzzing buffer will be also
   * freed.
   *
   */
  ~DHT_Wrapper();

  /**
   * @brief Check if values of workpackage are stored in DHT
   *
   * Call DHT_read for all grid cells of the given workpackage and if a
   * previously simulated grid cell was found mark this grid cell as 'not to be
   * simulated'. Therefore all values of a grid cell are fuzzed by fuzzForDHT
   * and used as input key. The corresponding retrieved value is stored
   * directly into the memory area of the work_package and out_result_index is
   * marked with false ('not to be simulated').
   *
   * @param length Count of grid cells inside work package
   * @param[out] out_result_index Indexing work packages which should be
   * simulated
   * @param[in,out] work_package Pointer to current work package
   * @param dt Current timestep of simulation
   */
  void checkDHT(int length, std::vector<bool> &out_result_index,
                double *work_package, double dt);

  /**
   * @brief Write simulated values into DHT
   *
   * Call DHT_write for all grid cells of the given workpackage which was
   * simulated shortly before by the worker. Whether the grid cell was simulated
   * is given by result_index. For every grid cell indicated with true inside
   * result_index write the simulated value into the DHT.
   *
   * @param length Count of grid cells inside work package
   * @param result_index Indexing work packages which was simulated
   * @param work_package Pointer to current work package which was used as input
   * of PHREEQC
   * @param results Pointer to current work package which are the resulting
   * outputs of the PHREEQC simulation
   * @param dt Current timestep of simulation
   */
  void fillDHT(int length, std::vector<bool> &result_index,
               double *work_package, double *results, double dt);

  /**
   * @brief Dump current DHT state into file.
   *
   * This function will simply execute DHT_to_file with given file name (see
   * DHT.h for more info).
   *
   * @param filename Name of the dump file
   * @return int Return value of DHT_to_file
   */
  int tableToFile(const char *filename);

  /**
   * @brief Load dump file into DHT.
   *
   * This function will simply execute DHT_from_file with given file name (see
   * DHT.h for more info).
   *
   * @param filename Name of the dump file
   * @return int DHT_SUCCESS on success, otherwise the error value of
   * DHT_from_file
   */
  int fileToTable(const char *filename);

  /**
   * @brief Print a detailed statistic of DHT usage.
   *
   * This function will simply execute DHT_print_statistics (see DHT.h for more
   * info).
   *
   */
  void printStatistics();

  /**
   * @brief Get the count of hits
   *
   * @return uint64_t Count of hits
   */
  uint64_t getHits();

  /**
   * @brief Get the count of read misses
   *
   * @return uint64_t Count of read misses
   */
  uint64_t getMisses();

  /**
   * @brief Get the count of evictions
   *
   * @return uint64_t Count of evictions
   */
  uint64_t getEvictions();

 private:
  /**
   * @brief Transform given workpackage into DHT key
   *
   * A given workpackage will be transformed into a DHT key by rounding each
   * value of a workpackage to a given significant digit. Three different types
   * of variables 'act', 'logact' and 'ignore' are used. Those types are given
   * via the dht_prop_type_vector, the digits via the dht_signif_vector.
   *
   * If a variable is defined as 'act', dht_log is true and the value is
   * positive, the logarithm with base 10 will be applied. After that the value
   * is negated and rounded. In case the value is 0 the fuzzing_buffer is also
   * set to 0 at this position. If the value is negative a corresponding
   * warning will be printed to stderr and the fuzzing buffer will be set to 0
   * at this index. If dht_log is false the value is only rounded.
   *
   * If a variable is defined as 'logact' the value will be cut after the
   * significant digit.
   *
   * If a variable is defined as 'ignore' the fuzzing_buffer will be set to 0
   * at the index of the variable.
   *
   * If dt_differ is true the current time step of the simulation will be set at
   * the end of the fuzzing_buffer.
   *
   * @param var_count Count of variables for the current work package
   * @param key Pointer to work package handled as the key
   * @param dt Current time step of the simulation
   */
  void fuzzForDHT(int var_count, void *key, double dt);

  /**
   * @brief DHT handle
   *
   * Stores information about the DHT. Will be used as a handle for each DHT
   * library call.
   *
   */
  DHT *dht_object;

  /**
   * @brief Count of hits
   *
   * The counter will be incremented if a previously simulated workpackage can
   * be retrieved with a given key.
   *
   */
  uint64_t dht_hits = 0;

  /**
   * @brief Count of read misses
   *
   * The counter will be incremented if a given key doesn't retrieve a value
   * from the DHT.
   *
   */
  uint64_t dht_miss = 0;

  /**
   * @brief Count of evictions
   *
   * If a value in the DHT must be evicted because of lack of space/reaching the
   * last index etc., this counter will be incremented.
   *
   */
  uint64_t dht_evictions = 0;

  /**
   * @brief Rounded work package values
   *
   * Stores rounded work package values and serves as the DHT key pointer.
   *
   */
  double *fuzzing_buffer;

  /**
   * @brief Indicates change in time step during simulation
   *
   * If set to true, the time step of simulation will differ between iterations,
   * so the current time step must be stored inside the DHT key. Otherwise wrong
   * values would be obtained.
   *
   * If set to false the time step doesn't need to be stored in the DHT key.
   *
   */
  bool dt_differ;

  /**
   * @brief Logarithm before rounding
   *
   * Indicates if the logarithm with base 10 will be applied to a variable
   * before rounding.
   *
   * Defaults to true.
   *
   */
  bool dht_log;

  /**
   * @brief Significant digits for each variable
   *
   * Stores the rounding/significant digits for each variable of the work
   * package.
   *
   */
  std::vector<int> dht_signif_vector;

  /**
   * @brief Type of each variable
   *
   * Defines the type of each variable of the work package.
   *
   */
  std::vector<std::string> dht_prop_type_vector;
};
} // namespace poet
#endif // DHT_WRAPPER_H

View File

@ -1,51 +0,0 @@
## Simple Makefile for MPI use of RInside
##
## Builds the 'kin' program with the compiler and flags reported by R, adding
## the MPI, Rcpp and RInside include and link flags.

## comment this out if you need a different version of R,
## and set R_HOME accordingly as an environment variable
R_HOME := $(shell R RHOME)

sources := $(wildcard *.cpp)
programs := kin

# OpenMPI header and libraries
MPICPPFLAGS := $(shell mpic++ -showme:compile)
MPILIBS := $(shell mpic++ -showme:link)

## include headers and libraries for R
RCPPFLAGS := $(shell $(R_HOME)/bin/R CMD config --cppflags)
RLDFLAGS := $(shell $(R_HOME)/bin/R CMD config --ldflags)
RBLAS := $(shell $(R_HOME)/bin/R CMD config BLAS_LIBS)
RLAPACK := $(shell $(R_HOME)/bin/R CMD config LAPACK_LIBS)

## include headers and libraries for Rcpp interface classes
## note that RCPPLIBS will be empty with Rcpp (>= 0.11.0) and can be omitted
RCPPINCL := $(shell echo 'Rcpp:::CxxFlags()' | $(R_HOME)/bin/R --vanilla --slave)
RCPPLIBS := $(shell echo 'Rcpp:::LdFlags()' | $(R_HOME)/bin/R --vanilla --slave)

## include headers and libraries for RInside embedding classes
RINSIDEINCL := $(shell echo 'RInside:::CxxFlags()' | $(R_HOME)/bin/R --vanilla --slave)
RINSIDELIBS := $(shell echo 'RInside:::LdFlags()' | $(R_HOME)/bin/R --vanilla --slave)

## compiler etc settings used in default make rules
CXX := $(shell $(R_HOME)/bin/R CMD config CXX)
CPPFLAGS := -D STRICT_R_HEADERS -Wall -ggdb -O3 $(shell $(R_HOME)/bin/R CMD config CPPFLAGS)
CXXFLAGS := $(MPICPPFLAGS) $(RCPPFLAGS) $(RCPPINCL) $(RINSIDEINCL) $(shell $(R_HOME)/bin/R CMD config CXXFLAGS)
LDLIBS := $(MPILIBS) $(RLDFLAGS) $(RBLAS) $(RLAPACK) $(RCPPLIBS) $(RINSIDELIBS) -lcrypto ## -fpermissive

## these targets do not name files
.PHONY: default all run clean

default: all

kin : DHT.cpp worker.cpp dht_wrapper.cpp r_utils.cpp kin.cpp

## strip the binaries if strip is installed; a missing strip is not an error
all : $(programs)
	@if test -x /usr/bin/strip; then strip $^; fi

## run every program on 4 processes if mpirun is installed
run : $(programs)
	@if test -x /usr/bin/mpirun; then for p in $(programs); do echo; echo "Running $$p:"; mpirun -n 4 ./$$p; done; fi

clean:
	rm -vf $(programs)

View File

@ -1,175 +0,0 @@
#include "dht_wrapper.h"
#include <openssl/md5.h>
/*init globals*/
// set from the 'dht' command line flag
bool dht_enabled;
// value of the 'dht-snaps' command line parameter
int dht_snaps;
// value of the 'dht-strategy' command line parameter
int dht_strategy;
// default rounding digits ('dht-signif' command line parameter)
int dht_significant_digits;
// value of the 'dht-file' command line parameter
std::string dht_file;
// rounding digits per variable, used by fuzz_for_dht
std::vector<int> dht_significant_digits_vector;
// type per variable ("act", "logact" or "ignore"), used by fuzz_for_dht
std::vector<string> prop_type_vector;
// set from the 'dht-log' command line flag: apply log10 before rounding
bool dht_logarithm;
// value of the 'dht-size' command line parameter
uint64_t dht_size_per_process;
// counters updated by check_dht (hits, misses) and fill_dht (collisions)
uint64_t dht_hits, dht_miss, dht_collision;
// NOTE(review): not used in this file - purpose to be confirmed
RInside *R_DHT;
// NOTE(review): not used in this file - purpose to be confirmed
std::vector<bool> dht_flags;
// handle passed to every DHT library call
DHT *dht_object;
// holds the fuzzed key built by fuzz_for_dht
double *fuzzing_buffer;
// if true, fuzz_for_dht appends the time step to the key
bool dt_differ;
/*functions*/
/*
 * Hash a key: compute its MD5 digest and XOR the two 64-bit halves.
 *
 * key_size: size of the key in bytes
 * key:      pointer to the key data
 */
uint64_t get_md5(int key_size, void *key) {
    static_assert(MD5_DIGEST_LENGTH == 2 * sizeof(uint64_t),
                  "MD5 digest must consist of exactly two 64-bit halves");

    MD5_CTX ctx;
    // The digest is written directly into two 64-bit words. Reading a byte
    // array through uint64_t pointers (as done before) violates strict
    // aliasing and may be misaligned; the byte layout and hash are unchanged.
    uint64_t halves[2];

    MD5_Init(&ctx);
    MD5_Update(&ctx, key, key_size);
    MD5_Final(reinterpret_cast<unsigned char *>(halves), &ctx);

    return halves[0] ^ halves[1];
}
/*
 * Round N to n significant digits by evaluating R's signif() in the embedded
 * R session. The arguments are passed through the R variables 'roundin' and
 * 'roundsig'.
 */
double Round_off(RInside &R, double N, double n) {
    R["roundin"] = N;
    R["roundsig"] = n;

    double rounded = R.parseEval("signif(roundin, digits=roundsig)");
    return rounded;
}
/*
 * Stores fuzzed version of key in fuzzing_buffer
 *
 * Each of the var_count doubles behind key is handled according to its entry
 * in prop_type_vector:
 *   "act"    - with dht_logarithm: 0 for zero, otherwise the rounded negated
 *              log10; without dht_logarithm the rounded value
 *   "logact" - the rounded value
 *   "ignore" - 0
 * Rounding uses ROUND with the digits from dht_significant_digits_vector.
 *
 * Negative "act" values (with dht_logarithm) and unknown types print a warning
 * and now set the slot to 0. Before, the slot was left untouched, so stale or
 * uninitialised bytes of fuzzing_buffer became part of the hashed key.
 *
 * If dt_differ is set, dt is stored behind the last variable.
 * R is not used.
 */
void fuzz_for_dht(RInside &R, int var_count, void *key, double dt) {
    const double *values = static_cast<const double *>(key);

    //introduce fuzzing to allow more hits in DHT
    for (unsigned int i = 0; i < (unsigned int)var_count; i++) {
        const double value = values[i];

        if (prop_type_vector[i] == "act") {
            if (!dht_logarithm) {
                //without log10
                fuzzing_buffer[i] = ROUND(value, dht_significant_digits_vector[i]);
            } else if (value < 0) {
                cerr << "dht_wrapper.cpp::fuzz_for_dht(): Warning! Negative value at key!" << endl;
                //keep the key deterministic
                fuzzing_buffer[i] = 0;
            } else if (value == 0) {
                fuzzing_buffer[i] = 0;
            } else {
                //with log10
                fuzzing_buffer[i] = ROUND(-(std::log10(value)), dht_significant_digits_vector[i]);
            }
        } else if (prop_type_vector[i] == "logact") {
            fuzzing_buffer[i] = ROUND(value, dht_significant_digits_vector[i]);
        } else if (prop_type_vector[i] == "ignore") {
            fuzzing_buffer[i] = 0;
        } else {
            cerr << "dht_wrapper.cpp::fuzz_for_dht(): Warning! Probably wrong prop_type!" << endl;
            //keep the key deterministic
            fuzzing_buffer[i] = 0;
        }
    }

    //a varying time step has to be part of the key
    if (dt_differ)
        fuzzing_buffer[var_count] = dt;
}
/*
 * Look up every grid cell of a work package in the DHT.
 *
 * The time step is read from the R variable mysetup$dt. For each cell the
 * values are fuzzed into a key and DHT_read is called with the cell itself as
 * destination. A hit flags the cell as 'do not simulate' (false), a
 * DHT_READ_ERROR result flags it as 'simulate' (true).
 *
 * Any other return value is an error: the cell is now flagged as 'simulate'
 * and a warning is printed. Before, the flag was left untouched, so a stale
 * value in out_result_index decided whether the cell got simulated.
 */
void check_dht(RInside &R, int length, std::vector<bool> &out_result_index, double *work_package) {
    const int var_count = prop_type_vector.size();
    double dt;

    dt = R.parseEval("mysetup$dt");

    for (int i = 0; i < length; i++) {
        //current grid cell: fuzzing input and read destination
        void *key = (void *)&(work_package[i * var_count]);

        //fuzz data (round, logarithm etc.)
        fuzz_for_dht(R, var_count, key, dt);

        //overwrite input with data from DHT, IF value is found in DHT
        const int res = DHT_read(dht_object, fuzzing_buffer, key);

        if (res == DHT_SUCCESS) {
            //flag that this line is replaced by DHT-value, do not simulate!!
            out_result_index[i] = false;
            dht_hits++;
        } else if (res == DHT_READ_ERROR) {
            //this line is untouched, simulation is needed
            out_result_index[i] = true;
            dht_miss++;
        } else {
            //read failed: simulating the cell is the safe fallback
            //NOTE(review): assumes DHT_read leaves the destination untouched
            //on failure - confirm against DHT.cpp
            cerr << "dht_wrapper.cpp::check_dht(): Warning! DHT_read failed with code "
                 << res << ", grid cell will be simulated." << endl;
            out_result_index[i] = true;
        }
    }
}
/*
 * Store the results of all simulated grid cells in the DHT.
 *
 * The time step is read from the R variable mysetup$dt. Cells whose entry in
 * result_index is false are skipped. For all others the input values are
 * fuzzed into a key and the results are written with DHT_write. Collisions
 * reported by DHT_write are counted; other failures are currently ignored.
 */
void fill_dht(RInside &R, int length, std::vector<bool> &result_index, double *work_package, double *results) {
    const int var_count = prop_type_vector.size();
    double dt;

    dt = R.parseEval("mysetup$dt");

    for (int cell = 0; cell < length; ++cell) {
        //only simulated cells are inserted into the dht
        if (!result_index[cell])
            continue;

        void *key = (void *)&(work_package[cell * var_count]);
        void *data = (void *)&(results[cell * var_count]);

        //fuzz data (round, logarithm etc.)
        fuzz_for_dht(R, var_count, key, dt);
        const int res = DHT_write(dht_object, fuzzing_buffer, data);

        //plain success: nothing more to do
        if (res == DHT_SUCCESS)
            continue;

        if (res == DHT_WRITE_SUCCESS_WITH_COLLISION) {
            dht_collision++;
        } else {
            //MPI ERROR ... WHAT TO DO NOW?
            //RUNNING CIRCLES WHILE SCREAMING
        }
    }
}
/*
 * Print the DHT statistics. A failure of DHT_print_statistics is currently
 * ignored.
 */
void print_statistics() {
    const int status = DHT_print_statistics(dht_object);

    if (status == DHT_SUCCESS)
        return;

    //MPI ERROR ... WHAT TO DO NOW?
    //RUNNING CIRCLES WHILE SCREAMING
}
/*
 * Dump the DHT into the given file; returns whatever DHT_to_file returns.
 */
int table_to_file(char *filename) {
    return DHT_to_file(dht_object, filename);
}
/*
 * Load the given dump file into the DHT and, on success, print the DHT
 * statistics. Returns the result of DHT_from_file.
 */
int file_to_table(char *filename) {
    const int load_result = DHT_from_file(dht_object, filename);

    if (load_result == DHT_SUCCESS)
        DHT_print_statistics(dht_object);

    return load_result;
}

View File

@ -1,50 +0,0 @@
#pragma once
#include <RInside.h>
#include <string>
#include <vector>
#include <math.h>
#include "DHT.h"
using namespace std;
using namespace Rcpp;
/*Functions*/
uint64_t get_md5(int key_size, void* key);
void fuzz_for_dht(RInside &R, int var_count, void *key, double dt);
void check_dht(RInside &R, int length, std::vector<bool> &out_result_index, double *work_package);
void fill_dht(RInside &R, int length, std::vector<bool> &result_index, double *work_package, double *results);
void print_statistics();
int table_to_file(char* filename);
int file_to_table(char* filename);
/*globals*/
extern bool dht_enabled;
extern int dht_snaps;
extern std::string dht_file;
extern bool dt_differ;
//Default DHT Size per process in Byte (defaults to 1 GiB)
#define DHT_SIZE_PER_PROCESS 1073741824
//sets default dht access and distribution strategy
#define DHT_STRATEGY 0
// 0 -> DHT is on workers, access from workers only
// 1 -> DHT is on workers + master, access from master only !NOT IMPLEMENTED YET!
// Cut 'value' after 'signif' decimal digits (truncation towards zero).
// Both arguments are parenthesized so that expressions can be passed, and the
// scaled value is truncated with trunc() instead of a cast to int, which
// overflowed once value * 10^signif exceeded the range of int.
#define ROUND(value,signif) (trunc(pow(10.0, (double) (signif)) * (value)) * pow(10.0, -(double) (signif)))
extern int dht_strategy;
extern int dht_significant_digits;
extern std::vector<int> dht_significant_digits_vector;
extern std::vector<string> prop_type_vector;
extern bool dht_logarithm;
extern uint64_t dht_size_per_process;
//global DHT object, can be NULL if not initialized, check strategy
extern DHT* dht_object;
//DHT Performance counter
extern uint64_t dht_hits, dht_miss, dht_collision;
extern double* fuzzing_buffer;
extern std::vector<bool> dht_flags;

View File

@ -1,9 +0,0 @@
#pragma once
#define BUFFER_OFFSET 5
/*Globals*/
extern double* mpi_buffer;
extern double* mpi_buffer_results;
extern uint32_t work_package_size;

View File

@ -1,818 +0,0 @@
#include <string>
#include <vector>
#include <iostream>
#include <cstring>
#include <RInside.h> // for the embedded R via RInside
#include <mpi.h> // mpi header file
#include "argh.h" // Argument handler https://github.com/adishavit/argh BSD-licenced
#include "DHT.h" // MPI-DHT Implementation
#include "worker.h"
#include "r_utils.h"
#include "dht_wrapper.h"
#include "global_buffer.h"
using namespace std;
using namespace Rcpp;
double* mpi_buffer;
double* mpi_buffer_results;
uint32_t work_package_size;
#define WORK_PACKAGE_SIZE_DEFAULT 5
bool store_result;
// Names of all command line parameters (options taking a value) known to kin.
std::set<std::string> paramList() {
    return std::set<std::string>{
        //global
        "work-package-size",
        //only DHT
        "dht-signif",
        "dht-strategy",
        "dht-size",
        "dht-snaps",
        "dht-file",
    };
}
// Names of all command line flags (options without a value) known to kin.
std::set<std::string> flagList() {
    return std::set<std::string>{
        //global
        "ignore-result",
        //only DHT
        "dht",
        "dht-log",
    };
}
// Collect every flag and parameter given on the command line that is not
// listed by flagList() / paramList(). Unknown flags come first, followed by
// unknown parameters; an empty list means all options were recognized.
std::list<std::string> checkOptions(argh::parser cmdl) {
    const std::set<std::string> known_flags = flagList();
    const std::set<std::string> known_params = paramList();

    std::list<std::string> unknown;

    for (auto& flag: cmdl.flags()) {
        if (known_flags.count(flag) == 0) unknown.push_back(flag);
    }

    for (auto& param: cmdl.params()) {
        if (known_params.count(param.first) == 0) unknown.push_back(param.first);
    }

    return unknown;
}
// Bookkeeping record of the master; main() allocates one zero-initialized
// entry per worker process (world_size - 1 entries).
typedef struct
{
// presumably non-zero while the worker is processing a work package - TODO confirm
char has_work;
// presumably the buffer address associated with the worker's current work package - TODO confirm
double* send_addr;
} worker_struct;
int main(int argc, char *argv[]) {
double sim_start, sim_b_transport, sim_a_transport, sim_b_chemistry, sim_a_chemistry,
sim_end;
double cummul_transport = 0.f;
double cummul_chemistry = 0.f;
double cummul_workers = 0.f;
double cummul_chemistry_master = 0.f;
double cummul_master_seq_pre_loop = 0.f;
double cummul_master_seq_loop = 0.f;
double master_idle = 0.f;
double master_send_a, master_send_b;
double cummul_master_send = 0.f;
double master_recv_a, master_recv_b;
double cummul_master_recv = 0.f;
double sim_a_seq, sim_b_seq, sim_c_seq, sim_d_seq;
double idle_a, idle_b;
double sim_c_chemistry, sim_d_chemistry;
double sim_e_chemistry, sim_f_chemistry;
argh::parser cmdl(argv);
// cout << "CPP: Start Init (MPI)" << endl;
MPI_Init( &argc, &argv );
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
/*Create custom Communicator with all processes except 0 (the master) for DHT storage*/
//only needed if strategy == 0, but done anyway
MPI_Group group_world;
MPI_Group dht_group;
MPI_Comm dht_comm;
int* process_ranks;
// make a list of processes in the new communicator
process_ranks= (int*) malloc(world_size*sizeof(int));
for(int I = 1; I < world_size; I++)
process_ranks[I-1] = I;
//get the group under MPI_COMM_WORLD
MPI_Comm_group(MPI_COMM_WORLD, &group_world);
//create the new group
MPI_Group_incl(group_world, world_size-1, process_ranks, &dht_group);
// create the new communicator
MPI_Comm_create(MPI_COMM_WORLD, dht_group, &dht_comm);
free (process_ranks); //cleanup
// cout << "Done";
if (cmdl[{"help", "h"}]) {
if (world_rank == 0) {
cout << "Todo" << endl <<
"See README.md for further information." << endl;
}
MPI_Finalize();
return EXIT_SUCCESS;
}
/*INIT is now done separately in an R file provided here as argument!*/
if (!cmdl(2)) {
if (world_rank == 0) {
cerr << "ERROR. Kin needs 2 positional arguments: " << endl <<
"1) the R script defining your simulation and" << endl <<
"2) the directory prefix where to save results and profiling" << endl;
}
MPI_Finalize();
return EXIT_FAILURE;
}
std::list<std::string> optionsError = checkOptions(cmdl);
if (!optionsError.empty()) {
if (world_rank == 0) {
cerr << "Unrecognized option(s):\n" << endl;
for (auto option: optionsError) {
cerr << option << endl;
}
cerr << "\nMake sure to use available options. Exiting!" << endl;
}
MPI_Finalize();
return EXIT_FAILURE;
}
/*Parse DHT arguments*/
dht_enabled = cmdl["dht"];
// cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n';
if (dht_enabled) {
cmdl("dht-strategy", 0) >> dht_strategy;
// cout << "CPP: DHT strategy is " << dht_strategy << endl;
cmdl("dht-signif", 5) >> dht_significant_digits;
// cout << "CPP: DHT significant digits = " << dht_significant_digits << endl;
dht_logarithm = cmdl["dht-log"];
// cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" : "OFF" ) << endl;
cmdl("dht-size", DHT_SIZE_PER_PROCESS) >> dht_size_per_process;
// cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << endl;
cmdl("dht-snaps", 0) >> dht_snaps;
cmdl("dht-file") >> dht_file;
}
/*Parse work package size*/
cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> work_package_size;
/*Parse output options*/
store_result = !cmdl["ignore-result"];
if (world_rank==0) {
cout << "CPP: Complete results storage is " << ( store_result ? "ON" : "OFF" ) << endl;
cout << "CPP: Work Package Size: " << work_package_size << endl;
cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n';
if (dht_enabled) {
cout << "CPP: DHT strategy is " << dht_strategy << endl;
cout << "CPP: DHT key default digits (ignored if 'signif_vector' is defined) = " << dht_significant_digits << endl;
cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" : "OFF" ) << endl;
cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << endl;
cout << "CPP: DHT save snapshots is " << dht_snaps << endl;
cout << "CPP: DHT load file is " << dht_file << endl;
}
}
cout << "CPP: R Init (RInside) on process " << world_rank << endl;
RInside R(argc, argv);
// if local_rank == 0 then master else worker
R["local_rank"] = world_rank;
/*Loading Dependencies*/
std::string r_load_dependencies =
"suppressMessages(library(Rmufits));"
"suppressMessages(library(RedModRphree));"
"source('kin_r_library.R');"
"source('parallel_r_library.R');";
R.parseEvalQ(r_load_dependencies);
std::string filesim;
cmdl(1) >> filesim; // <- first positional argument
R["filesim"] = wrap(filesim); // assign a char* (string) to 'filesim'
R.parseEvalQ("source(filesim)"); // eval the init string, ignoring any returns
std::string out_dir;
if (world_rank == 0) { // only rank 0 initializes goes through the whole initialization
cmdl(2) >> out_dir; // <- second positional argument
R["fileout"] = wrap(out_dir); // assign a char* (string) to 'fileout'
// Note: R::sim_init() checks if the directory already exists,
// if not it makes it
// pass the boolean "store_result" to the R process
R["store_result"] = store_result;
//get timestep vector from grid_init function ...
std::string master_init_code = "mysetup <- master_init(setup=setup)";
R.parseEval(master_init_code);
dt_differ = R.parseEval("mysetup$dt_differ");
MPI_Bcast(&dt_differ, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
} else { // workers will only read the setup DataFrame defined by input file
R.parseEval("mysetup <- setup");
MPI_Bcast(&dt_differ, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
}
if (world_rank==0) {
cout << "CPP: R init done on process with rank " << world_rank << endl;
}
//initialize chemistry on all processes
std::string init_chemistry_code = "mysetup <- init_chemistry(setup=mysetup)";
R.parseEval(init_chemistry_code);
/* Retrieve state_C from R context for MPI buffer generation */
Rcpp::DataFrame state_C = R.parseEval("mysetup$state_C");
/* Init Parallel helper functions */
R["n_procs"] = world_size-1; /* worker count */
R["work_package_size"] = work_package_size;
// Removed additional field for ID in previous versions
if (world_rank == 0)
{
mpi_buffer = (double*) calloc(state_C.nrow() * (state_C.ncol()), sizeof(double));
} else
{
mpi_buffer = (double*) calloc((work_package_size * (state_C.ncol())) + BUFFER_OFFSET, sizeof(double));
mpi_buffer_results = (double*) calloc(work_package_size * (state_C.ncol()), sizeof(double));
}
if (world_rank==0) {
cout << "CPP: parallel init completed (buffers allocated)!" << endl;
}
// MDL: pass to R the DHT stuff (basically, only for storing of
// simulation parameters). These 2 variables are always defined:
R["dht_enabled"] = dht_enabled;
R["dht_log"] = dht_logarithm;
if (dht_enabled)
{
//cout << "\nCreating DHT\n";
//determine size of dht entries
int dht_data_size = state_C.ncol() * sizeof(double);
int dht_key_size = state_C.ncol() * sizeof(double) + (dt_differ * sizeof(double));
//determine bucket count for preset memory usage
//bucket size is key + value + 1 byte for status
int dht_buckets_per_process = dht_size_per_process / (1 + dht_data_size + dht_key_size);
// MDL : following code moved here from worker.cpp
/*Load significance vector from R setup file (or set default)*/
bool signif_vector_exists = R.parseEval("exists('signif_vector')");
if (signif_vector_exists)
{
dht_significant_digits_vector = as<std::vector<int>>(R["signif_vector"]);
} else
{
dht_significant_digits_vector.assign(dht_object->key_size / sizeof(double), dht_significant_digits);
}
/*Load property type vector from R setup file (or set default)*/
bool prop_type_vector_exists = R.parseEval("exists('prop_type')");
if (prop_type_vector_exists)
{
prop_type_vector = as<std::vector<string>>(R["prop_type"]);
} else
{
prop_type_vector.assign(dht_object->key_size / sizeof(double), "act");
}
if(world_rank == 0)
{
//print only on master, values are equal on all workes
cout << "CPP: dht_data_size: " << dht_data_size << "\n";
cout << "CPP: dht_key_size: " << dht_key_size << "\n";
cout << "CPP: dht_buckets_per_process: " << dht_buckets_per_process << endl;
// MDL: new output on signif_vector and prop_type
if (signif_vector_exists) {
cout << "CPP: using problem-specific rounding digits: " << endl;
R.parseEval("print(data.frame(prop=prop, type=prop_type, digits=signif_vector))");
} else
{
cout << "CPP: using DHT default rounding digits = " << dht_significant_digits << endl;
}
// MDL: pass to R the DHT stuff. These variables exist
// only if dht_enabled is true
R["dht_final_signif"] = dht_significant_digits_vector;
R["dht_final_proptype"] = prop_type_vector;
}
if (dht_strategy == 0)
{
if(world_rank != 0) {
dht_object = DHT_create(dht_comm, dht_buckets_per_process, dht_data_size, dht_key_size, get_md5);
//storing for access from worker and callback functions
fuzzing_buffer = (double *) malloc (dht_key_size);
}
} else {
dht_object = DHT_create(MPI_COMM_WORLD, dht_buckets_per_process, dht_data_size, dht_key_size, get_md5);
}
if (world_rank==0) {
cout << "CPP: DHT successfully created!" << endl;
}
}
// MDL: store all parameters
if (world_rank==0) {
cout << "CPP: Calling R Function to store calling parameters" << endl;
R.parseEvalQ("StoreSetup(setup=mysetup)");
}
MPI_Barrier(MPI_COMM_WORLD);
if (world_rank == 0)
{ /* This is executed by the master */
Rcpp::NumericVector master_send;
Rcpp::NumericVector master_recv;
sim_a_seq = MPI_Wtime();
worker_struct* workerlist = (worker_struct*) calloc(world_size-1, sizeof(worker_struct));
int need_to_receive;
MPI_Status probe_status;
double* timings;
uint64_t* dht_perfs = NULL;
int local_work_package_size;
// a temporary send buffer
double* send_buffer;
send_buffer = (double*) calloc((work_package_size * (state_C.ncol() )) + BUFFER_OFFSET, sizeof(double));
// helper variables
int iteration;
double dt, current_sim_time;
int n_wp = 1; // holds the actual number of wp which is
// computed later in R::distribute_work_packages()
std::vector<int> wp_sizes_vector; // vector with the sizes of
// each package
sim_start = MPI_Wtime();
//Iteration Count is dynamic, retrieving value from R (is only needed by master for the following loop)
uint32_t maxiter = R.parseEval("mysetup$maxiter");
sim_b_seq = MPI_Wtime();
cummul_master_seq_pre_loop += sim_b_seq - sim_a_seq;
/*SIMULATION LOOP*/
for(uint32_t iter = 1; iter < maxiter+1; iter++)
{
sim_a_seq = MPI_Wtime();
cummul_master_send = 0.f;
cummul_master_recv = 0.f;
cout << "CPP: Evaluating next time step" << endl;
R.parseEvalQ("mysetup <- master_iteration_setup(mysetup)");
/*displaying iteration number, with C++ and R iterator*/
cout << "CPP: Going through iteration " << iter << endl;
cout << "CPP: R's $iter: "<< ((uint32_t) (R.parseEval("mysetup$iter"))) <<". Iteration" << endl;
cout << "CPP: Calling Advection" << endl;
sim_b_transport = MPI_Wtime();
R.parseEvalQ("mysetup <- master_advection(setup=mysetup)");
sim_a_transport = MPI_Wtime();
cout << "CPP: Chemistry" << endl;
/*Fallback for sequential execution*/
sim_b_chemistry = MPI_Wtime();
if(world_size == 1)
{
// MDL : the transformation of values into pH and pe
// takes now place in master_advection() so the
// following line is unneeded
// R.parseEvalQ("mysetup$state_T <- RedModRphree::Act2pH(mysetup$state_T)");
R.parseEvalQ("result <- slave_chemistry(setup=mysetup, data=mysetup$state_T)");
R.parseEvalQ("mysetup <- master_chemistry(setup=mysetup, data=result)");
} else { /*send work to workers*/
// NEW: only in the first iteration we call
// R::distribute_work_packages()!!
if (iter==1)
{
R.parseEvalQ("wp_ids <- distribute_work_packages(len=nrow(mysetup$state_T), package_size=work_package_size)");
// we only sort once the vector
R.parseEvalQ("ordered_ids <- order(wp_ids)");
R.parseEvalQ("wp_sizes_vector <- compute_wp_sizes(wp_ids)");
n_wp = (int) R.parseEval("length(wp_sizes_vector)");
wp_sizes_vector = as<std::vector<int>>(R["wp_sizes_vector"]);
cout << "CPP: Total number of work packages: " << n_wp << endl;
R.parseEval("stat_wp_sizes(wp_sizes_vector)");
}
/* shuffle and extract data
MDL: we now apply :Act2pH directly in master_advection
*/
// R.parseEval("tmp <- shuffle_field(RedModRphree::Act2pH(mysetup$state_T), ordered_ids)");
R.parseEval("tmp <- shuffle_field(mysetup$state_T, ordered_ids)");
Rcpp::DataFrame chemistry_data = R.parseEval("tmp");
convert_R_Dataframe_2_C_buffer(mpi_buffer, chemistry_data);
// cout << "CPP: shuffle_field() done" << endl;
/* send and receive work; this is done by counting
* the wp */
int pkg_to_send = n_wp;
int pkg_to_recv = n_wp;
size_t colCount = chemistry_data.ncol();
int free_workers = world_size-1;
double* work_pointer = mpi_buffer;
sim_c_chemistry = MPI_Wtime();
/* visual progress */
float progress = 0.0;
int barWidth = 70;
//retrieve data from R runtime
iteration = (int) R.parseEval("mysetup$iter");
dt = (double) R.parseEval("mysetup$requested_dt");
current_sim_time = (double) R.parseEval("mysetup$simulation_time-mysetup$requested_dt");
int count_pkgs = 0;
sim_b_seq = MPI_Wtime();
sim_c_chemistry = MPI_Wtime();
while (pkg_to_recv > 0) // start dispatching work packages
{
/* visual progress */
progress = (float) (count_pkgs+1)/n_wp;
cout << "[";
int pos = barWidth * progress;
for (int iprog = 0; iprog < barWidth; ++iprog) {
if (iprog < pos)
cout << "=";
else if (iprog == pos)
cout << ">";
else
cout << " ";
}
std::cout << "] " << int(progress * 100.0) << " %\r";
std::cout.flush();
/* end visual progress */
if (pkg_to_send > 0) {
master_send_a = MPI_Wtime();
/*search for free workers and send work*/
for (int p = 0; p < world_size-1; p++) {
if (workerlist[p].has_work == 0 && pkg_to_send > 0) /* worker is free */ {
// to enable different work_package_size, set local copy of work_package_size to
// either global work_package size or remaining 'to_send' packages
// to_send >= work_package_size ? local_work_package_size = work_package_size : local_work_package_size = to_send;
local_work_package_size = (int) wp_sizes_vector[count_pkgs];
count_pkgs++;
// cout << "CPP: sending pkg n. " << count_pkgs << " with size " << local_work_package_size << endl;
/*push pointer forward to next work package, after taking the current one*/
workerlist[p].send_addr = work_pointer;
int end_of_wp = local_work_package_size * colCount;
work_pointer = &(work_pointer[end_of_wp]);
// fill send buffer starting with work_package ...
std::memcpy(send_buffer, workerlist[p].send_addr, (end_of_wp) * sizeof(double));
// followed by: work_package_size
send_buffer[end_of_wp] = (double) local_work_package_size;
// current iteration of simulation
send_buffer[end_of_wp + 1] = (double) iteration;
// size of timestep in seconds
send_buffer[end_of_wp + 2] = dt;
// current time of simulation (age) in seconds
send_buffer[end_of_wp + 3] = current_sim_time;
// placeholder for work_package_count
send_buffer[end_of_wp + 4] = 0.;
/* ATTENTION Worker p has rank p+1 */
MPI_Send(send_buffer, end_of_wp + BUFFER_OFFSET, MPI_DOUBLE, p+1, TAG_WORK, MPI_COMM_WORLD);
workerlist[p].has_work = 1;
free_workers--;
pkg_to_send -= 1;
}
}
master_send_b = MPI_Wtime();
cummul_master_send += master_send_b - master_send_a;
}
/*check if there are results to receive and receive them*/
need_to_receive = 1;
master_recv_a = MPI_Wtime();
while(need_to_receive && pkg_to_recv > 0)
{
if (pkg_to_send > 0 && free_workers > 0)
MPI_Iprobe(MPI_ANY_SOURCE, TAG_WORK, MPI_COMM_WORLD, &need_to_receive, &probe_status);
else {
idle_a = MPI_Wtime();
MPI_Probe(MPI_ANY_SOURCE, TAG_WORK, MPI_COMM_WORLD, &probe_status);
idle_b = MPI_Wtime();
master_idle += idle_b - idle_a;
}
if(need_to_receive)
{
int p = probe_status.MPI_SOURCE;
int size;
MPI_Get_count(&probe_status, MPI_DOUBLE, &size);
MPI_Recv(workerlist[p-1].send_addr, size, MPI_DOUBLE, p, TAG_WORK, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
workerlist[p-1].has_work = 0;
pkg_to_recv -= 1;
free_workers++;
}
}
master_recv_b = MPI_Wtime();
cummul_master_recv += master_recv_b - master_recv_a;
}
sim_c_seq = MPI_Wtime();
// don't overwrite last progress
cout << endl;
sim_d_chemistry = MPI_Wtime();
cummul_workers += sim_d_chemistry - sim_c_chemistry;
convert_C_buffer_2_R_Dataframe(mpi_buffer, chemistry_data);
R["chemistry_data"] = chemistry_data;
/* unshuffle results */
R.parseEval("result <- unshuffle_field(chemistry_data, ordered_ids)");
/* do master stuff */
sim_e_chemistry = MPI_Wtime();
R.parseEvalQ("mysetup <- master_chemistry(setup=mysetup, data=result)");
sim_f_chemistry = MPI_Wtime();
cummul_chemistry_master += sim_f_chemistry - sim_e_chemistry;
}
sim_a_chemistry = MPI_Wtime();
// MDL master_iteration_end just writes on disk state_T and
// state_C after every iteration if the cmdline option
// --ignore-results is not given (and thus the R variable
// store_result is TRUE)
R.parseEvalQ("mysetup <- master_iteration_end(setup=mysetup)");
cummul_transport += sim_a_transport - sim_b_transport;
cummul_chemistry += sim_a_chemistry - sim_b_chemistry;
cout << endl << "CPP: End of *coupling* iteration "<< iter <<"/" << maxiter << endl << endl;
if (dht_enabled) {
for (int i=1; i < world_size; i++) {
MPI_Send(NULL, 0, MPI_DOUBLE, i, TAG_DHT_STATS, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
if (dht_snaps == 2) {
std::stringstream outfile;
outfile << out_dir << "/iter_" << std::setfill('0') << std::setw(3) << iter << ".dht";
for (int i=1; i < world_size; i++) {
MPI_Send(outfile.str().c_str(), outfile.str().size(), MPI_CHAR, i, TAG_DHT_STORE, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
}
}
sim_d_seq = MPI_Wtime();
cummul_master_seq_loop += ((sim_b_seq - sim_a_seq) - (sim_a_transport - sim_b_transport)) + (sim_d_seq - sim_c_seq);
master_send.push_back(cummul_master_send, "it_" + to_string(iter));
master_recv.push_back(cummul_master_recv, "it_" + to_string(iter));
} // END SIMULATION LOOP
sim_end = MPI_Wtime();
if (dht_enabled && dht_snaps > 0) {
cout << "CPP: Master: Instruct workers to write DHT to file ..." << endl;
std::string outfile;
outfile = out_dir + ".dht";
for (int i=1; i < world_size; i++) {
MPI_Send(outfile.c_str(), outfile.size(), MPI_CHAR, i, TAG_DHT_STORE, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
cout << "CPP: Master: ... done" << endl;
}
Rcpp::NumericVector phreeqc_time;
Rcpp::NumericVector dht_get_time;
Rcpp::NumericVector dht_fill_time;
Rcpp::IntegerVector phreeqc_counts;
Rcpp::NumericVector idle_worker;
int phreeqc_tmp;
timings = (double*) calloc(3, sizeof(double));
if (dht_enabled) {
dht_hits = 0;
dht_miss = 0;
dht_collision = 0;
dht_perfs = (uint64_t*) calloc(3, sizeof(uint64_t));
}
double idle_worker_tmp;
for (int p = 0; p < world_size-1; p++)
{
/* ATTENTION Worker p has rank p+1 */
/* Send termination message to worker */
MPI_Send(NULL, 0, MPI_DOUBLE, p+1, TAG_FINISH, MPI_COMM_WORLD);
MPI_Recv(timings, 3, MPI_DOUBLE, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
phreeqc_time.push_back(timings[0], "w" + to_string(p+1));
MPI_Recv(&phreeqc_tmp, 1, MPI_INT, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
phreeqc_counts.push_back(phreeqc_tmp, "w" + to_string(p+1));
MPI_Recv(&idle_worker_tmp, 1, MPI_DOUBLE, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
idle_worker.push_back(idle_worker_tmp, "w" + to_string(p+1));
if (dht_enabled)
{
dht_get_time.push_back(timings[1], "w" + to_string(p+1));
dht_fill_time.push_back(timings[2], "w" + to_string(p+1));
MPI_Recv(dht_perfs, 3, MPI_UNSIGNED_LONG_LONG, p+1, TAG_DHT_PERF, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
dht_hits += dht_perfs[0];
dht_miss += dht_perfs[1];
dht_collision += dht_perfs[2];
}
}
R.parseEvalQ("profiling <- list()");
R["simtime"] = sim_end - sim_start;
R.parseEvalQ("profiling$simtime <- simtime");
R["simtime_transport"] = cummul_transport;
R.parseEvalQ("profiling$simtime_transport <- simtime_transport");
R["simtime_chemistry"] = cummul_chemistry;
R.parseEvalQ("profiling$simtime_chemistry <- simtime_chemistry");
R["simtime_workers"] = cummul_workers;
R.parseEvalQ("profiling$simtime_workers <- simtime_workers");
R["simtime_chemistry_master"] = cummul_chemistry_master;
R.parseEvalQ("profiling$simtime_chemistry_master <- simtime_chemistry_master");
R["seq_master_prep"] = cummul_master_seq_pre_loop;
R.parseEvalQ("profiling$seq_master_prep <- seq_master_prep");
R["seq_master_loop"] = cummul_master_seq_loop;
R.parseEvalQ("profiling$seq_master_loop <- seq_master_loop");
// R["master_send"] = master_send;
// R.parseEvalQ("profiling$master_send <- master_send");
// R["master_recv"] = master_recv;
// R.parseEvalQ("profiling$master_recv <- master_recv");
R["idle_master"] = master_idle;
R.parseEvalQ("profiling$idle_master <- idle_master");
R["idle_worker"] = idle_worker;
R.parseEvalQ("profiling$idle_worker <- idle_worker");
R["phreeqc_time"] = phreeqc_time;
R.parseEvalQ("profiling$phreeqc <- phreeqc_time");
R["phreeqc_count"] = phreeqc_counts;
R.parseEvalQ("profiling$phreeqc_count <- phreeqc_count");
if (dht_enabled)
{
R["dht_hits"] = dht_hits;
R.parseEvalQ("profiling$dht_hits <- dht_hits");
R["dht_miss"] = dht_miss;
R.parseEvalQ("profiling$dht_miss <- dht_miss");
R["dht_collision"] = dht_collision;
R.parseEvalQ("profiling$dht_collisions <- dht_collision");
R["dht_get_time"] = dht_get_time;
R.parseEvalQ("profiling$dht_get_time <- dht_get_time");
R["dht_fill_time"] = dht_fill_time;
R.parseEvalQ("profiling$dht_fill_time <- dht_fill_time");
}
free(workerlist);
free(timings);
if (dht_enabled)
free(dht_perfs);
cout << "CPP: Done! Results are stored as R objects into <" << out_dir << "/timings.rds>" << endl;
/*exporting results and profiling data*/
std::string r_vis_code;
r_vis_code = "saveRDS(profiling, file=paste0(fileout,'/timings.rds'));";
R.parseEval(r_vis_code);
} else { /*This is executed by the workers*/
if (!dht_file.empty()) {
int res = file_to_table((char *) dht_file.c_str());
if (res != DHT_SUCCESS) {
if (res == DHT_WRONG_FILE) {
if (world_rank == 2) cerr << "CPP: Worker: Wrong File" << endl;
} else {
if (world_rank == 2) cerr << "CPP: Worker: Error in loading current state of DHT from file" << endl;
}
return EXIT_FAILURE;
} else {
if (world_rank == 2) cout << "CPP: Worker: Successfully loaded state of DHT from file " << dht_file << endl;
std::cout.flush();
}
}
worker_function(R);
free(mpi_buffer_results);
}
cout << "CPP: finished, cleanup of process " << world_rank << endl;
if (dht_enabled)
{
if (dht_strategy == 0)
{
if(world_rank != 0) {
DHT_free(dht_object, NULL, NULL);
}
} else {
DHT_free(dht_object, NULL, NULL);
}
}
free(mpi_buffer);
MPI_Finalize();
if (world_rank==0) {
cout << "CPP: done, bye!" << endl;
}
exit(0);
}

4
src/model/CMakeLists.txt Normal file
View File

@ -0,0 +1,4 @@
# Model library built from the chemistry, transport and grid sources of this
# directory.
add_library(POET_Model
  ChemSim.cpp
  ChemSim.h
  ChemMaster.cpp
  ChemWorker.cpp
  TransportSim.cpp
  Grid.cpp
)

# PUBLIC: targets linking POET_Model also get this directory and the MPI C
# include directories on their include path.
target_include_directories(POET_Model
  PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${MPI_C_INCLUDE_DIRS}
)

target_link_libraries(POET_Model
  PRIVATE
    POET_Util
    MPI::MPI_C
  PUBLIC
    DHT_Wrapper
)

# OMPI_SKIP_MPICXX keeps Open MPI's mpi.h from pulling in the C++ bindings.
target_compile_definitions(POET_Model
  PRIVATE
    OMPI_SKIP_MPICXX
)

380
src/model/ChemMaster.cpp Normal file
View File

@ -0,0 +1,380 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <Rcpp.h>
#include <iostream>
#include "ChemSim.h"
using namespace poet;
using namespace std;
using namespace Rcpp;
/*
 * Set up the master: copy the needed simulation parameters, allocate the
 * communication buffers and let R compute the work package distribution.
 */
ChemMaster::ChemMaster(SimParams &params, RRuntime &R_, Grid &grid_)
    : ChemSim(params, R_, grid_) {
  t_simparams num_params = params.getNumParams();

  wp_size = num_params.wp_size;
  dht_enabled = num_params.dht_enabled;
  out_dir = params.getOutDir();

  /* buffers: one bookkeeping entry per worker (all ranks except the master),
   * one send buffer sized for a full work package plus BUFFER_OFFSET trailing
   * slots, and one buffer holding rows x cols doubles of the grid */
  const auto send_elems = (wp_size * (grid.getCols())) + BUFFER_OFFSET;
  const auto grid_elems = grid.getRows() * grid.getCols();

  workerlist = static_cast<worker_struct *>(
      calloc(world_size - 1, sizeof(worker_struct)));
  send_buffer = static_cast<double *>(calloc(send_elems, sizeof(double)));
  mpi_buffer = static_cast<double *>(calloc(grid_elems, sizeof(double)));

  /* work package distribution is computed once, inside the R runtime */
  R.parseEvalQ(
      "wp_ids <- distribute_work_packages(len=nrow(mysetup$state_C), "
      "package_size=work_package_size)");
  /* the ordering of the ids is computed only once as well */
  R.parseEvalQ("ordered_ids <- order(wp_ids)");
  R.parseEvalQ("wp_sizes_vector <- compute_wp_sizes(wp_ids)");
  R.parseEval("stat_wp_sizes(wp_sizes_vector)");

  /* keep a C++ copy of the per-package sizes for the send loop */
  wp_sizes_vector = as<std::vector<int>>(R["wp_sizes_vector"]);
}
/*
 * Release the buffers obtained with calloc() in ChemMaster::ChemMaster():
 * mpi_buffer, send_buffer and workerlist. send_buffer has to be released
 * here too, as nothing else in this file frees it.
 */
ChemMaster::~ChemMaster() {
  free(mpi_buffer);
  free(send_buffer);
  free(workerlist);
}
/*
 * Run one chemistry iteration in parallel mode (master side).
 *
 * Steps:
 *  1. shuffle the grid and export it into mpi_buffer,
 *  2. read iteration, timestep and simulation time from the R runtime,
 *  3. dispatch all work packages and collect the results
 *     (sendPkgs() / recvPkgs()),
 *  4. import the results back into the grid and call R's master_chemistry,
 *  5. send every worker an empty TAG_DHT_ITER message.
 *
 * The timers seq_t, worker_t, chem_master and chem_t are accumulated over
 * all calls of this function.
 */
void ChemMaster::run() {
  /* declare most of the needed variables here */
  double chem_a, chem_b;
  double seq_a, seq_b, seq_c, seq_d;
  double worker_chemistry_a, worker_chemistry_b;
  double sim_e_chemistry, sim_f_chemistry;
  int pkg_to_send, pkg_to_recv;
  int free_workers;
  int i_pkgs;

  /* start time measurement of whole chemistry simulation */
  chem_a = MPI_Wtime();

  /* start time measurement of sequential part */
  seq_a = MPI_Wtime();

  /* shuffle grid */
  grid.shuffleAndExport(mpi_buffer);

  /* retrieve needed data from R runtime */
  iteration = (int)R.parseEval("mysetup$iter");
  dt = (double)R.parseEval("mysetup$requested_dt");
  current_sim_time =
      (double)R.parseEval("mysetup$simulation_time-mysetup$requested_dt");

  /* setup local variables: every package has to be sent and received once,
   * and at the start of an iteration all workers (ranks 1..world_size-1)
   * are free */
  pkg_to_send = wp_sizes_vector.size();
  pkg_to_recv = wp_sizes_vector.size();
  work_pointer = mpi_buffer;
  free_workers = world_size - 1;
  i_pkgs = 0;

  /* end time measurement of sequential part */
  seq_b = MPI_Wtime();
  seq_t += seq_b - seq_a;

  /* start time measurement of chemistry time needed for send/recv loop */
  worker_chemistry_a = MPI_Wtime();

  /* start send/recv loop */
  // while there are still packages to recv
  while (pkg_to_recv > 0) {
    // print a progressbar to stdout
    printProgressbar((int)i_pkgs, (int)wp_sizes_vector.size());
    // while there are still packages to send
    if (pkg_to_send > 0) {
      // send packages to all free workers ...
      sendPkgs(pkg_to_send, i_pkgs, free_workers);
    }
    // ... and try to receive them from workers who have finished their work
    recvPkgs(pkg_to_recv, pkg_to_send > 0, free_workers);
  }

  // Just to complete the progressbar
  cout << endl;

  /* stop time measurement of chemistry time needed for send/recv loop.
   * Accumulate (+=) like every other timer here, so that the value reported
   * as profiling$simtime_workers covers all iterations and not only the last
   * one. NOTE(review): assumes worker_t is zero-initialised in its
   * declaration like the other accumulated timers -- confirm in ChemSim.h. */
  worker_chemistry_b = MPI_Wtime();
  worker_t += worker_chemistry_b - worker_chemistry_a;

  /* start time measurement of sequential part */
  seq_c = MPI_Wtime();

  /* unshuffle grid */
  grid.importAndUnshuffle(mpi_buffer);

  /* do master stuff */

  /* start time measurement of master chemistry */
  sim_e_chemistry = MPI_Wtime();

  R.parseEvalQ("mysetup <- master_chemistry(setup=mysetup, data=result)");

  /* end time measurement of master chemistry */
  sim_f_chemistry = MPI_Wtime();
  chem_master += sim_f_chemistry - sim_e_chemistry;

  /* end time measurement of sequential part */
  seq_d = MPI_Wtime();
  seq_t += seq_d - seq_c;

  /* end time measurement of whole chemistry simulation */
  chem_b = MPI_Wtime();
  chem_t += chem_b - chem_a;

  /* advise workers to end chemistry iteration */
  for (int i = 1; i < world_size; i++) {
    MPI_Send(NULL, 0, MPI_DOUBLE, i, TAG_DHT_ITER, MPI_COMM_WORLD);
  }
}
void ChemMaster::sendPkgs(int &pkg_to_send, int &count_pkgs,
int &free_workers) {
/* declare variables */
double master_send_a, master_send_b;
int local_work_package_size;
int end_of_wp;
/* start time measurement */
master_send_a = MPI_Wtime();
/* search for free workers and send work */
for (int p = 0; p < world_size - 1; p++) {
if (workerlist[p].has_work == 0 && pkg_to_send > 0) /* worker is free */ {
/* to enable different work_package_size, set local copy of
* work_package_size to pre-calculated work package size vector */
local_work_package_size = (int)wp_sizes_vector[count_pkgs];
count_pkgs++;
/* note current processed work package in workerlist */
workerlist[p].send_addr = work_pointer;
/* push work pointer to next work package */
end_of_wp = local_work_package_size * grid.getCols();
work_pointer = &(work_pointer[end_of_wp]);
// fill send buffer starting with work_package ...
std::memcpy(send_buffer, workerlist[p].send_addr,
(end_of_wp) * sizeof(double));
// followed by: work_package_size
send_buffer[end_of_wp] = (double)local_work_package_size;
// current iteration of simulation
send_buffer[end_of_wp + 1] = (double)iteration;
// size of timestep in seconds
send_buffer[end_of_wp + 2] = dt;
// current time of simulation (age) in seconds
send_buffer[end_of_wp + 3] = current_sim_time;
// placeholder for work_package_count
send_buffer[end_of_wp + 4] = 0.;
/* ATTENTION Worker p has rank p+1 */
MPI_Send(send_buffer, end_of_wp + BUFFER_OFFSET, MPI_DOUBLE, p + 1,
TAG_WORK, MPI_COMM_WORLD);
/* Mark that worker has work to do */
workerlist[p].has_work = 1;
free_workers--;
pkg_to_send -= 1;
}
}
master_send_b = MPI_Wtime();
send_t += master_send_b - master_send_a;
}
void ChemMaster::recvPkgs(int &pkg_to_recv, bool to_send, int &free_workers) {
/* declare most of the variables here */
int need_to_receive = 1;
double master_recv_a, master_recv_b;
double idle_a, idle_b;
int p, size;
MPI_Status probe_status;
master_recv_a = MPI_Wtime();
/* start to loop as long there are packages to recv and the need to receive
*/
while (need_to_receive && pkg_to_recv > 0) {
// only of there are still packages to send and free workers are available
if (to_send && free_workers > 0)
// non blocking probing
MPI_Iprobe(MPI_ANY_SOURCE, TAG_WORK, MPI_COMM_WORLD, &need_to_receive,
&probe_status);
else {
idle_a = MPI_Wtime();
// blocking probing
MPI_Probe(MPI_ANY_SOURCE, TAG_WORK, MPI_COMM_WORLD, &probe_status);
idle_b = MPI_Wtime();
master_idle += idle_b - idle_a;
}
/* if need_to_receive was set to true above, so there is a message to
* receive */
if (need_to_receive) {
p = probe_status.MPI_SOURCE;
MPI_Get_count(&probe_status, MPI_DOUBLE, &size);
MPI_Recv(workerlist[p - 1].send_addr, size, MPI_DOUBLE, p, TAG_WORK,
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
workerlist[p - 1].has_work = 0;
pkg_to_recv -= 1;
free_workers++;
}
}
master_recv_b = MPI_Wtime();
recv_t += master_recv_b - master_recv_a;
}
/*
 * Print a textual progress bar to stdout, terminated by '\r' so that the
 * next call overwrites it.
 *
 * count_pkgs  number of packages dispatched so far
 * n_wp        total number of work packages
 * barWidth    width of the bar in characters
 *
 * The displayed fraction is (count_pkgs + 1) / n_wp, clamped to [0, 1]:
 * run() keeps calling this while results are still outstanding, i.e. also
 * when count_pkgs has already reached n_wp, which would otherwise be shown
 * as more than 100 %. A non-positive n_wp is displayed as complete instead
 * of dividing by zero.
 */
void ChemMaster::printProgressbar(int count_pkgs, int n_wp, int barWidth) {
  /* visual progress */
  double progress = 1.0;
  if (n_wp > 0) {
    progress = (float)(count_pkgs + 1) / n_wp;
  }
  if (progress > 1.0) {
    progress = 1.0;
  } else if (progress < 0.0) {
    progress = 0.0;
  }

  cout << "[";
  int pos = barWidth * progress;
  for (int iprog = 0; iprog < barWidth; ++iprog) {
    if (iprog < pos)
      cout << "=";
    else if (iprog == pos)
      cout << ">";
    else
      cout << " ";
  }
  std::cout << "] " << int(progress * 100.0) << " %\r";
  std::cout.flush();
  /* end visual progress */
}
/*
 * End the chemistry simulation on the master.
 *
 * After calling ChemSim::end(), every worker is sent an empty TAG_FINISH
 * message; the master then receives, worker by worker, three timings, the
 * phreeqc call count, the idle time and (if DHT is enabled) three DHT
 * counters. Finally all collected values are stored in the R list
 * `profiling`.
 *
 * The receive buffers are small fixed-size stack arrays, so there is no
 * allocation that can fail and nothing to free.
 */
void ChemMaster::end() {
  /* call end() from base class */
  ChemSim::end();

  /* now we get to the part of the master */

  /* receive buffers: timings = {phreeqc, dht get, dht fill},
   * dht_perfs = {hits, misses, evictions} */
  double timings[3] = {0., 0., 0.};
  int dht_perfs[3] = {0, 0, 0};
  int phreeqc_tmp = 0;
  double idle_worker_tmp = 0.;

  /* per-worker results, named "w<rank>" */
  Rcpp::NumericVector phreeqc_time;
  Rcpp::NumericVector dht_get_time;
  Rcpp::NumericVector dht_fill_time;
  Rcpp::IntegerVector phreeqc_counts;
  Rcpp::NumericVector idle_worker;

  /* DHT counters summed over all workers */
  int dht_hits = 0;
  int dht_miss = 0;
  int dht_evictions = 0;

  /* loop over all workers *
   * ATTENTION Worker p has rank p+1 */
  for (int p = 0; p < world_size - 1; p++) {
    const int rank = p + 1;
    const std::string worker_name = "w" + std::to_string(rank);

    /* Send termination message to worker */
    MPI_Send(NULL, 0, MPI_DOUBLE, rank, TAG_FINISH, MPI_COMM_WORLD);

    /* ... and receive all timings and metrics from each worker */
    MPI_Recv(timings, 3, MPI_DOUBLE, rank, TAG_TIMING, MPI_COMM_WORLD,
             MPI_STATUS_IGNORE);
    phreeqc_time.push_back(timings[0], worker_name);

    MPI_Recv(&phreeqc_tmp, 1, MPI_INT, rank, TAG_TIMING, MPI_COMM_WORLD,
             MPI_STATUS_IGNORE);
    phreeqc_counts.push_back(phreeqc_tmp, worker_name);

    MPI_Recv(&idle_worker_tmp, 1, MPI_DOUBLE, rank, TAG_TIMING, MPI_COMM_WORLD,
             MPI_STATUS_IGNORE);
    idle_worker.push_back(idle_worker_tmp, worker_name);

    if (dht_enabled) {
      dht_get_time.push_back(timings[1], worker_name);
      dht_fill_time.push_back(timings[2], worker_name);

      MPI_Recv(dht_perfs, 3, MPI_INT, rank, TAG_DHT_PERF, MPI_COMM_WORLD,
               MPI_STATUS_IGNORE);
      dht_hits += dht_perfs[0];
      dht_miss += dht_perfs[1];
      dht_evictions += dht_perfs[2];
    }
  }

  /* distribute all data to the R runtime */
  R["simtime_chemistry"] = chem_t;
  R.parseEvalQ("profiling$simtime_chemistry <- simtime_chemistry");
  R["simtime_workers"] = worker_t;
  R.parseEvalQ("profiling$simtime_workers <- simtime_workers");
  R["simtime_chemistry_master"] = chem_master;
  R.parseEvalQ(
      "profiling$simtime_chemistry_master <- simtime_chemistry_master");
  R["seq_master"] = seq_t;
  R.parseEvalQ("profiling$seq_master <- seq_master");
  R["idle_master"] = master_idle;
  R.parseEvalQ("profiling$idle_master <- idle_master");
  R["idle_worker"] = idle_worker;
  R.parseEvalQ("profiling$idle_worker <- idle_worker");
  R["phreeqc_time"] = phreeqc_time;
  R.parseEvalQ("profiling$phreeqc <- phreeqc_time");
  R["phreeqc_count"] = phreeqc_counts;
  R.parseEvalQ("profiling$phreeqc_count <- phreeqc_count");

  if (dht_enabled) {
    R["dht_hits"] = dht_hits;
    R.parseEvalQ("profiling$dht_hits <- dht_hits");
    R["dht_miss"] = dht_miss;
    R.parseEvalQ("profiling$dht_miss <- dht_miss");
    R["dht_evictions"] = dht_evictions;
    R.parseEvalQ("profiling$dht_evictions <- dht_evictions");
    R["dht_get_time"] = dht_get_time;
    R.parseEvalQ("profiling$dht_get_time <- dht_get_time");
    R["dht_fill_time"] = dht_fill_time;
    R.parseEvalQ("profiling$dht_fill_time <- dht_fill_time");
  }
}
double ChemMaster::getSendTime() { return this->send_t; }
double ChemMaster::getRecvTime() { return this->recv_t; }
double ChemMaster::getIdleTime() { return this->master_idle; }
double ChemMaster::getWorkerTime() { return this->worker_t; }
double ChemMaster::getChemMasterTime() { return this->chem_master; }
double ChemMaster::getSeqTime() { return this->seq_t; }

60
src/model/ChemSim.cpp Normal file
View File

@ -0,0 +1,60 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "ChemSim.h"
#include <Rcpp.h>
#include <iostream>
using namespace Rcpp;
using namespace poet;
/*
 * Bind the R runtime and the grid and copy the MPI rank, communicator size,
 * work package size and output directory from the simulation parameters.
 */
ChemSim::ChemSim(SimParams &params, RRuntime &R_, Grid &grid_)
    : R(R_), grid(grid_) {
  t_simparams num_params = params.getNumParams();

  world_rank = num_params.world_rank;
  world_size = num_params.world_size;
  wp_size = num_params.wp_size;

  out_dir = params.getOutDir();
}
void ChemSim::run() {
double chem_a, chem_b;
/* start time measuring */
chem_a = MPI_Wtime();
R.parseEvalQ(
"result <- slave_chemistry(setup=mysetup, data=mysetup$state_T)");
R.parseEvalQ("mysetup <- master_chemistry(setup=mysetup, data=result)");
/* end time measuring */
chem_b = MPI_Wtime();
chem_t += chem_b - chem_a;
}
/*
 * Publish the accumulated chemistry runtime to the R runtime as
 * profiling$simtime_chemistry.
 * NOTE(review): the R object `profiling` is not created here -- presumably
 * the caller sets it up before end() is invoked; confirm.
 */
void ChemSim::end() {
  const double total_chemistry_time = chem_t;
  R["simtime_chemistry"] = total_chemistry_time;
  R.parseEvalQ("profiling$simtime_chemistry <- simtime_chemistry");
}
double ChemSim::getChemistryTime() { return this->chem_t; }

573
src/model/ChemSim.h Normal file
View File

@ -0,0 +1,573 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef CHEMSIM_H
#define CHEMSIM_H
#include <DHT_Wrapper.h>
#include <RRuntime.h>
#include <SimParams.h>
#include <mpi.h>
#include <vector>
#include "Grid.h"
/** Number of data elements that are kept free at the end of each work package
 * buffer. ChemMaster::sendPkgs fills them with: work package size, iteration,
 * timestep, simulation time and a placeholder for the work package count. */
#define BUFFER_OFFSET 5
/** Message tag indicating work */
#define TAG_WORK 42
/** Message tag indicating to finish loop */
#define TAG_FINISH 43
/** Message tag indicating timing profiling */
#define TAG_TIMING 44
/** Message tag indicating collecting DHT performance */
#define TAG_DHT_PERF 45
/** Message tag indicating simulation reached the end of an iteration */
#define TAG_DHT_ITER 47
namespace poet {
/**
* @brief Base class of the chemical simulation
*
* Providing member functions to run an iteration and to end a simulation. Also
* containing basic parameters for simulation.
*
*/
class ChemSim {
 public:
  /**
   * @brief Construct a new ChemSim object
   *
   * Creating a new instance of class ChemSim will just extract simulation
   * parameters from SimParams object.
   *
   * @param params SimParams object
   * @param R_ R runtime
   * @param grid_ Initialized grid
   */
  ChemSim(SimParams &params, RRuntime &R_, Grid &grid_);

  /**
   * @brief Destroy the ChemSim object
   *
   * Virtual because this is a polymorphic base class: derived classes such as
   * ChemMaster release buffers in their destructor, which must also run when
   * an object is destroyed through a ChemSim pointer or reference.
   *
   */
  virtual ~ChemSim() = default;

  /**
   * @brief Run iteration of simulation in sequential mode
   *
   * This will call the corresponding R function slave_chemistry, followed by
   * the execution of master_chemistry.
   *
   * @todo change function name. Maybe 'slave' to 'seq'.
   *
   */
  virtual void run();

  /**
   * @brief End simulation
   *
   * End the simulation by distributing the measured runtime of simulation to
   * the R runtime.
   *
   */
  virtual void end();

  /**
   * @brief Get the Chemistry Time
   *
   * @return double Runtime of sequential chemistry simulation in seconds
   */
  double getChemistryTime();

 protected:
  /**
   * @brief Current simulation time or 'age' of simulation
   *
   */
  double current_sim_time = 0;

  /**
   * @brief Current iteration
   *
   */
  int iteration = 0;

  /**
   * @brief Current simulation timestep
   *
   * Stored as double: the master reads it as a double from the R runtime
   * (mysetup$requested_dt) and forwards it in a buffer of doubles, so an
   * integer member would silently drop the fractional part.
   *
   */
  double dt = 0;

  /**
   * @brief Rank of process in MPI_COMM_WORLD
   *
   */
  int world_rank;

  /**
   * @brief Size of communicator MPI_COMM_WORLD
   *
   */
  int world_size;

  /**
   * @brief Number of grid cells in each work package
   *
   */
  unsigned int wp_size;

  /**
   * @brief Instance of RRuntime object
   *
   */
  RRuntime &R;

  /**
   * @brief Initialized grid object
   *
   */
  Grid &grid;

  /**
   * @brief Stores the size of each work package
   *
   * The master indexes this vector with the running package counter to get
   * the size of the package it is about to send.
   *
   */
  std::vector<int> wp_sizes_vector;

  /**
   * @brief Absolute path to output path
   *
   */
  std::string out_dir;

  /**
   * @brief Pointer to sending buffer
   *
   */
  double *send_buffer;

  /**
   * @brief Worker struct
   *
   * This struct contains information about which worker has work and which
   * work package it is working on.
   *
   */
  typedef struct {
    /** non-zero while a work package is assigned to the worker */
    char has_work;
    /** start address of the assigned work package inside mpi_buffer */
    double *send_addr;
  } worker_struct;

  /**
   * @brief Pointer to worker_struct
   *
   */
  worker_struct *workerlist;

  /**
   * @brief Pointer to mpi_buffer
   *
   * Typically for the master this is a contiguous C memory area containing
   * the grid. For worker the memory area will just have the size of a work
   * package.
   *
   */
  double *mpi_buffer;

  /**
   * @brief Total chemistry runtime
   *
   */
  double chem_t = 0.f;
};
/**
* @brief Class providing execution of master chemistry
*
* Providing member functions to run an iteration and to end a simulation. Also
* a loop to send and recv pkgs from workers is implemented.
*
*/
class ChemMaster : public ChemSim {
public:
/**
* @brief Construct a new ChemMaster object
*
* The following steps are executed to create a new object of ChemMaster:
* -# all needed simulation parameters are extracted
* -# memory is allocated
* -# distribution of work packages is calculated
*
* @param params Simulation parameters as SimParams object
* @param R_ R runtime
* @param grid_ Grid object
*/
ChemMaster(SimParams &params, RRuntime &R_, Grid &grid_);
/**
* @brief Destroy the ChemMaster object
*
* By freeing ChemMaster all buffers allocated in the constructor are freed.
*
*/
~ChemMaster();
/**
* @brief Run iteration of simulation in parallel mode
*
* To run the chemistry simulation in parallel the following steps are done:
*
* -# 'Shuffle' the grid by previously calculated distribution of work
* packages. Convert R grid to C memory area.
* -# Start the send/recv loop.
* Detailed description in sendPkgs respectively in recvPkgs.
* -# 'Unshuffle'
* the grid and convert C memory area to R grid.
* -# Run 'master_chemistry'
*
* The main tasks are instrumented with time measurements.
*
*/
void run() override;
/**
* @brief End chemistry simulation.
*
* Notify the workers to finish their 'work'-loop. This is done by sending
* every worker an empty message with the tag TAG_FINISH. Now the master will
* receive measured times and DHT metrics from all workers one after another.
* Finally it will write all data to the R runtime and return from this
* function.
*
*/
void end() override;
/**
* @brief Get the send time
*
* Time spent in send loop.
*
* @return double send time in seconds
*/
double getSendTime();
/**
* @brief Get the recv time
*
* Time spent in recv loop.
*
* @return double recv time in seconds
*/
double getRecvTime();
/**
* @brief Get the idle time
*
* Time master was idling in MPI_Probe of recv loop.
*
* @return double idle time in seconds
*/
double getIdleTime();
/**
* @brief Get the Worker time
*
* Time spent in whole send/recv loop.
*
* @return double worker time in seconds
*/
double getWorkerTime();
/**
* @brief Get the ChemMaster time
*
* Time spent in 'master_chemistry' R function.
*
* @return double ChemMaster time in seconds
*/
double getChemMasterTime();
/**
* @brief Get the sequential time
*
* Time the master spent executing code which must be run sequentially.
*
* @return double sequential time in seconds.
*/
double getSeqTime();
private:
/**
* @brief Print a progressbar
*
* Prints a progressbar to stdout according to count of processed work
* packages in this iteration.
*
* @param count_pkgs Last processed index of work package
* @param n_wp Number of work packages
* @param barWidth Width of the progressbar/Count of characters to display the
* bar (defaults to 70)
*/
void printProgressbar(int count_pkgs, int n_wp, int barWidth = 70);
/**
* @brief Start send loop
*
* Send a work package to every free worker, which are noted in a worker
* struct. After a work package was sent move pointer on work grid to the next
* work package. Use MPI_Send to transfer work package to worker.
*
* All three arguments are passed by reference, so they can be updated for
* the caller.
*
* @param pkg_to_send Reference to variable containing how many work packages
* are still to send
* @param count_pkgs Reference to variable indexing the current work package
* @param free_workers Reference to variable with the count of free workers
*/
void sendPkgs(int &pkg_to_send, int &count_pkgs, int &free_workers);
/**
* @brief Start recv loop
*
* Receive processed work packages by worker. This is done by first probing
* for a message. If a message is receivable, receive it and put result into
* respective memory area. Continue, but now with a non blocking MPI_Probe. If
* a message is receivable or if no work packages are left to send, receive
* it. Otherwise or if all remaining work packages are received exit loop.
*
* @param pkg_to_recv Reference to variable counting the work packages still
* to be received
* @param to_send Bool indicating if there are still work packages to send
* @param free_workers Reference to variable holding the number of free
* workers
*/
void recvPkgs(int &pkg_to_recv, bool to_send, int &free_workers);
/**
* @brief Indicating usage of DHT
*
*/
bool dht_enabled;
/**
* @brief Default number of grid cells in each work package
*
* NOTE(review): this private member has the same name as the protected
* member wp_size declared in the base class ChemSim and therefore hides it
* inside ChemMaster -- confirm that both are kept in sync by the
* constructors.
*
*/
unsigned int wp_size;
/**
* @brief Pointer to current to be processed work package
*
*/
double *work_pointer;
/**
* @brief Time spent in send loop
*
*/
double send_t = 0.f;
/**
* @brief Time spent in recv loop
*
*/
double recv_t = 0.f;
/**
* @brief Time master is idling in MPI_Probe
*
*/
double master_idle = 0.f;
/**
* @brief Time spent in send/recv loop
*
*/
double worker_t = 0.f;
/**
* @brief Time spent in sequential chemistry part
*
*/
double chem_master = 0.f;
/**
* @brief Time spent in sequential instructions
*
*/
double seq_t = 0.f;
};
/**
* @brief Class providing execution of worker chemistry
*
* Providing mainly a function to loop and wait for messages from the master.
*
*/
class ChemWorker : public ChemSim {
public:
/**
* @brief Construct a new ChemWorker object
*
* The following steps are executed to create a new object of ChemWorker:
* -# all needed simulation parameters are extracted
* -# memory is allocated
* -# preparation to create a DHT (only if the DHT is enabled)
* -# and finally create a new DHT_Wrapper (only if the DHT is enabled)
*
* @param params Simulation parameters as SimParams object
* @param R_ R runtime
* @param grid_ Grid object
* @param dht_comm Communicator addressing all processes marked as worker
*/
ChemWorker(SimParams &params, RRuntime &R_, Grid &grid_, MPI_Comm dht_comm);
/**
* @brief Destroy the ChemWorker object
*
* Therefore all buffers are freed and, if the DHT was enabled, the
* DHT_Wrapper object is destroyed.
*
*/
~ChemWorker();
/**
* @brief Start the 'work' loop
*
* Loop in an endless loop. At the beginning probe for a message from the
* master process. If there is a receivable message evaluate the message tag.
* The loop is left after a message tagged TAG_FINISH was handled.
*
*/
void loop();
private:
/**
* @brief Evaluating message to receive as work package
*
* These steps are done:
*
* -# Receive message
* -# Evaluate message header containing information about work package size,
* current iteration and timestep, simulation age
* -# if DHT is enabled check DHT for previously simulated results
* -# run simulation of work package
* -# send results back to master
* -# if DHT is enabled write simulated grid points to DHT
*
* @param probe_status message status produced by MPI_Probe in loop
*/
void doWork(MPI_Status &probe_status);
/**
* @brief Action to do after iteration
*
* If DHT is enabled print statistics and if dht_snaps is set to 2 write DHT
* snapshots. Ends with a barrier on MPI_COMM_WORLD.
*
*/
void postIter();
/**
* @brief Message tag evaluates to TAG_FINISH
*
* Send all the collected timings and (possible) DHT metrics to the master.
*
*/
void finishWork();
/**
* @brief Write DHT snapshot
*
*/
void writeFile();
/**
* @brief Read DHT snapshot
*
*/
void readFile();
/**
* @brief Indicates usage of DHT
*
*/
bool dht_enabled;
/**
* @brief Boolean if dt differs between iterations
*
*/
bool dt_differ;
/**
* @brief Value between 0 and 2, indicating when to write DHT snapshots
*
* As used by this class: a value greater than 0 writes a snapshot when the
* simulation finishes, the value 2 additionally writes one after every
* iteration.
*
*/
int dht_snaps;
/**
* @brief Absolute path to DHT snapshot file
*
*/
std::string dht_file;
/**
* @brief Count of bytes each process should allocate for the DHT
*
*/
unsigned int dht_size_per_process;
/**
* @brief Indicates which grid cells were previously simulated and don't need
* to be simulated now
*
*/
std::vector<bool> dht_flags;
/**
* @brief simulated results are stored here
*
*/
double *mpi_buffer_results;
/**
* @brief Instance of DHT_Wrapper
*
* Only created (and deleted) when dht_enabled is set.
*
*/
DHT_Wrapper *dht;
/**
* @brief Array to store timings
*
* The values are stored in following order
*
* -# PHREEQC time
* -# DHT_get time
* -# DHT_fill time
*
*/
double timing[3];
/**
* @brief Time worker is idling in MPI_Probe
*
*/
double idle_t = 0.f;
/**
* @brief Count of PHREEQC calls
*
*/
int phreeqc_count = 0;
};
} // namespace poet
#endif // CHEMSIM_H

316
src/model/ChemWorker.cpp Normal file
View File

@ -0,0 +1,316 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <Rcpp.h>
#include <iostream>
#include <string>
#include "ChemSim.h"
using namespace poet;
using namespace std;
using namespace Rcpp;
/**
 * Build a worker: copy the DHT related settings out of the simulation
 * parameters, allocate the receive and result buffers and, if requested,
 * create the DHT (optionally pre-filled from a snapshot file).
 *
 * @param params   Simulation parameters
 * @param R_       R runtime, forwarded to ChemSim
 * @param grid_    Grid object, forwarded to ChemSim
 * @param dht_comm Communicator handed to the DHT_Wrapper
 */
ChemWorker::ChemWorker(SimParams &params, RRuntime &R_, Grid &grid_,
                       MPI_Comm dht_comm)
    : ChemSim(params, R_, grid_) {
  t_simparams num_params = params.getNumParams();
  dt_differ = num_params.dt_differ;
  dht_enabled = num_params.dht_enabled;
  dht_size_per_process = num_params.dht_size_per_process;
  dht_snaps = num_params.dht_snaps;
  dht_file = params.getDHTFile();

  // Status messages are printed by the process with world rank 1 only.
  const bool report = (world_rank == 1);

  // Receive buffer: one work package plus BUFFER_OFFSET additional doubles.
  mpi_buffer = static_cast<double *>(
      calloc((wp_size * (grid.getCols())) + BUFFER_OFFSET, sizeof(double)));
  // Result buffer: one work package.
  mpi_buffer_results = static_cast<double *>(
      calloc(wp_size * (grid.getCols()), sizeof(double)));

  if (report) {
    cout << "CPP: Worker: DHT usage is " << (dht_enabled ? "ON" : "OFF")
         << endl;
  }

  if (dht_enabled) {
    // A key is one grid row, extended by one double if dt may differ.
    int data_size = grid.getCols() * sizeof(double);
    int key_size =
        grid.getCols() * sizeof(double) + (dt_differ * sizeof(double));
    int dht_buckets_per_process =
        dht_size_per_process / (1 + data_size + key_size);

    if (report) {
      cout << "CPP: Worker: data size: " << data_size << " bytes" << endl;
      cout << "CPP: Worker: key size: " << key_size << " bytes" << endl;
      cout << "CPP: Worker: buckets per process " << dht_buckets_per_process
           << endl;
    }

    dht = new DHT_Wrapper(params, dht_comm, dht_buckets_per_process, data_size,
                          key_size);
    if (report) {
      cout << "CPP: Worker: DHT created!" << endl;
    }

    if (!dht_file.empty()) {
      readFile();
    }

    // One flag per grid cell of a work package, all set to true by default.
    dht_flags.assign(wp_size, true);
  }

  for (double &slot : timing) {
    slot = 0.0;
  }
}
/**
 * Release the two calloc'ed buffers and, if the DHT was enabled, the
 * DHT_Wrapper created in the constructor.
 */
ChemWorker::~ChemWorker() {
  free(mpi_buffer);
  free(mpi_buffer_results);

  // The wrapper is only allocated when the DHT is enabled.
  if (dht_enabled) {
    delete dht;
  }
}
void ChemWorker::loop() {
MPI_Status probe_status;
while (1) {
double idle_a = MPI_Wtime();
MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &probe_status);
double idle_b = MPI_Wtime();
/* there is a work package to receive */
if (probe_status.MPI_TAG == TAG_WORK) {
idle_t += idle_b - idle_a;
doWork(probe_status);
}
/* end of iteration */
else if (probe_status.MPI_TAG == TAG_DHT_ITER) {
postIter();
}
/* end of simulation */
else if (probe_status.MPI_TAG == TAG_FINISH) {
finishWork();
break;
}
}
}
/**
 * Receive one work package from the master, simulate it and send the result
 * back.
 *
 * Steps:
 *  -# receive the message announced by MPI_Probe into mpi_buffer
 *  -# read the header stored behind the payload (work package size,
 *     iteration, timestep, simulation time) and forward it to the R runtime
 *  -# if the DHT is enabled, look up previously simulated rows
 *  -# run slave_chemistry on the remaining rows
 *  -# send the results to the master (non-blocking)
 *  -# if the DHT is enabled, store the newly simulated rows
 *
 * @param probe_status Status filled by MPI_Probe in loop(); used to determine
 *                     the number of doubles to receive
 */
void ChemWorker::doWork(MPI_Status &probe_status) {
  int count;
  int local_work_package_size = 0;

  /* All timers start at zero. A phase that is skipped (DHT disabled, or no
   * row left to simulate) then adds exactly 0 s to the statistics instead of
   * the difference of two uninitialised doubles. */
  double dht_get_start = 0.0, dht_get_end = 0.0;
  double phreeqc_time_start = 0.0, phreeqc_time_end = 0.0;
  double dht_fill_start = 0.0, dht_fill_end = 0.0;

  /* get number of doubles to be received */
  MPI_Get_count(&probe_status, MPI_DOUBLE, &count);

  /* receive */
  MPI_Recv(mpi_buffer, count, MPI_DOUBLE, 0, TAG_WORK, MPI_COMM_WORLD,
           MPI_STATUS_IGNORE);

  /* decrement count of work_package by BUFFER_OFFSET; afterwards `count` is
   * the index of the first header value */
  count -= BUFFER_OFFSET;

  /* check for changes on all additional variables given by the 'header' of
   * mpi_buffer */

  // work_package_size
  if (mpi_buffer[count] != local_work_package_size) {
    local_work_package_size = mpi_buffer[count];
    R["work_package_size"] = local_work_package_size;
    R.parseEvalQ("mysetup$work_package_size <- work_package_size");
  }

  // current iteration of simulation
  if (mpi_buffer[count + 1] != iteration) {
    iteration = mpi_buffer[count + 1];
    R["iter"] = iteration;
    R.parseEvalQ("mysetup$iter <- iter");
  }

  // current timestep size
  if (mpi_buffer[count + 2] != dt) {
    dt = mpi_buffer[count + 2];
    R["dt"] = dt;
    R.parseEvalQ("mysetup$dt <- dt");
  }

  // current simulation time ('age' of simulation)
  if (mpi_buffer[count + 3] != current_sim_time) {
    current_sim_time = mpi_buffer[count + 3];
    R["simulation_time"] = current_sim_time;
    R.parseEvalQ("mysetup$simulation_time <- simulation_time");
  }

  /* mpi_buffer[count + 4] is currently an unused placeholder */

  if (dht_enabled) {
    /* resize helper vector dht_flags if work_package_size changes */
    if ((int)dht_flags.size() != local_work_package_size) {
      dht_flags.resize(local_work_package_size, true);  // set size
      dht_flags.assign(local_work_package_size,
                       true);  // assign all elements to true (default)
    }

    /* check for values in DHT */
    dht_get_start = MPI_Wtime();
    dht->checkDHT(local_work_package_size, dht_flags, mpi_buffer, dt);
    dht_get_end = MPI_Wtime();

    /* distribute dht_flags to R Runtime */
    R["dht_flags"] = as<LogicalVector>(wrap(dht_flags));
  }

  /* Convert grid to R runtime */
  // NOTE(review): the skeleton is built with wp_size rows although dht_flags
  // holds local_work_package_size entries -- confirm this is intended for
  // work packages smaller than wp_size.
  grid.importWP(mpi_buffer, wp_size);

  if (dht_enabled) {
    R.parseEvalQ("work_package <- work_package_full[dht_flags,]");
  } else {
    R.parseEvalQ("work_package <- work_package_full");
  }
  R.parseEvalQ("work_package <- as.matrix(work_package)");

  unsigned int nrows = R.parseEval("nrow(work_package)");

  if (nrows > 0) {
    /* Single line error workaround */
    if (nrows <= 1) {
      // duplicate line to enable correct simulation
      R.parseEvalQ(
          "work_package <- work_package[rep(1:nrow(work_package), "
          "times = 2), ]");
    }

    /* Run PHREEQC */
    phreeqc_time_start = MPI_Wtime();
    R.parseEvalQ(
        "result <- as.data.frame(slave_chemistry(setup=mysetup, "
        "data = work_package))");
    phreeqc_time_end = MPI_Wtime();
  }
  /* else: nothing left to simulate, the PHREEQC timers stay at zero */

  phreeqc_count++;

  if (dht_enabled) {
    R.parseEvalQ("result_full <- work_package_full");
    if (nrows > 0) R.parseEvalQ("result_full[dht_flags,] <- result");
  } else {
    R.parseEvalQ("result_full <- result");
  }

  /* convert grid to C domain */
  grid.exportWP(mpi_buffer_results);

  /* send results to master; the DHT is filled while the send is in flight */
  MPI_Request send_req;
  MPI_Isend(mpi_buffer_results, count, MPI_DOUBLE, 0, TAG_WORK, MPI_COMM_WORLD,
            &send_req);

  if (dht_enabled) {
    /* write results to DHT */
    dht_fill_start = MPI_Wtime();
    dht->fillDHT(local_work_package_size, dht_flags, mpi_buffer,
                 mpi_buffer_results, dt);
    dht_fill_end = MPI_Wtime();

    timing[1] += dht_get_end - dht_get_start;
    timing[2] += dht_fill_end - dht_fill_start;
  }

  timing[0] += phreeqc_time_end - phreeqc_time_start;

  /* the result buffer must not be reused before the send has completed */
  MPI_Wait(&send_req, MPI_STATUS_IGNORE);
}
/**
 * Handle the end-of-iteration message: consume it, print DHT statistics if
 * the DHT is enabled (plus a snapshot when dht_snaps is 2) and wait at a
 * barrier on MPI_COMM_WORLD.
 */
void ChemWorker::postIter() {
  // The message carries no payload; receiving it just removes it from the
  // queue.
  MPI_Recv(NULL, 0, MPI_DOUBLE, 0, TAG_DHT_ITER, MPI_COMM_WORLD,
           MPI_STATUS_IGNORE);

  if (dht_enabled) {
    dht->printStatistics();

    const bool snapshot_each_iteration = (dht_snaps == 2);
    if (snapshot_each_iteration) {
      writeFile();
    }
  }

  // synchronize all processes
  MPI_Barrier(MPI_COMM_WORLD);
}
/**
 * Write the current DHT state to "<out_dir>/iter_<iteration>.dht", with the
 * iteration number zero-padded to three digits. Only the process with world
 * rank 2 reports success or failure.
 */
void ChemWorker::writeFile() {
  cout.flush();

  std::stringstream name_builder;
  name_builder << out_dir << "/iter_" << setfill('0') << setw(3) << iteration
               << ".dht";
  const std::string snapshot_path = name_builder.str();

  int res = dht->tableToFile(snapshot_path.c_str());

  // every other rank stays silent
  if (world_rank != 2) {
    return;
  }

  if (res != DHT_SUCCESS) {
    cerr << "CPP: Worker: Error in writing current state of DHT to file."
         << endl;
  } else {
    cout << "CPP: Worker: Successfully written DHT to file " << snapshot_path
         << endl;
  }
}
/**
 * Load a DHT snapshot from dht_file. On failure the simulation continues
 * with an empty DHT. Failures are reported by world rank 1, success by world
 * rank 2.
 */
void ChemWorker::readFile() {
  const int status = dht->fileToTable((char *)dht_file.c_str());

  if (status == DHT_SUCCESS) {
    if (world_rank == 2) {
      cout << "CPP: Worker: Successfully loaded state of DHT from file "
           << dht_file << endl;
    }
    std::cout.flush();
    return;
  }

  // failure: only rank 1 prints a diagnostic
  if (world_rank != 1) {
    return;
  }

  if (status == DHT_WRONG_FILE) {
    cerr << "CPP: Worker: Wrong file layout! Continue with empty DHT ..."
         << endl;
  } else {
    cerr << "CPP: Worker: Error in loading current state of DHT from "
            "file. Continue with empty DHT ..."
         << endl;
  }
}
void ChemWorker::finishWork() {
/* before death, submit profiling/timings to master*/
MPI_Recv(NULL, 0, MPI_DOUBLE, 0, TAG_FINISH, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
// timings
MPI_Send(timing, 3, MPI_DOUBLE, 0, TAG_TIMING, MPI_COMM_WORLD);
MPI_Send(&phreeqc_count, 1, MPI_INT, 0, TAG_TIMING, MPI_COMM_WORLD);
MPI_Send(&idle_t, 1, MPI_DOUBLE, 0, TAG_TIMING, MPI_COMM_WORLD);
if (dht_enabled) {
// dht_perf
int dht_perf[3];
dht_perf[0] = dht->getHits();
dht_perf[1] = dht->getMisses();
dht_perf[2] = dht->getEvictions();
MPI_Send(dht_perf, 3, MPI_INT, 0, TAG_DHT_PERF, MPI_COMM_WORLD);
}
if (dht_enabled && dht_snaps > 0) writeFile();
}

65
src/model/Grid.cpp Normal file
View File

@ -0,0 +1,65 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "Grid.h"
using namespace poet;
using namespace Rcpp;
void Grid::init() {
R.parseEval("GRID_TMP <- mysetup$state_C");
this->ncol = R.parseEval("ncol(GRID_TMP)");
this->nrow = R.parseEval("nrow(GRID_TMP)");
}
unsigned int Grid::getCols() { return this->ncol; }
unsigned int Grid::getRows() { return this->nrow; }
/**
 * Shuffle mysetup$state_T inside R according to ordered_ids and write the
 * shuffled data frame into the C buffer.
 *
 * @param buffer Destination memory area, allocated by the caller
 */
void Grid::shuffleAndExport(double *buffer) {
  // 1. let R build the shuffled field in the variable `tmp`
  R.parseEval("tmp <- shuffle_field(mysetup$state_T, ordered_ids)");

  // 2. make `tmp` the buffered data frame and copy it out
  R.setBufferDataFrame("tmp");
  R.to_C_domain(buffer);
}
/**
 * Copy the C buffer into a data frame shaped like GRID_TMP, publish it in R
 * as GRID_CHEM_DATA and store the unshuffled field in the R variable
 * `result`.
 *
 * @param buffer Source memory area
 */
void Grid::importAndUnshuffle(double *buffer) {
  // GRID_TMP provides the structure the buffer is read into
  R.setBufferDataFrame("GRID_TMP");
  R.from_C_domain(buffer);

  R["GRID_CHEM_DATA"] = R.getBufferDataFrame();

  // undo the shuffling on the R side
  R.parseEval("result <- unshuffle_field(GRID_CHEM_DATA, ordered_ids)");
}
void Grid::importWP(double *buffer, unsigned int wp_size) {
R["GRID_WP_SKELETON"] = getSkeletonDataFrame(wp_size);
R.setBufferDataFrame("GRID_WP_SKELETON");
R.from_C_domain(buffer);
R["work_package_full"] = R.getBufferDataFrame();
}
/**
 * Export the R data frame result_full into the C buffer.
 *
 * @param buffer Destination memory area, allocated by the caller
 */
void Grid::exportWP(double *buffer) {
  // result_full becomes the buffered data frame ...
  R.setBufferDataFrame("result_full");
  // ... and is then copied into the C memory area
  R.to_C_domain(buffer);
}
/**
 * Return the first `rows` rows of GRID_TMP as a data frame. The row count is
 * passed to R through the variable GRID_ROWS.
 *
 * @param rows Number of rows of the skeleton
 * @return Rcpp::DataFrame result of head(GRID_TMP, GRID_ROWS)
 */
Rcpp::DataFrame Grid::getSkeletonDataFrame(unsigned int rows) {
  R["GRID_ROWS"] = rows;

  Rcpp::DataFrame skeleton = R.parseEval("head(GRID_TMP,GRID_ROWS)");
  return skeleton;
}

146
src/model/Grid.h Normal file
View File

@ -0,0 +1,146 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef GRID_H
#define GRID_H
#include <RRuntime.h>
#include <Rcpp.h>
namespace poet {
/**
* @brief Class describing the grid
*
* Providing methods to shuffle and unshuffle grid (for the master) as also to
* import and export a work package (for worker).
*
* @todo find better abstraction
*
*/
class Grid {
public:
/**
* @brief Construct a new Grid object
*
* Initializes the private RRuntime member with the given R runtime.
*
* NOTE(review): the member is declared as `RRuntime R` (by value), so this
* copy-constructs the runtime object, whereas ChemSim and TransportSim keep
* a reference (`RRuntime &R`). Confirm that the copy is intended.
*
* @param R R runtime used for all grid operations
*/
Grid(RRuntime &R) : R(R){};
/**
* @brief Init the grid
*
* At this moment init will only define a variable inside the R runtime
* called GRID_TMP and read its dimensions, since the whole Grid
* initialization and management is done by the R runtime. This may change in
* the future.
*
*/
void init();
/**
* @brief Returns the number of elements for each gridcell
*
* @return unsigned int Number of elements
*/
unsigned int getCols();
/**
* @brief Returns the number of grid cells
*
* @return unsigned int Number of grid cells
*/
unsigned int getRows();
/**
* @brief Shuffle the grid and export it to C memory area
*
* This will call shuffle_field inside R runtime, set the resulting grid as
* buffered data frame in RRuntime object and write R grid to contiguous C
* memory area.
*
* @param[in,out] buffer Pointer to C memory area
*/
void shuffleAndExport(double *buffer);
/**
* @brief Unshuffle the grid and import it from C memory area into R runtime
*
* Write C memory area into temporary R grid variable and unshuffle it.
*
* @param buffer Pointer to C memory area
*/
void importAndUnshuffle(double *buffer);
/**
* @brief Import a C memory area as a work package into R runtime
*
* Get a skeleton from getSkeletonDataFrame inside R runtime and set this as
* buffer data frame of RRuntime object. Now convert C memory area to R data
* structure.
*
* @param buffer Pointer to C memory area
* @param wp_size Count of grid cells per work package
*/
void importWP(double *buffer, unsigned int wp_size);
/**
* @brief Export a work package from R runtime into C memory area
*
* Set buffer data frame in RRuntime object to data frame holding the results
* and convert this to C memory area.
*
* @param buffer Pointer to C memory area
*/
void exportWP(double *buffer);
private:
/**
* @brief Instance of RRuntime
*
* Held by value, i.e. a copy of the runtime object passed to the
* constructor.
*
*/
RRuntime R;
/**
* @brief Number of columns of grid
*
*/
unsigned int ncol;
/**
* @brief Number of rows of grid
*
*/
unsigned int nrow;
/**
* @brief Get a SkeletonDataFrame
*
* Return a skeleton with \e n rows of current grid
*
* @param rows number of rows to return skeleton
* @return Rcpp::DataFrame Can be seen as a skeleton. The content of the data
* frame might be irrelevant.
*/
Rcpp::DataFrame getSkeletonDataFrame(unsigned int rows);
};
} // namespace poet
#endif // GRID_H

View File

@ -0,0 +1,44 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "TransportSim.h"
#include <mpi.h>
using namespace poet;
/** Bind the simulation to the given R runtime; nothing else is set up. */
TransportSim::TransportSim(RRuntime &R_)
    : R(R_) {
}
/**
 * Run one transport step by calling the R function master_advection and add
 * the elapsed wall-clock time to transport_t.
 */
void TransportSim::run() {
  const double t_begin = MPI_Wtime();
  R.parseEvalQ("mysetup <- master_advection(setup=mysetup)");
  const double t_end = MPI_Wtime();

  transport_t += t_end - t_begin;
}
/**
 * Publish the accumulated transport time to the R runtime as
 * profiling$simtime_transport.
 */
void TransportSim::end() {
  // hand the value over through a temporary R variable ...
  R["simtime_transport"] = transport_t;
  // ... and store it in the profiling list
  R.parseEvalQ("profiling$simtime_transport <- simtime_transport");
}
double TransportSim::getTransportTime() { return this->transport_t; }

82
src/model/TransportSim.h Normal file
View File

@ -0,0 +1,82 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef TRANSPORT_SIM_H
#define TRANSPORT_SIM_H
#include <RRuntime.h>
namespace poet {
/**
* @brief Class describing transport simulation
*
* Offers simple methods to run an iteration and end the simulation.
*
*/
class TransportSim {
public:
/**
* @brief Construct a new TransportSim object
*
* The instance will only be initialized with given R object, which is kept
* by reference.
*
* @param R RRuntime object
*/
TransportSim(RRuntime &R);
/**
* @brief Run simulation for one iteration
*
* This will simply call the R function 'master_advection' and measure the
* time spent in it.
*
*/
void run();
/**
* @brief End simulation
*
* All measured timings are distributed to the R runtime
*
*/
void end();
/**
* @brief Get the transport time
*
* @return double time spent in transport
*/
double getTransportTime();
private:
/**
* @brief Reference to the RRuntime instance
*
*/
RRuntime &R;
/**
* @brief time spent for transport
*
*/
double transport_t = 0.f;
};
} // namespace poet
#endif // TRANSPORT_SIM_H

218
src/poet.cpp Normal file
View File

@ -0,0 +1,218 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <ChemSim.h>
#include <Grid.h>
#include <RRuntime.h>
#include <Rcpp.h>
#include <SimParams.h>
#include <TransportSim.h>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <poet.h>
using namespace std;
using namespace poet;
using namespace Rcpp;
/*
* Entry point of POET.
*
* Every MPI process initializes MPI and its own R runtime, parses the command
* line and initializes chemistry and grid. Afterwards rank 0 acts as master
* and drives the coupled transport/chemistry loop, while all other ranks run
* the worker loop until the master tells them to finish.
*
* The order of the collective MPI calls (MPI_Comm_split, MPI_Bcast,
* MPI_Barrier) must be the same on master and workers.
*/
int main(int argc, char *argv[]) {
double sim_start, sim_end;
int world_size, world_rank;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
/*Create custom Communicator with all processes except 0 (the master) for DHT
* storage */
MPI_Comm dht_comm;
if (world_rank == 0) {
// the master passes MPI_UNDEFINED as color and thus joins no new communicator
MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, world_rank, &dht_comm);
} else {
MPI_Comm_split(MPI_COMM_WORLD, 1, world_rank, &dht_comm);
}
if (world_rank == 0) {
cout << "Running POET in version " << poet_version << endl << endl;
}
/* initialize R runtime */
RRuntime R(argc, argv);
/*Loading Dependencies*/
// the R libraries are sourced via paths relative to the working directory
std::string r_load_dependencies = "suppressMessages(library(Rmufits));"
"suppressMessages(library(RedModRphree));"
"source('../R_lib/kin_r_library.R');"
"source('../R_lib/parallel_r_library.R');";
R.parseEvalQ(r_load_dependencies);
SimParams params(world_rank, world_size);
// parse the command line; on error or help request shut down MPI and leave
int pret = params.parseFromCmdl(argv, R);
if (pret == PARSER_ERROR) {
MPI_Finalize();
return EXIT_FAILURE;
} else if (pret == PARSER_HELP) {
MPI_Finalize();
return EXIT_SUCCESS;
}
cout << "CPP: R Init (RInside) on process " << world_rank << endl;
bool dt_differ;
if (world_rank == 0) {
// run master_init and read the dt_differ flag from the resulting setup ...
std::string master_init_code = "mysetup <- master_init(setup=setup)";
R.parseEval(master_init_code);
dt_differ = R.parseEval("mysetup$dt_differ");
// ... and broadcast it to every other rank unequal to 0
MPI_Bcast(&dt_differ, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
}
/* workers will only read the setup DataFrame defined by input file */
else {
R.parseEval("mysetup <- setup");
// receiving side of the broadcast issued by rank 0 above
MPI_Bcast(&dt_differ, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
}
params.setDtDiffer(dt_differ);
// initialize chemistry on all processes
std::string init_chemistry_code = "mysetup <- init_chemistry(setup=mysetup)";
R.parseEval(init_chemistry_code);
Grid grid(R);
grid.init();
params.initVectorParams(R, grid.getCols());
// MDL: store all parameters
if (world_rank == 0) {
cout << "CPP: Calling R Function to store calling parameters" << endl;
R.parseEvalQ("StoreSetup(setup=mysetup)");
}
if (world_rank == 0) {
cout << "CPP: Init done on process with rank " << world_rank << endl;
}
// all processes wait here until initialization is finished everywhere
MPI_Barrier(MPI_COMM_WORLD);
/* THIS IS EXECUTED BY THE MASTER */
if (world_rank == 0) {
ChemMaster master(params, R, grid);
TransportSim trans(R);
sim_start = MPI_Wtime();
/* Iteration Count is dynamic, retrieving value from R (is only needed by
* master for the following loop) */
uint32_t maxiter = R.parseEval("mysetup$maxiter");
/* SIMULATION LOOP */
// iterations are counted from 1 to maxiter (inclusive)
for (uint32_t iter = 1; iter < maxiter + 1; iter++) {
cout << "CPP: Evaluating next time step" << endl;
R.parseEvalQ("mysetup <- master_iteration_setup(mysetup)");
/* displaying iteration number, with C++ and R iterator */
cout << "CPP: Going through iteration " << iter << endl;
cout << "CPP: R's $iter: " << ((uint32_t)(R.parseEval("mysetup$iter")))
<< ". Iteration" << endl;
cout << "CPP: Calling Advection" << endl;
/* run transport */
trans.run();
cout << "CPP: Chemistry" << endl;
/* Fallback for sequential execution */
if (world_size == 1) {
// explicitly qualified call: use the base class implementation
master.ChemSim::run();
}
/* otherwise run parallel */
else {
master.run();
}
// MDL master_iteration_end just writes on disk state_T and
// state_C after every iteration if the cmdline option
// --ignore-results is not given (and thus the R variable
// store_result is TRUE)
R.parseEvalQ("mysetup <- master_iteration_end(setup=mysetup)");
cout << endl
<< "CPP: End of *coupling* iteration " << iter << "/" << maxiter
<< endl
<< endl;
// per-iteration barrier; workers call MPI_Barrier in ChemWorker::postIter
MPI_Barrier(MPI_COMM_WORLD);
} // END SIMULATION LOOP
cout << "CPP: finished simulation loop" << endl;
sim_end = MPI_Wtime();
cout << "CPP: start timing profiling" << endl;
// collect all timings in the R list 'profiling'
R.parseEvalQ("profiling <- list()");
R["simtime"] = sim_end - sim_start;
R.parseEvalQ("profiling$simtime <- simtime");
trans.end();
if (world_size == 1) {
master.ChemSim::end();
} else {
master.end();
}
string r_vis_code;
r_vis_code = "saveRDS(profiling, file=paste0(fileout,'/timings.rds'));";
R.parseEval(r_vis_code);
cout << "CPP: Done! Results are stored as R objects into <"
<< params.getOutDir() << "/timings.rds>" << endl;
}
/* THIS IS EXECUTED BY THE WORKERS */
else {
ChemWorker worker(params, R, grid, dht_comm);
// returns after the message tagged TAG_FINISH was handled
worker.loop();
}
cout << "CPP: finished, cleanup of process " << world_rank << endl;
MPI_Finalize();
if (world_rank == 0) {
cout << "CPP: done, bye!" << endl;
}
// NOTE(review): exit() terminates without running the destructors of the
// local objects (R, params, grid) -- confirm this is intended.
exit(0);
}

6
src/poet.h.in Normal file
View File

@ -0,0 +1,6 @@
#ifndef POET_H
#define POET_H
/* Version string of POET. The @POET_VERSION@ placeholder is presumably
 * replaced when this template is configured by the build system.
 *
 * The pointer itself is const, which gives the variable internal linkage in
 * C++: the header can be included from several translation units without
 * causing multiple-definition link errors. */
const char *const poet_version = "@POET_VERSION@";
#endif // POET_H

View File

@ -1,44 +0,0 @@
#include "r_utils.h"
/* Convert a pure double data frame into a double array in row-major order:
   the value of row i and column j is stored at buffer[i * ncol + j].

   buffer <- double array, must be allocated by the caller with room for
             nrow * ncol doubles
   df     <- reference to input data frame
*/
void convert_R_Dataframe_2_C_buffer(double* buffer, Rcpp::DataFrame &df)
{
    const size_t rowCount = df.nrow();
    const size_t colCount = df.ncol();

    /* nothing to copy; also avoids touching the columns of an empty frame */
    if (rowCount == 0)
    {
        return;
    }

    for (size_t j = 0; j < colCount; j++)
    {
        /* Extract column vector j once per column instead of once per cell */
        Rcpp::DoubleVector col = df[j];
        for (size_t i = 0; i < rowCount; i++)
        {
            buffer[i * colCount + j] = col[i];
        }
    }
}
/* Convert a double array in row-major order into a double data frame: the
   value at buffer[i * ncol + j] is written to row i of column j.

   buffer <- input double array holding nrow * ncol doubles
   df     <- reference to output data frame; it must already have the fitting
             size, its structure is taken as is
*/
void convert_C_buffer_2_R_Dataframe(double* buffer, Rcpp::DataFrame &df)
{
    const size_t rowCount = df.nrow();
    const size_t colCount = df.ncol();

    /* nothing to copy; also avoids touching the columns of an empty frame */
    if (rowCount == 0)
    {
        return;
    }

    for (size_t j = 0; j < colCount; j++)
    {
        /* Extract column vector j once per column instead of once per cell */
        Rcpp::DoubleVector col = df[j];
        for (size_t i = 0; i < rowCount; i++)
        {
            col[i] = buffer[i * colCount + j];
        }
    }
}

View File

@ -1,6 +0,0 @@
#pragma once
#include <RInside.h>
/*Functions*/
/* Copy the values of data frame `df` into the caller-allocated array `buffer`
   (row i / column j ends up at buffer[i * ncol + j]). */
void convert_R_Dataframe_2_C_buffer(double* buffer, Rcpp::DataFrame &df);
/* Copy the values of `buffer` (same layout as above) into the columns of the
   already correctly sized data frame `df`. */
void convert_C_buffer_2_R_Dataframe(double* buffer, Rcpp::DataFrame &df);

4
src/util/CMakeLists.txt Normal file
View File

@ -0,0 +1,4 @@
# Utility library: R runtime wrapper and simulation parameter handling.
add_library(POET_Util
  RRuntime.cpp
  SimParams.cpp
)

# Consumers need this directory for the POET_Util headers as well as the R
# related include paths collected in R_INCLUDE_DIRS.
target_include_directories(POET_Util
  PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${R_INCLUDE_DIRS}
)

# Link against the R related libraries collected in R_LIBRARIES.
target_link_libraries(POET_Util
  PUBLIC
    ${R_LIBRARIES}
)

# STRICT_R_HEADERS is propagated to every target linking POET_Util.
target_compile_definitions(POET_Util
  PUBLIC
    STRICT_R_HEADERS
)

64
src/util/RRuntime.cpp Normal file
View File

@ -0,0 +1,64 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "RRuntime.h"
#include <RInside.h>
#include <Rcpp.h>
#include <string>
using namespace poet;
/* Copy the buffered data frame into a flat C array.
 *
 * The value of row i / column j of the buffered data frame is written to
 * buffer[i * ncol + j]. `buffer` must be allocated by the caller and hold at
 * least nrow * ncol doubles.
 */
void RRuntime::to_C_domain(double *buffer) {
  const size_t rowCount = dfbuff.nrow();
  const size_t colCount = dfbuff.ncol();

  // Nothing to copy; also keeps the "no column is touched for an empty frame"
  // behaviour of the cell-by-cell implementation.
  if (rowCount == 0) {
    return;
  }

  for (size_t j = 0; j < colCount; j++) {
    // Extract column j once instead of once per cell: building the Rcpp
    // vector wrapper does not depend on the row index.
    Rcpp::DoubleVector col = dfbuff[j];
    for (size_t i = 0; i < rowCount; i++) {
      buffer[i * colCount + j] = col[i];
    }
  }
}
/* Copy a flat C array into the buffered data frame.
 *
 * buffer[i * ncol + j] is stored in row i of column j of the buffered data
 * frame. `buffer` must hold at least nrow * ncol doubles.
 */
void RRuntime::from_C_domain(double *buffer) {
  const size_t rowCount = dfbuff.nrow();
  const size_t colCount = dfbuff.ncol();

  // Nothing to copy; also keeps the "no column is touched for an empty frame"
  // behaviour of the cell-by-cell implementation.
  if (rowCount == 0) {
    return;
  }

  for (size_t j = 0; j < colCount; j++) {
    // Extract column j once instead of once per cell: building the Rcpp
    // vector wrapper does not depend on the row index.
    Rcpp::DoubleVector col = dfbuff[j];
    for (size_t i = 0; i < rowCount; i++) {
      col[i] = buffer[i * colCount + j];
    }
  }
}
void RRuntime::setBufferDataFrame(std::string dfname) {
this->dfbuff = parseEval(dfname);
}
Rcpp::DataFrame RRuntime::getBufferDataFrame() { return this->dfbuff; }
size_t RRuntime::getBufferNCol() { return (this->dfbuff).ncol(); }
size_t RRuntime::getBufferNRow() { return (this->dfbuff).nrow(); }

137
src/util/RRuntime.h Normal file
View File

@ -0,0 +1,137 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef RRUNTIME_H
#define RRUNTIME_H
#include <RInside.h>
#include <Rcpp.h>
#include <string>
namespace poet {
/**
* @brief Provides an interface to an R runtime.
*
* RRuntime is a wrapper class around an RInside (R) runtime and provides
* several simplified methods to use R commands inside POET. In addition to the
* inherited RInside interface it keeps one "buffered" data frame that can be
* copied to and from a flat C array.
*
*/
class RRuntime : public RInside {
public:
/**
* @brief Construct a new RRuntime object
*
* Since this is an inherited class of RInside the constructor of the super
* class is just called.
*
* @param argc Argument counter of the program
* @param argv Argument values of the program
*/
RRuntime(const int argc, const char *const argv[]) : RInside(argc, argv){};
/**
* @brief Convert an R data frame into a contiguous C memory area.
*
* A buffer data frame must be set beforehand with setBufferDataFrame. Then
* each value will be copied into the contiguous memory area. This is done row
* wise, i.e. row i / column j is stored at index i * ncol + j.
*
* @todo: Might be more performant if all columns would be loaded at once and
* not for each column separately
*
* @param buffer Pointer to pre-allocated memory (at least nrow * ncol doubles)
*/
void to_C_domain(double *buffer);
/**
* @brief Convert a contiguous C memory area into the buffered R data frame.
*
* A buffer data frame must be set beforehand with setBufferDataFrame. Then
* each value will be written into the buffered data frame of this object.
* The memory area is read row wise, i.e. index i * ncol + j is stored in
* row i / column j.
*
* @todo: Might be more performant if all columns would be loaded at once and
* not for each column separately
*
* @param buffer Pointer to memory area which should be converted into R
* dataframe.
*/
void from_C_domain(double *buffer);
/**
* @brief Set the Buffer Data Frame object
*
* Set the buffered data frame (will be mostly the grid) of this object. The
* given string is evaluated inside the R runtime and the result is stored.
*
* @param dfname Name of the data frame inside R runtime
*/
void setBufferDataFrame(std::string dfname);
/**
* @brief Get the Buffer Data Frame object
*
* Returning the current buffered data frame as a Rcpp data frame.
*
* @return Rcpp::DataFrame Current buffered data frame
*/
Rcpp::DataFrame getBufferDataFrame();
/**
* @brief Get the Buffer N Col object
*
* Get the number of columns of the buffered data frame.
*
* @return size_t Count of columns of buffered data frame
*/
size_t getBufferNCol();
/**
* @brief Get the Buffer N Row object
*
* Get the number of rows of the buffered data frame.
*
* @return size_t Count of rows of buffered data frame
*/
size_t getBufferNRow();
private:
/**
* @brief Buffered data frame
*
* This is used to convert an R data frame into contiguous memory used by the
* C/C++ runtime and vice versa. Must be set with setBufferDataFrame and can
* be manipulated with from_C_domain.
*
* @todo: Find a cleaner solution. Maybe abstraction via another class.
*
*/
Rcpp::DataFrame dfbuff;
};
} // namespace poet
#endif // RRUNTIME_H

219
src/util/SimParams.cpp Normal file
View File

@ -0,0 +1,219 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "SimParams.h"
#include <Rcpp.h>
#include <iostream>
using namespace poet;
using namespace std;
using namespace Rcpp;
/* Create a parameter set for the process with the given rank in a world of
 * the given size.
 *
 * `simparams()` value-initialises the whole parameter struct, so every flag
 * and counter starts out as false / 0 instead of holding an indeterminate
 * value. Several fields (e.g. the DHT related ones) are only assigned later
 * under certain command line options, yet the struct is copied and partly
 * forwarded to R regardless.
 */
SimParams::SimParams(int world_rank_, int world_size_) : simparams() {
  simparams.world_rank = world_rank_;
  simparams.world_size = world_size_;
}
/* Parse the program arguments, store them in this object and publish them to
 * the R runtime.
 *
 * Steps, in order:
 *   1. "help"/"h" flag given     -> print a hint (rank 0 only), PARSER_HELP
 *   2. positional argument 2 missing -> print usage error (rank 0 only),
 *                                   PARSER_ERROR
 *   3. unknown flags/parameters  -> list them (rank 0 only), PARSER_ERROR
 *   4. read DHT, work package and output options, echo them on rank 0
 *   5. assign the values to variables of the R runtime and source the
 *      simulation script given as first positional argument
 *
 * argv  Argument vector of the program
 * R     Instantiated R runtime that receives the parsed values
 *
 * Returns PARSER_OK on success, PARSER_HELP or PARSER_ERROR otherwise.
 */
int SimParams::parseFromCmdl(char *argv[], RRuntime &R) {
  // initialize argh object
  argh::parser cmdl(argv);

  // only the process with rank 0 writes to the terminal
  const bool print_here = (simparams.world_rank == 0);

  // if user asked for help
  if (cmdl[{"help", "h"}]) {
    if (print_here) {
      cout << "Todo" << endl
           << "See README.md for further information." << endl;
    }
    return PARSER_HELP;
  }

  // if positional arguments are missing
  if (!cmdl(2)) {
    if (print_here) {
      cerr << "ERROR. Kin needs 2 positional arguments: " << endl
           << "1) the R script defining your simulation and" << endl
           << "2) the directory prefix where to save results and profiling"
           << endl;
    }
    return PARSER_ERROR;
  }

  // collect all parameters which are not known, print them to stderr and
  // return with PARSER_ERROR
  std::list<std::string> optionsError = validateOptions(cmdl);
  if (!optionsError.empty()) {
    if (print_here) {
      cerr << "Unrecognized option(s):\n" << endl;
      for (const auto &option : optionsError) {
        cerr << option << endl;
      }
      cerr << "\nMake sure to use available options. Exiting!" << endl;
    }
    return PARSER_ERROR;
  }

  /*Parse DHT arguments*/
  simparams.dht_enabled = cmdl["dht"];
  // Evaluated unconditionally: dht_log is handed to R further down even when
  // the DHT is switched off, so it must never be left unassigned.
  simparams.dht_log = !(cmdl["dht-nolog"]);
  if (simparams.dht_enabled) {
    cmdl("dht-strategy", 0) >> simparams.dht_strategy;
    cmdl("dht-signif", 5) >> simparams.dht_significant_digits;
    cmdl("dht-size", DHT_SIZE_PER_PROCESS) >> simparams.dht_size_per_process;
    cmdl("dht-snaps", 0) >> simparams.dht_snaps;
    cmdl("dht-file") >> dht_file;
  }

  /*Parse work package size*/
  cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> simparams.wp_size;

  /*Parse output options*/
  simparams.store_result = !cmdl["ignore-result"];

  if (print_here) {
    cout << "CPP: Complete results storage is "
         << (simparams.store_result ? "ON" : "OFF") << endl;
    cout << "CPP: Work Package Size: " << simparams.wp_size << endl;
    cout << "CPP: DHT is " << (simparams.dht_enabled ? "ON" : "OFF") << '\n';

    if (simparams.dht_enabled) {
      cout << "CPP: DHT strategy is " << simparams.dht_strategy << endl;
      cout << "CPP: DHT key default digits (ignored if 'signif_vector' is "
              "defined) = "
           << simparams.dht_significant_digits << endl;
      cout << "CPP: DHT logarithm before rounding: "
           << (simparams.dht_log ? "ON" : "OFF") << endl;
      cout << "CPP: DHT size per process (Byte) = "
           << simparams.dht_size_per_process << endl;
      cout << "CPP: DHT save snapshots is " << simparams.dht_snaps << endl;
      cout << "CPP: DHT load file is " << dht_file << endl;
    }
  }

  // NOTE(review): stream extraction into a std::string stops at the first
  // whitespace, so paths containing blanks would presumably be cut short
  // here -- confirm whether such paths need to be supported.
  cmdl(1) >> filesim;
  cmdl(2) >> out_dir;

  /* distribute information to R runtime */
  // if local_rank == 0 then master else worker
  R["local_rank"] = simparams.world_rank;
  // assign a char* (string) to 'filesim'
  R["filesim"] = wrap(filesim);
  // assign a char* (string) to 'fileout'
  R["fileout"] = wrap(out_dir);
  // pass the boolean "store_result" to the R process
  R["store_result"] = simparams.store_result;
  // worker count
  R["n_procs"] = simparams.world_size - 1;
  // work package size
  R["work_package_size"] = simparams.wp_size;
  // dht enabled?
  R["dht_enabled"] = simparams.dht_enabled;
  // log before rounding?
  R["dht_log"] = simparams.dht_log;

  // eval the init string, ignoring any returns
  R.parseEvalQ("source(filesim)");

  return PARSER_OK;
}
/* Fill dht_signif_vector and dht_prop_type_vector.
 *
 * Does nothing unless the DHT is enabled. Each vector is taken from the R
 * runtime if the corresponding R variable ('signif_vector' / 'prop_type')
 * exists; otherwise it is filled with `col_count` default entries. On rank 0
 * the choice is reported and both final vectors are assigned to R variables.
 */
void SimParams::initVectorParams(RRuntime &R, int col_count) {
  // the vectors are only needed when the DHT is in use
  if (!simparams.dht_enabled) {
    return;
  }

  /*Significance vector: default unless the R setup file provides one*/
  bool has_signif = R.parseEval("exists('signif_vector')");
  if (!has_signif) {
    dht_signif_vector.assign(col_count, simparams.dht_significant_digits);
  } else {
    dht_signif_vector = as<std::vector<int>>(R["signif_vector"]);
  }

  /*Property type vector: default unless the R setup file provides one*/
  bool has_prop_type = R.parseEval("exists('prop_type')");
  if (!has_prop_type) {
    dht_prop_type_vector.assign(col_count, "act");
  } else {
    dht_prop_type_vector = as<std::vector<string>>(R["prop_type"]);
  }

  // reporting and hand-over to R happen on rank 0 only
  if (simparams.world_rank != 0) {
    return;
  }

  // MDL: new output on signif_vector and prop_type
  if (has_signif) {
    cout << "CPP: using problem-specific rounding digits: " << endl;
    R.parseEval(
        "print(data.frame(prop=prop, type=prop_type, "
        "digits=signif_vector))");
  } else {
    cout << "CPP: using DHT default rounding digits = "
         << simparams.dht_significant_digits << endl;
  }

  // MDL: pass to R the DHT stuff. These variables exist
  // only if dht_enabled is true
  R["dht_final_signif"] = dht_signif_vector;
  R["dht_final_proptype"] = dht_prop_type_vector;
}
void SimParams::setDtDiffer(bool dt_differ) { simparams.dt_differ = dt_differ; }
t_simparams SimParams::getNumParams() { return this->simparams; }
std::vector<int> SimParams::getDHTSignifVector() {
return this->dht_signif_vector;
}
std::vector<std::string> SimParams::getDHTPropTypeVector() {
return this->dht_prop_type_vector;
}
std::string SimParams::getDHTFile() { return this->dht_file; }
std::string SimParams::getFilesim() { return this->filesim; }
std::string SimParams::getOutDir() { return this->out_dir; }
/* Collect every flag and parameter of `cmdl` that is not listed in flaglist
 * or paramlist respectively. Flags are reported first, then parameters; an
 * empty list means everything was recognised. */
std::list<std::string> SimParams::validateOptions(argh::parser cmdl) {
  std::list<std::string> unknown;

  /* flags that are not part of flaglist */
  for (const auto &flag : cmdl.flags()) {
    if (flaglist.count(flag) == 0) {
      unknown.push_back(flag);
    }
  }

  /* parameters whose name is not part of paramlist */
  for (const auto &entry : cmdl.params()) {
    const std::string &name = entry.first;
    if (paramlist.count(name) == 0) {
      unknown.push_back(name);
    }
  }

  return unknown;
}

274
src/util/SimParams.h Normal file
View File

@ -0,0 +1,274 @@
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
** Copyright (C) 2018-2021 Marco De Lucia (GFZ Potsdam)
**
** POET is free software; you can redistribute it and/or modify it under the
** terms of the GNU General Public License as published by the Free Software
** Foundation; either version 2 of the License, or (at your option) any later
** version.
**
** POET is distributed in the hope that it will be useful, but WITHOUT ANY
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
** A PARTICULAR PURPOSE. See the GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License along with
** this program; if not, write to the Free Software Foundation, Inc., 51
** Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef PARSER_H
#define PARSER_H
#include <string>
#include "RRuntime.h"
#include "argh.h" // Argument handler https://github.com/adishavit/argh
// BSD-licenced
/** Return value if no error occurred */
#define PARSER_OK 0
/** Return value if an error occurred during parsing of program arguments */
#define PARSER_ERROR -1
/** Return value if the user asked for the help message via program flag */
#define PARSER_HELP -2
/** Default for the 'dht-size' parameter: 1073741824 = 2^30 (1 GiB) */
#define DHT_SIZE_PER_PROCESS 1073741824
/** Default for the 'work-package-size' parameter */
#define WORK_PACKAGE_SIZE_DEFAULT 5
namespace poet {
/**
* @brief Defining all simulation parameters
*
* Plain struct holding the numerical and boolean parameters of a simulation
* run.
*
*/
typedef struct {
/** Count of processes in MPI_COMM_WORLD */
int world_size;
/** Rank of process in MPI_COMM_WORLD */
int world_rank;
/** Indicates if DHT should be used */
bool dht_enabled;
/** Apply logarithm to key before rounding */
bool dht_log;
/** Indicates if timestep dt differs between iterations */
bool dt_differ;
/** Indicates when a DHT snapshot should be written */
int dht_snaps;
/** <b>not implemented</b>: How a DHT is distributed over processes */
int dht_strategy;
/** Size of DHT per process in bytes */
unsigned int dht_size_per_process;
/** Default significant digits for rounding */
int dht_significant_digits;
/** Default work package size */
unsigned int wp_size;
/** Indicates if resulting grid should be stored after every iteration */
bool store_result;
} t_simparams;
/**
* @brief Reads information from program arguments and R runtime
*
* Provides functions to initialize parameters of the simulation using command
* line parameters and parameters from the R runtime. This class also parses
* arguments from the command line and decides if an argument is known or
* unknown.
*
* Stores and distributes current simulation parameters at any time.
*
*/
class SimParams {
public:
/**
* @brief Construct a new SimParams object
*
* With all given parameters a new instance of this class will be created.
*
* @param world_rank Rank of process inside MPI_COMM_WORLD
* @param world_size Size of communicator MPI_COMM_WORLD
*/
SimParams(int world_rank, int world_size);
/**
* @brief Parse program arguments
*
* This is done by the argh.h library.
*
* First, the function will check if there is a flag 'help' or 'h'. If this is
* the case a help message is printed and the function will return with
* PARSER_HELP.
*
* Second, if there are not 2 positional arguments an error will be printed to
* stderr and the function returns with PARSER_ERROR.
*
* Then all given program parameters and flags will be read and checked, if
* they are known by validateOptions. A list of all unknown options might be
* returned, printed out and the function will return with PARSER_ERROR.
* Otherwise the function continues.
*
* Now all program arguments will be stored inside t_simparams struct, printed
* out and the function returns with PARSER_OK.
*
* Also, all parsed arguments are distributed to the R runtime.
*
* @param argv Argument value of the program
* @param R Instantiated R runtime
* @return int Returns with 0 if no error occurred, otherwise value less than 0
* is returned.
*/
int parseFromCmdl(char *argv[], RRuntime &R);
/**
* @brief Init std::vector values
*
* This will initialize dht_signif_vector and dht_prop_type_vector internally
* depending on whether vectors are defined by R-Simulation file or not.
* 'init_chemistry' must be run beforehand.
*
* @param R R runtime
* @param col_count Count of variables per grid cell (typically the count of
* columns of each grid cell)
*/
void initVectorParams(RRuntime &R, int col_count);
/**
* @brief Set if dt differs
*
* Set a boolean variable if the timestep differs between iterations of
* simulation.
*
* @param dt_differ Boolean value, if dt differs
*/
void setDtDiffer(bool dt_differ);
/**
* @brief Get the numerical params struct
*
* Returns a struct which contains all numerical or boolean simulation
* parameters.
*
* @return t_simparams Parameter struct
*/
t_simparams getNumParams();
/**
* @brief Get the DHT_Signif_Vector
*
* Returns a vector indicating which significant values are used for each
* variable of a grid cell.
*
* @return std::vector<int> Vector of integers containing information about
* significant digits
*/
std::vector<int> getDHTSignifVector();
/**
* @brief Get the DHT_Prop_Type_Vector
*
* Returns a vector indicating of which type a variable of a grid cell is.
*
* @return std::vector<std::string> Vector of strings defining a type of a
* variable
*/
std::vector<std::string> getDHTPropTypeVector();
/**
* @brief Return name of DHT snapshot.
*
* Name of the DHT file which is used to initialize the DHT with a previously
* written snapshot.
*
* @return std::string Path to the DHT snapshot as given with 'dht-file'
*/
std::string getDHTFile();
/**
* @brief Get the filesim name
*
* Returns a string containing the path to a R file defining the simulation
* (first positional program argument).
*
* @return std::string Path to R file
*/
std::string getFilesim();
/**
* @brief Get the output directory
*
* Returns the name of the path where all output files should be stored
* (second positional program argument).
*
* @return std::string Path to output directory
*/
std::string getOutDir();
private:
/**
* @brief Validate program parameters and flags
*
* Therefore this function iterates over the list of flags and parameters and
* compares them to the class members flaglist and paramlist. If a program
* argument is not included it is put to a list. This list will be returned.
*
* @param cmdl Parsed command line (taken by value)
* @return std::list<std::string> List with all unknown parameters. Might be
* empty.
*/
std::list<std::string> validateOptions(argh::parser cmdl);
/**
* @brief Contains all valid program flags.
*
*/
std::set<std::string> flaglist{"ignore-result", "dht", "dht-nolog"};
/**
* @brief Contains all valid program parameters.
*
*/
std::set<std::string> paramlist{"work-package-size", "dht-signif",
"dht-strategy", "dht-size",
"dht-snaps", "dht-file"};
/**
* @brief Struct containing all simulation parameters
*
* Contains only those values which are standard arithmetic C types.
*
*/
t_simparams simparams;
/**
* @brief Defines significant digits for each variable of a grid cell
*
*/
std::vector<int> dht_signif_vector;
/**
* @brief Defines the type of a variable
*
*/
std::vector<std::string> dht_prop_type_vector;
/**
* @brief Path to a DHT snapshot
*
*/
std::string dht_file;
/**
* @brief Path to R file containing simulation definitions
*
*/
std::string filesim;
/**
* @brief Path to output dir
*
*/
std::string out_dir;
};
} // namespace poet
#endif // PARSER_H

View File

@ -1,3 +1,30 @@
/*
** Copyright (c) 2016, Adi Shavit All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are met:
**
** * Redistributions of source code must retain the above copyright notice, this
** list of conditions and the following disclaimer. * Redistributions in
** binary form must reproduce the above copyright notice, this list of
** conditions and the following disclaimer in the documentation and/or other
** materials provided with the distribution. * Neither the name of nor the
** names of its contributors may be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
** POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <algorithm>

View File

@ -1,277 +0,0 @@
#include "worker.h"
#include "dht_wrapper.h"
#include "global_buffer.h"
#include "r_utils.h"
#include <mpi.h>
#include <iostream>
/* Main loop of a worker process.
 *
 * Blocks on MPI_Probe for messages from rank 0 and dispatches on the tag:
 *   TAG_WORK      - receive a work package, optionally consult the DHT, run
 *                   the R function slave_chemistry on the remaining rows,
 *                   send the results back and optionally fill the DHT
 *   TAG_FINISH    - send timings (and DHT counters if enabled) to rank 0 and
 *                   leave the loop
 *   TAG_DHT_STATS - call print_statistics(), then wait on a barrier
 *   TAG_DHT_STORE - receive a file name, call table_to_file(), then wait on
 *                   a barrier
 *
 * Relies on names declared outside this function (dht_enabled, dht_flags,
 * work_package_size, mpi_buffer, mpi_buffer_results, BUFFER_OFFSET, ...).
 *
 * R <- R runtime used to evaluate the chemistry for each work package
 */
void worker_function(RInside& R)
{
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Status probe_status;
int count;
int local_work_package_size;
int iteration;
double dt, current_sim_time;
double idle_a, idle_b;
double cummul_idle = 0.f;
double dht_get_start=0, dht_get_end=0;
double dht_fill_start=0, dht_fill_end=0;
double phreeqc_time_start=0, phreeqc_time_end=0;
int phreeqc_count = 0;
//timing[0] -> phreeqc
//timing[1] -> dht_get
//timing[2] -> dht_fill
double timing[3];
timing[0] = 0.0;
timing[1] = 0.0;
timing[2] = 0.0;
//dht_perf[0] -> hits
//dht_perf[1] -> miss
//dht_perf[2] -> collisions
uint64_t dht_perf[3];
if (dht_enabled)
{
dht_flags.resize(work_package_size, true); //set size
dht_flags.assign(work_package_size, true); //assign all elements to true (default)
dht_hits = 0;
dht_miss = 0;
dht_collision = 0;
// MDL: This code has now been moved to kin.cpp
// /*Load significance vector from R setup file (or set default)*/
// bool signif_vector_exists = R.parseEval("exists('signif_vector')");
// if (signif_vector_exists)
// {
// dht_significant_digits_vector = as<std::vector<int>>(R["signif_vector"]);
// } else
// {
// dht_significant_digits_vector.assign(dht_object->key_size / sizeof(double), dht_significant_digits);
// }
// /*Load property type vector from R setup file (or set default)*/
// bool prop_type_vector_exists = R.parseEval("exists('prop_type')");
// if (prop_type_vector_exists)
// {
// prop_type_vector = as<std::vector<string>>(R["prop_type"]);
// } else
// {
// prop_type_vector.assign(dht_object->key_size / sizeof(double), "normal");
// }
}
//initialization of helper variables
iteration = 0;
dt = 0;
current_sim_time = 0;
local_work_package_size = 0;
/*worker loop*/
while(1)
{
/*Wait for Message*/
idle_a = MPI_Wtime();
MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &probe_status);
idle_b = MPI_Wtime();
if (probe_status.MPI_TAG == TAG_WORK)
{ /* do work */
/* only time spent waiting for work packages counts as idle time */
cummul_idle += idle_b - idle_a;
/* get number of doubles sent */
MPI_Get_count(&probe_status, MPI_DOUBLE, &count);
/* receive */
MPI_Recv(mpi_buffer, count, MPI_DOUBLE, 0, TAG_WORK, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
//decrement count of work_package by BUFFER_OFFSET
/* afterwards mpi_buffer[0..count) is the payload and mpi_buffer[count..]
   holds the header values read below */
count -= BUFFER_OFFSET;
//check for changes on all additional variables given by the 'header' of mpi_buffer
if (mpi_buffer[count] != local_work_package_size) { //work_package_size
local_work_package_size = mpi_buffer[count];
R["work_package_size"] = local_work_package_size;
R.parseEvalQ("mysetup$work_package_size <- work_package_size");
}
if (mpi_buffer[count+1] != iteration) { //current iteration of simulation
iteration = mpi_buffer[count+1];
R["iter"] = iteration;
R.parseEvalQ("mysetup$iter <- iter");
}
if (mpi_buffer[count+2] != dt) { //current timestep size
dt = mpi_buffer[count+2];
R["dt"] = dt;
R.parseEvalQ("mysetup$dt <- dt");
}
if (mpi_buffer[count+3] != current_sim_time) { //current simulation time ('age' of simulation)
current_sim_time = mpi_buffer[count+3];
R["simulation_time"] = current_sim_time;
R.parseEvalQ("mysetup$simulation_time <- simulation_time");
}
/* 4th double value is currently a placeholder */
// if (mpi_buffer[count+4] != placeholder) {
// placeholder = mpi_buffer[count+4];
// R["mysetup$placeholder"] = placeholder;
// }
/* get df with right structure to fill in work package */
R.parseEvalQ("tmp2 <- head(mysetup$state_C, work_package_size)");
// R.parseEval("print(rownames(tmp2)[1:5])");
// R.parseEval("print(head(tmp2, 2))");
// R.parseEvalQ("tmp2$id <- as.double(rownames(tmp2))");
Rcpp::DataFrame buffer = R.parseEval("tmp2");
if (dht_enabled)
{
// DEBUG
// cout << "RANK " << world_rank << " start checking DHT\n";
//resize helper vector dht_flags of work_package_size changes
if ((int) dht_flags.size() != local_work_package_size) {
dht_flags.resize(local_work_package_size, true); //set size
dht_flags.assign(local_work_package_size, true); //assign all elements to true (default)
}
dht_get_start = MPI_Wtime();
check_dht(R, local_work_package_size, dht_flags, mpi_buffer);
dht_get_end = MPI_Wtime();
//DEBUG
//cout << "RANK " << world_rank << " checking DHT complete \n";
R["dht_flags"] = as<LogicalVector>(wrap(dht_flags));
//R.parseEvalQ("print(head(dht_flags))");
}
/* work */
convert_C_buffer_2_R_Dataframe(mpi_buffer, buffer);
R["work_package_full"] = buffer;
//R["work_package"] = buffer;
//DEBUG
//R.parseEvalQ("print(head(work_package_full))");
//R.parseEvalQ("print( c(length(dht_flags), nrow(work_package_full)) )");
/* with DHT only the rows flagged in dht_flags are simulated */
if (dht_enabled)
{
R.parseEvalQ("work_package <- work_package_full[dht_flags,]");
} else {
R.parseEvalQ("work_package <- work_package_full");
}
//DEBUG
// R.parseEvalQ("print(head(work_package),2)");
// R.parseEvalQ("rownames(work_package) <- work_package$id");
// R.parseEval("print(paste('id %in% colnames(work_package)', 'id' %in% colnames(work_package)");
// R.parseEvalQ("id_store <- rownames(work_package)"); //"[, ncol(work_package)]");
// R.parseEvalQ("work_package$id <- NULL");
R.parseEvalQ("work_package <- as.matrix(work_package)");
unsigned int nrows = R.parseEval("nrow(work_package)");
if (nrows > 0)
{
/*Single Line error Workaround*/
if (nrows <=1)
{
//duplicate line to enable correct simmulation
R.parseEvalQ("work_package <- work_package[rep(1:nrow(work_package), times = 2), ]");
}
phreeqc_count++;
phreeqc_time_start = MPI_Wtime();
// MDL
// R.parseEvalQ("print('Work_package:\n'); print(head(work_package , 2)); cat('RCpp: worker_function:', local_rank, ' \n')");
R.parseEvalQ("result <- as.data.frame(slave_chemistry(setup=mysetup, data = work_package))");
phreeqc_time_end = MPI_Wtime();
// R.parseEvalQ("result$id <- id_store");
} else
{
//cout << "Work-Package is empty, skipping phreeqc!" << endl;
}
if (dht_enabled)
{
R.parseEvalQ("result_full <- work_package_full");
if (nrows > 0)
R.parseEvalQ("result_full[dht_flags,] <- result");
} else {
/* NOTE(review): if nrows == 0 here, 'result' was not computed for this
   package -- presumably cannot happen without DHT; confirm */
R.parseEvalQ("result_full <- result");
}
Rcpp::DataFrame result = R.parseEval("result_full");
convert_R_Dataframe_2_C_buffer(mpi_buffer_results, result);
/* send results to master */
/* non-blocking send; completed by the MPI_Wait below, so the DHT can be
   filled in the meantime */
MPI_Request send_req;
MPI_Isend(mpi_buffer_results, count, MPI_DOUBLE, 0, TAG_WORK, MPI_COMM_WORLD, &send_req);
if (dht_enabled)
{
dht_fill_start = MPI_Wtime();
fill_dht(R, local_work_package_size, dht_flags, mpi_buffer, mpi_buffer_results);
dht_fill_end = MPI_Wtime();
timing[1] += dht_get_end - dht_get_start;
timing[2] += dht_fill_end - dht_fill_start;
}
/* NOTE(review): phreeqc_time_start/end are only updated when nrows > 0, so
   for a skipped package the duration of the previous run is added again */
timing[0] += phreeqc_time_end - phreeqc_time_start;
MPI_Wait(&send_req,MPI_STATUS_IGNORE);
} else if (probe_status.MPI_TAG == TAG_FINISH)
{ /* recv and die */
/* before death, submit profiling/timings to master*/
MPI_Recv(NULL, 0, MPI_DOUBLE, 0, TAG_FINISH, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
//timings
MPI_Send(timing, 3, MPI_DOUBLE, 0, TAG_TIMING, MPI_COMM_WORLD);
MPI_Send(&phreeqc_count, 1, MPI_INT, 0, TAG_TIMING, MPI_COMM_WORLD);
MPI_Send(&cummul_idle, 1, MPI_DOUBLE, 0, TAG_TIMING, MPI_COMM_WORLD);
if(dht_enabled)
{
//dht_perf
dht_perf[0] = dht_hits;
dht_perf[1] = dht_miss;
dht_perf[2] = dht_collision;
MPI_Send(dht_perf, 3, MPI_UNSIGNED_LONG_LONG, 0, TAG_DHT_PERF, MPI_COMM_WORLD);
}
break;
} else if ((probe_status.MPI_TAG == TAG_DHT_STATS)) {
/* consume the empty message, print the statistics and synchronise */
MPI_Recv(NULL, 0, MPI_DOUBLE, 0, TAG_DHT_STATS, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
print_statistics();
MPI_Barrier(MPI_COMM_WORLD);
} else if ((probe_status.MPI_TAG == TAG_DHT_STORE)) {
/* the message payload is the file name; calloc with count + 1 keeps the
   received characters zero-terminated */
char* outdir;
MPI_Get_count(&probe_status, MPI_CHAR, &count);
outdir = (char *) calloc(count + 1, sizeof(char));
MPI_Recv(outdir, count, MPI_CHAR, 0, TAG_DHT_STORE, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
int res = table_to_file((char *) outdir);
/* only the process with rank 2 reports the outcome */
if (res != DHT_SUCCESS) {
if (world_rank == 2) cerr << "CPP: Worker: Error in writing current state of DHT to file (TAG_DHT_STORE)" << endl;
} else {
if (world_rank == 2) cout << "CPP: Worker: Successfully written DHT to file " << outdir << endl;
}
free(outdir);
MPI_Barrier(MPI_COMM_WORLD);
}
}
}

View File

@ -1,17 +0,0 @@
#pragma once
#include <RInside.h>
using namespace std;
using namespace Rcpp;
/*Functions*/
/* Message loop of a worker process; returns after a TAG_FINISH message. */
void worker_function(RInside &R);
/*Globals*/
/* MPI message tags used between rank 0 and the workers */
/* work package (to worker) and its results (back to rank 0) */
#define TAG_WORK 42
/* tells a worker to report its timings and leave its loop */
#define TAG_FINISH 43
/* timing/profiling values sent by a worker */
#define TAG_TIMING 44
/* DHT hit/miss/collision counters sent by a worker */
#define TAG_DHT_PERF 45
/* asks a worker to print its DHT statistics */
#define TAG_DHT_STATS 46
/* asks a worker to write the DHT to the file named in the message */
#define TAG_DHT_STORE 47