Initial commit

This commit is contained in:
Max Lübke 2020-11-18 14:24:12 +01:00
commit 8f687a18c1
No known key found for this signature in database
GPG Key ID: D3201E51647D1199
24 changed files with 7319 additions and 0 deletions

63
README.md Normal file
View File

@ -0,0 +1,63 @@

<!--
Time-stamp: "Last modified 2020-02-01 18:14:13 delucia"
-->
# install libraries from MDL
library(devtools)
devtools::install_git("https://gitext.gfz-potsdam.de/delucia/RedModRphree.git")
devtools::install_git("https://gitext.gfz-potsdam.de/delucia/Rmufits.git")
# USAGE
mpirun ./kin <OPTIONS> <simfile.R> <DIRECTORY>
OPTIONS:
--work-package-size=<1-n> ... size of work packages (default 5)
--ignore-result ... disables store of simulation resuls
DHT:
--dht ... enable dht (default is off)
--dht-log ... enable logarithm application before rounding (default is off)
--dht-signif=<1-n> ... set rounding to number of significant digits (default 5)
(only used if no vector is given in setup file)
(for individual values per column use R vector "signif_vector" in setup file)
--dht-strategy=<0-1> ... change dht strategy, not implemented yet (default 0, dht on workers)
--dht-size=<1-n> ... size of dht per process involved (see dht-strategy) in byte (default 1GiB)
--dht-snaps=<0-2> ... enable or disable storage of DHT snapshots
0 = snapshots are disabled
1 = only stores snapshot at the end of the simulation with name <DIRECTORY>.dht
2 = stores snapshot at the end and after each iteration
iteration snapshot files are stored in <DIRECTORY>/iter<n>.dht
--dht-file=<snapshot> ... initializes DHT with the given snapshot file
###############################################################################
# about the usage of MPI_Wtime()
From the OpenMPI Man Page:
For example, on platforms that support it, the clock_gettime() function will be used
to obtain a monotonic clock value with whatever precision is supported on that platform (e.g., nanoseconds).
# External Libraries
Cmdline Parsing -> https://github.com/adishavit/argh
# Examples included (more to come)
1) SimDol2D.R ... simple chemistry (Calcite/Dolomite) on a 50x50 2D grid, 20 time steps
2) SimDolKtz.R ... simple chemistry (Calcite/Dolomite) on Ketzin grid (~650k elements), 20 time steps
The flow snapshots are NOT INCLUDED in svn but must be provided separately

423
src/DHT.cpp Normal file
View File

@ -0,0 +1,423 @@
#include "DHT.h"
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
/*
* determining destination rank and index by hash of key
*
* return values by reference
*/
static void determine_dest(uint64_t hash, int comm_size, unsigned int table_size, unsigned int *dest_rank, unsigned int *index, unsigned int index_count) {
uint64_t tmp;
int char_hop = 9-index_count;
unsigned int i;
for (i = 0; i < index_count ; i++) {
tmp = 0;
memcpy(&tmp,(unsigned char *)&hash+i, char_hop);
index[i] = (unsigned int) (tmp % table_size);
}
*dest_rank = (unsigned int) (hash % comm_size);
}
/**
* set write flag to 1
*/
static void set_flag(char* flag_byte) {
*flag_byte = 0;
*flag_byte |= (1 << 0);
}
/**
* return 1 if write flag is set
* else 0
*/
static int read_flag(char flag_byte) {
if ((flag_byte & 0x01) == 0x01) {
return 1;
} else return 0;
}
/*
* allocating memory for DHT object and buckets.
* creating MPI window for OSC
* filling DHT object with passed by parameters, window, 2 counters for R/W errors and 2 pointers with allocated memory for further use
* return DHT object
*/
DHT* DHT_create(MPI_Comm comm, unsigned int size, int data_size, int key_size, uint64_t(*hash_func) (int, void*)) {
DHT *object;
MPI_Win window;
void* mem_alloc;
int comm_size, tmp;
tmp = (int) ceil(log2(size));
if (tmp%8 != 0) tmp = tmp + (8-(tmp%8));
object = (DHT*) malloc(sizeof(DHT));
if (object == NULL) return NULL;
//every memory allocation has 1 additional byte for flags etc.
if (MPI_Alloc_mem(size * (1 + data_size + key_size), MPI_INFO_NULL, &mem_alloc) != 0) return NULL;
if (MPI_Comm_size(comm, &comm_size) != 0) return NULL;
memset(mem_alloc, '\0', size * (1 + data_size + key_size));
if (MPI_Win_create(mem_alloc, size * (1 + data_size + key_size), (1 + data_size + key_size), MPI_INFO_NULL, comm, &window) != 0) return NULL;
object->data_size = data_size;
object->key_size = key_size;
object->table_size = size;
object->window = window;
object->hash_func = hash_func;
object->comm_size = comm_size;
object->communicator = comm;
object->read_misses = 0;
object->collisions = 0;
object->recv_entry = malloc(1 + data_size + key_size);
object->send_entry = malloc(1 + data_size + key_size);
object->index_count = 9-(tmp/8);
object->index = (unsigned int*) malloc((9-(tmp/8))*sizeof(int));
object->mem_alloc = mem_alloc;
DHT_stats *stats;
stats = (DHT_stats*) malloc(sizeof(DHT_stats));
if (stats == NULL) return NULL;
object->stats = stats;
object->stats->writes_local = (int*) calloc(comm_size, sizeof(int));
object->stats->old_writes = 0;
object->stats->read_misses = 0;
object->stats->collisions = 0;
object->stats->w_access = 0;
object->stats->r_access = 0;
return object;
}
/*
* puts passed by data with key to DHT
*
* returning DHT_MPI_ERROR = -1 if MPI error occurred
* else DHT_SUCCESS = 0 if success
*/
int DHT_write(DHT *table, void* send_key, void* send_data) {
unsigned int dest_rank, i;
int result = DHT_SUCCESS;
table->stats->w_access++;
//determine destination rank and index by hash of key
determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, table->table_size, &dest_rank, table->index, table->index_count);
//concatenating key with data to write entry to DHT
set_flag((char *) table->send_entry);
memcpy((char *) table->send_entry + 1, (char *) send_key, table->key_size);
memcpy((char *) table->send_entry + table->key_size + 1, (char *) send_data, table->data_size);
//locking window of target rank with exclusive lock
if (MPI_Win_lock(MPI_LOCK_EXCLUSIVE, dest_rank, 0, table->window) != 0)
return DHT_MPI_ERROR;
for (i = 0; i < table->index_count; i++)
{
if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
//increment collision counter if receiving key doesn't match sending key
//,entry has write flag + last index is reached
if (read_flag(*(char *)table->recv_entry)) {
if (memcmp(send_key, (char *) table->recv_entry + 1, table->key_size) != 0) {
if (i == (table->index_count)-1) {
table->collisions += 1;
table->stats->collisions += 1;
result = DHT_WRITE_SUCCESS_WITH_COLLISION;
break;
}
} else break;
} else {
table->stats->writes_local[dest_rank]++;
break;
}
}
//put data to DHT
if (MPI_Put(table->send_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
//unlock window of target rank
if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
return result;
}
/*
* gets data from the DHT by key
*
* return DHT_SUCCESS = 0 if success
* DHT_MPI_ERROR = -1 if MPI error occurred
* DHT_READ_ERROR = -2 if receiving key doesn't match sending key
*/
int DHT_read(DHT *table, void* send_key, void* destination) {
unsigned int dest_rank, i;
table->stats->r_access++;
//determine destination rank and index by hash of key
determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, table->table_size, &dest_rank, table->index, table->index_count);
//locking window of target rank with shared lock
if (MPI_Win_lock(MPI_LOCK_SHARED, dest_rank, 0, table->window) != 0) return DHT_MPI_ERROR;
//receive data
for (i = 0; i < table->index_count; i++) {
if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
//increment read error counter if write flag isn't set or key doesn't match passed by key + last index reached
//else copy data to dereference of passed by destination pointer
if ((read_flag(*(char *) table->recv_entry)) == 0) {
table->read_misses += 1;
table->stats->read_misses += 1;
if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
return DHT_READ_ERROR;
}
if (memcmp(((char*)table->recv_entry) + 1, send_key, table->key_size) != 0) {
if (i == (table->index_count)-1) {
table->read_misses += 1;
table->stats->read_misses += 1;
if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
return DHT_READ_ERROR;
}
} else break;
}
//unlock window of target rank
if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
memcpy((char *) destination, (char *) table->recv_entry + table->key_size + 1, table->data_size);
return DHT_SUCCESS;
}
int DHT_to_file(DHT* table, char* filename) {
//open file
MPI_File file;
if (MPI_File_open(table->communicator, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &file) != 0) return DHT_FILE_IO_ERROR;
int rank;
MPI_Comm_rank(table->communicator, &rank);
//write header (key_size and data_size)
if (rank == 0) {
if (MPI_File_write(file, &table->key_size, 1, MPI_INT, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
if (MPI_File_write(file, &table->data_size, 1, MPI_INT, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
}
if (MPI_File_seek_shared(file, DHT_HEADER_SIZE, MPI_SEEK_SET) != 0) return DHT_FILE_IO_ERROR;
char* ptr;
int bucket_size = table->key_size + table->data_size + 1;
//iterate over local memory
for (unsigned int i = 0; i < table->table_size; i++) {
ptr = (char *) table->mem_alloc + (i * bucket_size);
//if bucket has been written to (checked by written_flag)...
if (read_flag(*ptr)) {
//write key and data to file
if (MPI_File_write_shared(file, ptr + 1, bucket_size - 1, MPI_BYTE, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
}
}
//close file
if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR;
return DHT_SUCCESS;
}
int DHT_from_file(DHT* table, char* filename) {
MPI_File file;
MPI_Offset f_size;
int e_size, m_size, cur_pos, rank, offset;
char* buffer;
void* key;
void* data;
if (MPI_File_open(table->communicator, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &file) != 0) return DHT_FILE_IO_ERROR;
if (MPI_File_get_size(file, &f_size) != 0) return DHT_FILE_IO_ERROR;
MPI_Comm_rank(table->communicator, &rank);
e_size = table->key_size + table->data_size;
m_size = e_size > DHT_HEADER_SIZE ? e_size : DHT_HEADER_SIZE;
buffer = (char *) malloc(m_size);
if (MPI_File_read(file, buffer, DHT_HEADER_SIZE, MPI_BYTE, MPI_STATUS_IGNORE) != 0) return DHT_FILE_READ_ERROR;
if (*(int *) buffer != table->key_size) return DHT_WRONG_FILE;
if (*(int *) (buffer + 4) != table->data_size) return DHT_WRONG_FILE;
offset = e_size*table->comm_size;
if (MPI_File_seek(file, DHT_HEADER_SIZE, MPI_SEEK_SET) != 0) return DHT_FILE_IO_ERROR;
cur_pos = DHT_HEADER_SIZE + (rank * e_size);
while(cur_pos < f_size) {
if (MPI_File_seek(file, cur_pos, MPI_SEEK_SET) != 0) return DHT_FILE_IO_ERROR;
MPI_Offset tmp;
MPI_File_get_position(file, &tmp);
if (MPI_File_read(file, buffer, e_size, MPI_BYTE, MPI_STATUS_IGNORE) != 0) return DHT_FILE_READ_ERROR;
key = buffer;
data = (buffer+table->key_size);
if (DHT_write(table, key, data) == DHT_MPI_ERROR) return DHT_MPI_ERROR;
cur_pos += offset;
}
free (buffer);
if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR;
return DHT_SUCCESS;
}
/*
* frees up memory and accumulate counter
*/
int DHT_free(DHT* table, int* collision_counter, int* readerror_counter) {
int buf;
if (collision_counter != NULL) {
buf = 0;
if (MPI_Reduce(&table->collisions, &buf, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
*collision_counter = buf;
}
if (readerror_counter != NULL) {
buf = 0;
if (MPI_Reduce(&table->read_misses, &buf, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
*readerror_counter = buf;
}
if (MPI_Win_free(&(table->window)) != 0) return DHT_MPI_ERROR;
if (MPI_Free_mem(table->mem_alloc) != 0) return DHT_MPI_ERROR;
free(table->recv_entry);
free(table->send_entry);
free(table->index);
free(table->stats->writes_local);
free(table->stats);
free(table);
return DHT_SUCCESS;
}
/*
* prints a table with statistics about current use of DHT
* for each participating process and summed up results containing:
* 1. occupied buckets (in respect to the memory of this process)
* 2. free buckets (in respect to the memory of this process)
* 3. calls of DHT_write (w_access)
* 4. calls of DHT_read (r_access)
* 5. read misses (see DHT_READ_ERROR)
* 6. collisions (see DHT_WRITE_SUCCESS_WITH_COLLISION)
* 3-6 will reset with every call of this function
* finally the amount of new written entries is printed out (in relation to last call of this funtion)
*/
int DHT_print_statistics(DHT *table) {
int *written_buckets;
int *read_misses, sum_read_misses;
int *collisions, sum_collisions;
int sum_w_access, sum_r_access, *w_access, *r_access;
int rank;
MPI_Comm_rank(table->communicator, &rank);
//disable possible warning of unitialized variable, which is not the case
//since we're using MPI_Gather to obtain all values only on rank 0
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
//obtaining all values from all processes in the communicator
if (rank == 0) read_misses = (int*) malloc(table->comm_size*sizeof(int));
if (MPI_Gather(&table->stats->read_misses, 1, MPI_INT, read_misses, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR;
if (MPI_Reduce(&table->stats->read_misses, &sum_read_misses, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
table->stats->read_misses = 0;
if (rank == 0) collisions = (int*) malloc(table->comm_size*sizeof(int));
if (MPI_Gather(&table->stats->collisions, 1, MPI_INT, collisions, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR;
if (MPI_Reduce(&table->stats->collisions, &sum_collisions, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
table->stats->collisions = 0;
if (rank == 0) w_access = (int*) malloc(table->comm_size*sizeof(int));
if (MPI_Gather(&table->stats->w_access, 1, MPI_INT, w_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR;
if (MPI_Reduce(&table->stats->w_access, &sum_w_access, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
table->stats->w_access = 0;
if (rank == 0) r_access = (int*) malloc(table->comm_size*sizeof(int));
if (MPI_Gather(&table->stats->r_access, 1, MPI_INT, r_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR;
if (MPI_Reduce(&table->stats->r_access, &sum_r_access, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
table->stats->r_access = 0;
if (rank == 0) written_buckets = (int*) calloc(table->comm_size, sizeof(int));
if (MPI_Reduce(table->stats->writes_local, written_buckets, table->comm_size, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
if (rank == 0) { //only process with rank 0 will print out results as a table
int sum_written_buckets = 0;
for (int i=0; i < table->comm_size; i++) {
sum_written_buckets += written_buckets[i];
}
int members = 7;
int padsize = (members*12)+1;
char pad[padsize+1];
memset(pad, '-', padsize*sizeof(char));
pad[padsize]= '\0';
printf("\n");
printf("%-35s||resets with every call of this function\n", " ");
printf("%-11s|%-11s|%-11s||%-11s|%-11s|%-11s|%-11s\n",
"rank",
"occupied",
"free",
"w_access",
"r_access",
"read misses",
"collisions");
printf("%s\n", pad);
for (int i = 0; i < table->comm_size; i++) {
printf("%-11d|%-11d|%-11d||%-11d|%-11d|%-11d|%-11d\n",
i,
written_buckets[i],
table->table_size-written_buckets[i],
w_access[i],
r_access[i],
read_misses[i],
collisions[i]);
}
printf("%s\n", pad);
printf("%-11s|%-11d|%-11d||%-11d|%-11d|%-11d|%-11d\n",
"sum",
sum_written_buckets,
(table->table_size*table->comm_size)-sum_written_buckets,
sum_w_access,
sum_r_access,
sum_read_misses,
sum_collisions);
printf("%s\n", pad);
printf("%s %d\n",
"new entries:",
sum_written_buckets - table->stats->old_writes);
printf("\n");
fflush(stdout);
table->stats->old_writes = sum_written_buckets;
}
//enable warning again
#pragma GCC diagnostic pop
MPI_Barrier(table->communicator);
return DHT_SUCCESS;
}

112
src/DHT.h Normal file
View File

@ -0,0 +1,112 @@
/*
* File: DHT.h
* Author: max luebke
*
* Created on 16. November 2017, 09:14
*/
#ifndef DHT_H
#define DHT_H
#include <mpi.h>
#include <stdint.h>
#define DHT_MPI_ERROR -1
#define DHT_READ_ERROR -2
#define DHT_SUCCESS 0
#define DHT_WRITE_SUCCESS_WITH_COLLISION 1
#define DHT_WRONG_FILE 11
#define DHT_FILE_IO_ERROR 12
#define DHT_FILE_READ_ERROR 13
#define DHT_FILE_WRITE_ERROR 14
#define DHT_HEADER_SIZE 8
typedef struct {;
int *writes_local, old_writes;
int read_misses, collisions;
int w_access, r_access;
} DHT_stats;
typedef struct {
MPI_Win window;
int data_size;
int key_size;
unsigned int table_size;
MPI_Comm communicator;
int comm_size;
uint64_t(*hash_func) (int, void*);
void* recv_entry;
void* send_entry;
void* mem_alloc;
int read_misses;
int collisions;
unsigned int *index;
unsigned int index_count;
DHT_stats *stats;
} DHT;
/*
* parameters:
* MPI_Comm comm - communicator of processes that are holding the DHT
* int size_per_process - number of buckets each process will create
* int data_size - size of data in bytes
* int key_size - size of key in bytes
* *hash_func - pointer to hashfunction
*
* return:
* NULL if error during initialization
* DHT* if success
*/
extern DHT* DHT_create(MPI_Comm comm, unsigned int size_per_process, int data_size, int key_size, uint64_t(*hash_func)(int, void*));
/*
* parameters:
* DHT *table - DHT_object created by DHT_create
* void* data - pointer to data
* void* - pointer to key
*
* return:
* error value (see above)
*/
extern int DHT_write(DHT *table, void* key, void* data);
/*
* parameters:
* DHT *table - DHT_object created by DHT_create
* void* key - pointer to key
* void* destination - pointer which will hold the resulting data from DHT
*
* return:
* error value (see above)
*/
extern int DHT_read(DHT *table, void* key, void* destination);
extern int DHT_to_file(DHT *table, char* filename);
extern int DHT_from_file(DHT *table, char* filename);
/*
* parameters:
* DHT *table - DHT_object created by DHT_create
* int* collision_counter - pointer which will hold the total count of collisions
* int* readerror_counter - pointer which will hold the total count of read errors
*
* return:
* error value (see above)
*/
extern int DHT_free(DHT *table, int* collision_counter, int* readerror_counter);
/*
* parameters:
* DHT *table - DHT_object created by DHT_create
*
* return:
* error value (DHT_SUCCESS or DHT_MPI_ERROR)
*/
extern int DHT_print_statistics(DHT *table);
#endif /* DHT_H */

51
src/Makefile Normal file
View File

@ -0,0 +1,51 @@
## Simple Makefile for MPI use of RInside
## comment this out if you need a different version of R,
## and set R_HOME accordingly as an environment variable
R_HOME := $(shell R RHOME)
sources := $(wildcard *.cpp)
programs := kin
# OpenMPI header and libraries
MPICPPFLAGS := $(shell mpic++ -showme:compile)
MPILIBS := $(shell mpic++ -showme:link)
## include headers and libraries for R
RCPPFLAGS := $(shell $(R_HOME)/bin/R CMD config --cppflags)
RLDFLAGS := $(shell $(R_HOME)/bin/R CMD config --ldflags)
RBLAS := $(shell $(R_HOME)/bin/R CMD config BLAS_LIBS)
RLAPACK := $(shell $(R_HOME)/bin/R CMD config LAPACK_LIBS)
## include headers and libraries for Rcpp interface classes
## note that RCPPLIBS will be empty with Rcpp (>= 0.11.0) and can be omitted
RCPPINCL := $(shell echo 'Rcpp:::CxxFlags()' | $(R_HOME)/bin/R --vanilla --slave)
RCPPLIBS := $(shell echo 'Rcpp:::LdFlags()' | $(R_HOME)/bin/R --vanilla --slave)
## include headers and libraries for RInside embedding classes
RINSIDEINCL := $(shell echo 'RInside:::CxxFlags()' | $(R_HOME)/bin/R --vanilla --slave)
RINSIDELIBS := $(shell echo 'RInside:::LdFlags()' | $(R_HOME)/bin/R --vanilla --slave)
## compiler etc settings used in default make rules
CXX := $(shell $(R_HOME)/bin/R CMD config CXX)
CPPFLAGS := -D STRICT_R_HEADERS -Wall -ggdb -O3 $(shell $(R_HOME)/bin/R CMD config CPPFLAGS)
CXXFLAGS := $(MPICPPFLAGS) $(RCPPFLAGS) $(RCPPINCL) $(RINSIDEINCL) $(shell $(R_HOME)/bin/R CMD config CXXFLAGS)
LDLIBS := $(MPILIBS) $(RLDFLAGS) $(RBLAS) $(RLAPACK) $(RCPPLIBS) $(RINSIDELIBS) -lcrypto ## -fpermissive
default: all
kin : DHT.cpp worker.cpp dht_wrapper.cpp r_utils.cpp kin.cpp
all : $(programs)
@test -x /usr/bin/strip && strip $^
run : $(programs)
@test -x /usr/bin/mpirun && for p in $(programs); do echo; echo "Running $$p:"; mpirun -n 4 ./$$p; done
clean:
rm -vf $(programs)

214
src/RFun_Eval.R Normal file
View File

@ -0,0 +1,214 @@
## Simple library of functions to assess and visualize the results of the coupled simulations
## Time-stamp: "Last modified 2020-02-04 23:21:37 delucia"
require(RedModRphree)
require(Rmufits) ## essentially for PlotCartCellData
## function which reads all simulation results in a given directory
ReadRTSims <- function(dir) {
files_full <- list.files(dir, pattern="iter.*rds", full.names=TRUE)
files_name <- list.files(dir, pattern="iter.*rds", full.names=FALSE)
res <- lapply(files_full, readRDS)
names(res) <- gsub(".rds","",files_name, fixed=TRUE)
return(res)
}
## function which reads all successive DHT stored in a given directory
ReadAllDHT <- function(dir) {
files_full <- list.files(dir, pattern="iter.*dht", full.names=TRUE)
files_name <- list.files(dir, pattern="iter.*dht", full.names=FALSE)
res <- lapply(files_full, ReadDHT)
names(res) <- gsub(".rds","",files_name, fixed=TRUE)
return(res)
}
## function which reads one .dht file and gives a matrix
ReadDHT <- function(file)
{
conn <- file(file, "rb") ## open for reading in binary mode
if (!isSeekable(conn))
stop("Connection not seekable")
## we first reposition ourselves to the end of the file...
tmp <- seek(conn, where=0, origin = "end")
## ... and then back to the origin so to store the length in bytes
flen <- seek(conn, where=0, origin = "start")
## we read the first 2 integers (4 bytes each) containing dimensions in bytes
dims <- readBin(conn, what="integer", n=2)
## compute dimensions of the data
tots <- sum(dims)
ncol <- tots/8
nrow <- (flen - 8)/tots ## 8 here is 2*sizeof("int")
buff <- readBin(conn, what="double", n=ncol*nrow)
## close connection
close(conn)
res <- matrix(buff, nrow=nrow, ncol=ncol, byrow=TRUE)
return(res)
}
## Scatter plots of each variable in the iteration
PlotScatter <- function(sam1, sam2, which=NULL, labs=c("NO DHT", "DHT"), pch=".", cols=3, ...)
{
if ((!is.data.frame(sam1)) & ("T" %in% names(sam1)))
sam1 <- sam1$C
if ((!is.data.frame(sam2)) & ("T" %in% names(sam2)))
sam2 <- sam2$C
if (is.numeric(which))
inds <- which
else if (is.character(which))
inds <- match(which, colnames(sam1))
else if (is.null(which))
inds <- seq_along(colnames(sam1))
rows <- nrow(matrix(seq_along(inds), ncol=cols))
par(mfrow=c(rows, cols))
a <- lapply(inds, function(x) {
plot(sam1[,x], sam2[,x], main=colnames(sam1)[x], xlab=labs[1], ylab=labs[2], pch=pch, col="red", ...)
abline(0,1, col="grey", cex=1.5)
})
invisible()
}
##### Some metrics for relative comparison
## Using range as norm
RranRMSE <- function (pred, obs)
sqrt(mean((pred - obs)^2))/abs(max(pred) - min(pred))
## Using max val as norm
RmaxRMSE <- function (pred, obs)
sqrt(mean((pred - obs)^2)/abs(max(pred)))
## Using sd as norm
RsdRMSE <- function (pred, obs)
sqrt(mean((pred - obs)^2))/sd(pred)
## Using mean as norm
RmeanRMSE <- function (pred, obs)
sqrt(mean((pred - obs)^2))/mean(pred)
## Using mean as norm
RAEmax <- function (pred, obs)
mean(abs(pred - obs))/max(pred)
## Max absolute error
MAE <- function (pred, obs)
max(abs(pred - obs))
## workhorse function for ComputeErrors and its use with mapply
AppliedFun <- function(a, b, .fun) mapply(.fun, as.list(a$C), as.list(b$C))
## Compute the diffs between two simulation, iter by iter,
## with a given metric (passed in form of function name to this function)
ComputeErrors <- function(sim1, sim2, FUN=RMSE)
{
if (length(sim1)!= length(sim2)) {
cat("The simulations do not have the same length, subsetting to the shortest\n")
a <- min(length(sim1), length(sim2))
sim1 <- sim1[1:a]
sim2 <- sim2[1:a]
}
if (!is.function(match.fun(FUN))) {
cat("Invalid function\n")
}
t(mapply(AppliedFun, sim1, sim2, MoreArgs=list(.fun=FUN)))
}
## Function to display the error progress between 2 simulations
ErrorProgress <- function(mat, ignore, colors, metric, ...)
{
if (missing(colors))
colors <- sample(rainbow(ncol(mat)))
if (missing(metric))
metric <- "Metric"
## if the optional argument "ignore" (a character vector) is
## passed, we remove the matching column names
if (!missing(ignore)) {
to_remove <- match(ignore, colnames(mat))
mat <- mat[, -to_remove]
colors <- colors[-to_remove]
}
yc <- mat[nrow(mat),]
par(mar=c(5,4,2,6))
matplot(mat, type="l", lty=1, lwd=2, col=colors, xlab="iteration", ylab=metric, ...)
mtext(colnames(mat), side = 4, line = 2, outer = FALSE, at = yc, adj = 0.5, col = colors, las=2)
}
## Function which exports all simulations to ParaView's .vtu Requires
## package RcppVTK
ExportToParaview <- function(vtu, nameout, results){
require(RcppVTK)
n <- length(results)
vars <- colnames(results[[1]])
## strip eventually present ".vtu" from nameout
nameout <- sub(".vtu", "", nameout, fixed=TRUE)
namesteps <- paste0(nameout, ".", sprintf("%04d",seq(1,n)), ".vtu")
for (step in seq_along(results)) {
file.copy(from=vtu, to=namesteps[step], overwrite = TRUE)
cat(paste("Saving step ", step, " in file ", namesteps[step], "\n"))
ret <- ExportMatrixToVTU (fin=vtu, fout=namesteps[step], names=colnames(results[[step]]), mat=results[[step]])
}
invisible(ret)
}
## Version of Rmufits::PlotCartCellData with the ability to fix the
## "breaks" for color coding of 2D simulations
Plot2DCellData <- function (data, grid, nx, ny, contour = TRUE,
nlevels = 12, breaks, palette = "heat.colors",
rev.palette = TRUE, scale = TRUE, ...)
{
if (!missing(grid)) {
xc <- unique(sort(grid$cell$XCOORD))
yc <- unique(sort(grid$cell$YCOORD))
nx <- length(xc)
ny <- length(yc)
if (!length(data) == nx * ny)
stop("Wrong nx, ny or grid")
}
else {
xc <- seq(1, nx)
yc <- seq(1, ny)
}
z <- matrix(round(data, 6), ncol = nx, nrow = ny, byrow = TRUE)
pp <- t(z[rev(seq(1, nrow(z))), ])
if (missing(breaks)) {
breaks <- pretty(data, n = nlevels)
}
breakslen <- length(breaks)
colors <- do.call(palette, list(n = breakslen - 1))
if (rev.palette)
colors <- rev(colors)
if (scale) {
par(mfrow = c(1, 2))
nf <- layout(matrix(c(1, 2), 1, 2, byrow = TRUE), widths = c(4,
1))
}
par(las = 1, mar = c(5, 5, 3, 1))
image(xc, yc, pp, xlab = "X [m]", ylab = "Y[m]", las = 1,
asp = 1, breaks = breaks, col = colors, axes = FALSE,
...)
axis(1)
axis(2)
if (contour)
contour(unique(sort(xc)), unique(sort(yc)), pp, breaks = breaks,
add = TRUE)
if (scale) {
par(las = 1, mar = c(5, 1, 5, 5))
PlotImageScale(data, breaks = breaks, add.axis = FALSE,
axis.pos = 4, col = colors)
axis(4, at = breaks)
}
invisible(pp)
}

128
src/SimComp2D.R Normal file
View File

@ -0,0 +1,128 @@
## chemical database
db <- RPhreeFile("mdl_quint_kin.dat", is.db=TRUE)
phreeqc::phrLoadDatabaseString(db)
## only the directory
demodir <- system.file("extdata", "demo_rtwithmufits", package="Rmufits")
prop <- c("Al", "C","Ca","Cl","Fe", "K", "Mg","Na", "Si", "pH", ## "pe",
"Albite", "Calcite", "Chlorite", "Illite", "Kaolinite")
signif_vector <- c(7,7,7,7,7,7,7,7,7,6, 5,5,5,5,5)
prop_type <- rep("normal", length(signif_vector))
base <- c("SOLUTION 1",
"units mol/kgw",
"pH 6.77",
"temp 35",
"-water 1",
"Al 8.06386e-09",
"C 0.0006108294",
"Ca 0.09709463",
"Cl 4.340042",
"Fe 1.234357e-05",
"K 0.01117434",
"Mg 0.0406959",
"Na 4.189209",
"Si 0.0001935754",
"INCREMENTAL_REACTIONS true",
"KINETICS 1 ",
"-steps 86400",
"-bad_step_max 10000",
"-cvode true",
"Albite",
"-m 8.432165", ## 1540.0",
"-parms 01.54 100",
"Calcite",
"-m 0.0",
"-parms 10 100",
"Chlorite",
"-m 1.106585", ## 202.100",
"-parms 64.84 100",
"Illite",
"-m 0.9549153", ## 174.400",
"-parms 43.38 100",
"Kaolinite",
"-m 0.0",
"-parms 29.17 100",
"END")
selout <- c("KNOBS",
"-convergence_tolerance 1E-6",
"SELECTED_OUTPUT",
"-reset false",
"USER_PUNCH",
"-head Al C Ca Cl Fe K Mg Na Si pH Albite Calcite Chlorite Illite Kaolinite", ## pe
"10 PUNCH TOT(\"Al\"), TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Fe\"), TOT(\"K\"), TOT(\"Mg\"), TOT(\"Na\"), TOT(\"Si\"), -LA(\"H+\"), KIN(\"Albite\"), KIN(\"Calcite\"), KIN(\"Chlorite\"), KIN(\"Illite\"), KIN(\"Kaolinite\")" )
## Define initial conditions as equilibrium with primary minerals
ipr <- c(Al = 8.689e-10,
C = 0.0006108,
Ca = 0.09709,
Cl = 4.34,
Fe = 1.802e-06,
K = 0.01131,
Mg = 0.04074,
Na = 4.189,
Si = 7.653e-05,
pH = 6.889,
Albite = 5.0,
Calcite = 0.0,
Chlorite = 10.0,
Illite = 2.0,
Kaolinite = 0.0
)
initstate <- matrix(rep(ipr, 2500), byrow=TRUE, ncol=length(ipr))
colnames(initstate) <- names(ipr)
vecinj <- c(Al= 8.694e-10,
C = 8.182e-01,
Ca= 9.710e-02,
Cl= 4.340e+00,
Fe= 1.778e-06,
K = 1.131e-02,
Mg= 4.074e-02,
Na= 4.189e+00,
Si= 7.652e-05,
pH= 2.556228)
## setup boundary conditions for transport - we have already read the
## GRID with the following code:
## grid <- Rmufits::ReadGrid(paste0(demodir,"/d2ascii.run.GRID.SUM"))
## cbound <- which(grid$cell$ACTNUM == 2)
## dput(cbound)
cbound <- c(1L, 50L, 100L, 150L, 200L, 250L, 300L, 350L, 400L, 450L, 500L,
550L, 600L, 650L, 700L, 750L, 800L, 850L, 900L, 950L, 1000L,
1050L, 1100L, 1150L, 1200L, 1250L, 1300L, 1350L, 1400L, 1450L,
1500L, 1550L, 1600L, 1650L, 1700L, 1750L, 1800L, 1850L, 1900L,
1950L, 2000L, 2050L, 2100L, 2150L, 2200L, 2250L, 2300L, 2350L,
2400L, 2450L, 2451L, 2452L, 2453L, 2454L, 2455L, 2456L, 2457L,
2458L, 2459L, 2460L, 2461L, 2462L, 2463L, 2464L, 2465L, 2466L,
2467L, 2468L, 2469L, 2470L, 2471L, 2472L, 2473L, 2474L, 2475L,
2476L, 2477L, 2478L, 2479L, 2480L, 2481L, 2482L, 2483L, 2484L,
2485L, 2486L, 2487L, 2488L, 2489L, 2490L, 2491L, 2492L, 2493L,
2494L, 2495L, 2496L, 2497L, 2498L, 2499L, 2500L)
boundary_matrix <- matrix(rep(ipr[1:10], length(cbound)), byrow=TRUE, nrow=length(cbound))
colnames(boundary_matrix) <- names(ipr[1:10])
boundary_matrix[1, ] <- vecinj
boundary_matrix <- cbind(cbound,boundary_matrix)
setup <- list(n = 2500,
bound = boundary_matrix,
base = base,
first = selout,
initsim = initstate,
Cf = 1,
prop = prop,
immobile = seq(11,15),
kin = seq(11,15),
phase = "FLUX1",
density = "DEN1",
reduce = FALSE,
snapshots = demodir, ## directory where we will read MUFITS SUM files
gridfile = paste0(demodir,"/d2ascii.run.GRID.SUM")
)

131
src/SimCompKtz.R Normal file
View File

@ -0,0 +1,131 @@
db <- RPhreeFile("mdl_quint_kin.dat", is.db=TRUE)
phreeqc::phrLoadDatabaseString(db)
## only the directory
demodir <- "./snaps/"
base <- c("SOLUTION 1",
"units mol/kgw",
"pH 6.77",
"temp 35",
"-water 1",
"Al 8.06386e-09",
"C 0.0006108294",
"Ca 0.09709463",
"Cl 4.340042",
"Fe 1.234357e-05",
"K 0.01117434",
"Mg 0.0406959",
"Na 4.189209",
"Si 0.0001935754",
"INCREMENTAL_REACTIONS true",
"KINETICS 1 ",
"-steps 86400",
"-bad_step_max 10000",
"-cvode true",
"Albite",
"-m 8.432165", ## 1540.0",
"-parms 01.54 100",
"Calcite",
"-m 0.0",
"-parms 10 100",
"Chlorite",
"-m 1.106585", ## 202.100",
"-parms 64.84 100",
"Illite",
"-m 0.9549153", ## 174.400",
"-parms 43.38 100",
"Kaolinite",
"-m 0.0",
"-parms 29.17 100",
"END")
selout <- c("KNOBS",
"-convergence_tolerance 1E-6",
"SELECTED_OUTPUT",
"-reset false",
"USER_PUNCH",
"-head Al C Ca Cl Fe K Mg Na Si pH Albite Calcite Chlorite Illite Kaolinite", ## pe
"10 PUNCH TOT(\"Al\"), TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Fe\"), TOT(\"K\"), TOT(\"Mg\"), TOT(\"Na\"), TOT(\"Si\"), -LA(\"H+\"), KIN(\"Albite\"), KIN(\"Calcite\"), KIN(\"Chlorite\"), KIN(\"Illite\"), KIN(\"Kaolinite\")" )
## Define initial conditions as equilibrium with primary minerals
ipr <- c(Al = 8.689e-10,
C = 0.0006108,
Ca = 0.09709,
Cl = 4.34,
Fe = 1.802e-06,
K = 0.01131,
Mg = 0.04074,
Na = 4.189,
Si = 7.653e-05,
pH = 6.889,
Albite = 5.0,
Calcite = 0.0,
Chlorite = 10.0,
Illite = 2.0,
Kaolinite = 0.0
)
initstate <- matrix(rep(ipr, 648420), byrow=TRUE, ncol=length(ipr))
colnames(initstate) <- names(ipr)
vecinj <- c(Al= 8.694e-10,
C = 8.182e-01,
Ca= 9.710e-02,
Cl= 4.340e+00,
Fe= 1.778e-06,
K = 1.131e-02,
Mg= 4.074e-02,
Na= 4.189e+00,
Si= 7.652e-05,
pH= 2.556228)
prop <- c("Al", "C","Ca","Cl","Fe", "K", "Mg","Na", "Si", "pH", ## "pe",
"Albite", "Calcite", "Chlorite", "Illite", "Kaolinite")
bound_elm <- c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L,
41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L,
67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L,
80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L,
93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L,
105L, 106L, 107L, 108L, 214L, 215L)
inj_elm <- c(7426L, 18233L, 29040L, 39847L,
50654L, 61461L, 72268L, 83075L, 93882L, 104689L, 115496L, 126303L,
137110L, 147917L, 158724L, 169531L, 180338L, 191145L, 201952L,
212759L, 223566L, 234373L, 245180L, 255987L, 266794L, 277601L,
288408L, 299215L, 310022L, 320829L, 331636L, 342443L, 353250L,
364057L, 374864L, 385671L, 396478L, 407285L, 418092L, 428899L,
439706L, 450513L, 461320L, 472127L, 482934L, 493741L, 504548L,
515355L)
cbound <- inj_elm
boundinit <- matrix(rep(vecinj, length(cbound)), ncol=length(vecinj), byrow=TRUE)
myboundmat <- cbind(cbound,boundinit)
## distinguish between injection and real boundaries
colnames(myboundmat) <- c("cbound", names(vecinj))
setup <- list(n=648420,
base=base,
bound=myboundmat,
first=selout,
initsim=initstate,
Cf=1,
prop=prop,
immobile=seq(11,15),
kin= seq(11,15),
phase="FLUX1",
density="DENS",
reduce=TRUE,
snapshots="snaps/AllSnaps_cmp_v3.rds",
gridfile ="snaps/GridKtz_cmp_v3.rds")

118
src/SimDol2D.R Normal file
View File

@ -0,0 +1,118 @@
## chemical database
db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat",
package="RedModRphree"), is.db=TRUE)
phreeqc::phrLoadDatabaseString(db)
## only the directory
demodir <- system.file("extdata", "demo_rtwithmufits", package="Rmufits")
prop <- c("C","Ca","Cl","Mg","pH","pe","O2g", "Calcite","Dolomite")
signif_vector <- c(7,7,7,7,7,7,7,5,5)
prop_type <- c("act","act","act","act","logact","logact","ignore","act","act")
base <- c("SOLUTION 1",
"units mol/kgw",
"temp 25.0",
"water 1",
"pH 9.91 charge",
"pe 4.0",
"C 1.2279E-04",
"Ca 1.2279E-04",
"Mg 0.001",
"Cl 0.002",
"PURE 1",
"O2g -0.1675 10",
"KINETICS 1",
"-steps 100",
"-step_divide 100",
"-bad_step_max 2000",
"Calcite", "-m 0.000207",
"-parms 0.0032",
"Dolomite",
"-m 0.0",
"-parms 0.00032",
"END")
selout <- c("SELECTED_OUTPUT", "-high_precision true", "-reset false",
"-time", "-soln", "-temperature true", "-water true",
"-pH", "-pe", "-totals C Ca Cl Mg",
"-kinetic_reactants Calcite Dolomite", "-equilibrium O2g")
initsim <- c("SELECTED_OUTPUT", "-high_precision true",
"-reset false",
"USER_PUNCH",
"-head C Ca Cl Mg pH pe O2g Calcite Dolomite",
"10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")",
"SOLUTION 1",
"units mol/kgw",
"temp 25.0", "water 1",
"pH 9.91 charge",
"pe 4.0",
"C 1.2279E-04",
"Ca 1.2279E-04",
"Cl 1E-12",
"Mg 1E-12",
"PURE 1",
"O2g -0.6788 10.0",
"Calcite 0.0 2.07E-3",
"Dolomite 0.0 0.0",
"END")
vecinj <- c("C"= 0,
"Ca" = 0,
"Cl" = 0.002,
"Mg" = 0.001,
"pe" = 4,
"pH" = 7)
init <- c("C(4)"= 1.2279E-4,
"Ca" =1.2279E-4,
"Cl" =0,
"Mg" =0,
"pe" =4,
"pH" =7,
"Calcite"= 2.07e-4,
"Dolomite"= 0)
## setup boundary conditions for transport - we have already read the
## GRID with the following code:
## grid <- Rmufits::ReadGrid(paste0(demodir,"/d2ascii.run.GRID.SUM"))
## cbound <- which(grid$cell$ACTNUM == 2)
## dput(cbound)
cbound <- c(1L, 50L, 100L, 150L, 200L, 250L, 300L, 350L, 400L, 450L, 500L,
550L, 600L, 650L, 700L, 750L, 800L, 850L, 900L, 950L, 1000L,
1050L, 1100L, 1150L, 1200L, 1250L, 1300L, 1350L, 1400L, 1450L,
1500L, 1550L, 1600L, 1650L, 1700L, 1750L, 1800L, 1850L, 1900L,
1950L, 2000L, 2050L, 2100L, 2150L, 2200L, 2250L, 2300L, 2350L,
2400L, 2450L, 2451L, 2452L, 2453L, 2454L, 2455L, 2456L, 2457L,
2458L, 2459L, 2460L, 2461L, 2462L, 2463L, 2464L, 2465L, 2466L,
2467L, 2468L, 2469L, 2470L, 2471L, 2472L, 2473L, 2474L, 2475L,
2476L, 2477L, 2478L, 2479L, 2480L, 2481L, 2482L, 2483L, 2484L,
2485L, 2486L, 2487L, 2488L, 2489L, 2490L, 2491L, 2492L, 2493L,
2494L, 2495L, 2496L, 2497L, 2498L, 2499L, 2500L)
boundinit <- matrix(rep(init[-c(7,8)], length(cbound)), byrow=TRUE, nrow=length(cbound))
myboundmat <- cbind(cbound,boundinit)
myboundmat[cbound==1, c(2:7)] <- vecinj
colnames(myboundmat) <- c("cbound", names(vecinj))
setup <- list(n=2500,
bound=myboundmat,
base=base,
first=selout,
initsim=initsim,
Cf=1,
prop=prop,
immobile=c(7,8,9),
kin= c(8,9),
ann=list(O2g=-0.1675),
phase="FLUX1",
density="DEN1",
reduce=FALSE,
snapshots=demodir, ## directory where we will read MUFITS SUM files
gridfile=paste0(demodir,"/d2ascii.run.GRID.SUM")
)

130
src/SimDolKtz.R Normal file
View File

@ -0,0 +1,130 @@
# library(RedModRphree)
# library(Rmufits)
# library(RcppVTK)
db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat",
package="RedModRphree"), is.db=TRUE)
phreeqc::phrLoadDatabaseString(db)
prop <- c("C","Ca","Cl","Mg","pH","pe","O2g", "Calcite","Dolomite")
signif_vector <- c(7,7,7,7,7,7,7,5,5)
prop_type <- c("act","act","act","act","logact","logact","ignore","act","act")
base <- c("SOLUTION 1",
"units mol/kgw",
"temp 25.0",
"water 1",
"pH 9.91 charge",
"pe 4.0",
"C 1.2279E-04",
"Ca 1.2279E-04",
"Mg 0.001",
"Cl 0.002",
"PURE 1",
"O2g -0.1675 10",
"KINETICS 1",
"-steps 100",
"-step_divide 100",
"-bad_step_max 2000",
"Calcite", "-m 0.000207",
"-parms 0.0032",
"Dolomite",
"-m 0.0",
"-parms 0.00032",
"END")
selout <- c("SELECTED_OUTPUT",
"-high_precision true",
"-reset false",
"-time",
"-soln",
"-temperature true",
"-water true",
"-pH",
"-pe",
"-totals C Ca Cl Mg",
"-kinetic_reactants Calcite Dolomite",
"-equilibrium O2g")
initsim <- c("SELECTED_OUTPUT",
"-high_precision true",
"-reset false",
"USER_PUNCH",
"-head C Ca Cl Mg pH pe O2g Calcite Dolomite",
"10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")",
"SOLUTION 1",
"units mol/kgw",
"temp 25.0", "water 1",
"pH 9.91 charge",
"pe 4.0",
"C 1.2279E-04",
"Ca 1.2279E-04",
"Cl 1E-12",
"Mg 1E-12",
"PURE 1",
"O2g -0.6788 10.0",
"Calcite 0.0 2.07E-3",
"Dolomite 0.0 0.0",
"END")
vecinj <- c("C"= 0,
"Ca" = 0,
"Cl" = 0.002,
"Mg" = 0.001,
"pe" = 4,
"pH" = 7)
init <- c("C(4)"= 1.2279E-4,
"Ca" =1.2279E-4,
"Cl" =0,
"Mg" =0,
"pe" =4,
"pH" =7,
"Calcite"= 2.07e-4,
"Dolomite"= 0)
bound_elm <- c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L,
41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L,
67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L,
80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L,
93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L,
105L, 106L, 107L, 108L, 214L, 215L)
inj_elm <- c(7426L, 18233L, 29040L, 39847L,
50654L, 61461L, 72268L, 83075L, 93882L, 104689L, 115496L, 126303L,
137110L, 147917L, 158724L, 169531L, 180338L, 191145L, 201952L,
212759L, 223566L, 234373L, 245180L, 255987L, 266794L, 277601L,
288408L, 299215L, 310022L, 320829L, 331636L, 342443L, 353250L,
364057L, 374864L, 385671L, 396478L, 407285L, 418092L, 428899L,
439706L, 450513L, 461320L, 472127L, 482934L, 493741L, 504548L,
515355L)
cbound <- inj_elm
boundinit <- matrix(rep(vecinj, length(cbound)), ncol=length(vecinj), byrow=TRUE)
myboundmat <- cbind(cbound,boundinit)
## distinguish between injection and real boundaries
colnames(myboundmat) <- c("cbound", names(vecinj))
setup <- list(n=648420,
bound=myboundmat,
base=base,
first=selout,
initsim=initsim,
Cf=1,
prop=prop,
immobile=c(7,8,9),
kin= c(8,9),
ann=list(O2g=-0.1675),
phase="FLUX1",
density="DENS",
reduce=FALSE,
snapshots="snaps/AllSnaps_cmp_v3.rds",
gridfile ="snaps/GridKtz_cmp_v3.rds"
)

115
src/Test_Pro.R Normal file
View File

@ -0,0 +1,115 @@
## Example script for assessing simulation results with "RFun_Eval.R"
## Time-stamp: "Last modified 2020-02-05 03:36:05 delucia"
## Read all the needed functions
source("RFun_Eval.R")
## read the simulation results of a SimDol2D variant with setup$prolong
sd <- ReadRTSims("test_prol4")
simtimes <- sapply(sd, "[","simtime")
## we still need to read the grid to perform 2D visualization
demodir <- system.file("extdata", "demo_rtwithmufits", package="Rmufits")
grid <- ReadGrid(paste0(demodir,"/d2ascii.run.GRID.SUM"))
## fix the color scale
br <- seq(0, 0.0012, length=13)
## we want animation
library(animation)
## workhorse function to be used with package "animation"
PlotAn <- function(tot, prop, grid, breaks)
{
for (step in seq(1, length(tot))) {
snap <- tot[[step]]$C
time <- tot[[step]]$simtime/3600/24
ind <- match(prop, colnames(snap))
Plot2DCellData(snap[,ind], grid=grid, contour=FALSE, breaks=breaks, nlevels=length(breaks), scale=TRUE, main=paste0(prop," after ", time, "days"))
}
}
### simple animations ###
## As gif
saveGIF({
PlotAn(tot=sd, prop="Dolomite", grid=grid, breaks=br)
}, img.name = 'Dolomite_plot', movie.name="Dol2Dbreaks.gif",
interval = 0.5, nmax = length(sd))
## as HTML
saveHTML({
PlotAn(tot=sd, prop="Dolomite", grid=grid, breaks=br)
}, img.name = 'Dolomite_plot', htmlfile="dolomite.html",
interval = 0.5, nmax = length(sd))
## For inclusion in latex
saveLatex({
PlotAn(tot=sd, prop="Dolomite", grid=grid, breaks=br)
}, img.name = 'Dolomite_plot',
latex.filename = 'dolomite_prolong.tex',
interval = 0.5, nmax = length(sd))
sd <- ReadRTSims("test_prol4")
source("RFun_Eval.R")
library(RedModRphree)
library(Rmufits)
library(RcppVTK)
#### Evaluation of discrepancies of KtzDol_6p200_1_nodht_180 / KtzDol_6p200_1_dhtlog_180
dht <- ReadRTSims("KtzDol_6p200_1_dhtlog_180")
ref <- ReadRTSims("KtzDol_6p200_1_nodht_180")
rmse <- ComputeErrors(dht, ref, FUN=RMSE)
rae <- ComputeErrors(dht, ref, FUN=RAEmax)
mae <- ComputeErrors(dht, ref, FUN=MAE)
rel_max_rmse <- ComputeErrors(dht, ref, FUN=RmaxRMSE)
## Visualize the 2 computed relative errors
## start by defining a nice color palette using RColorBrewer
mycol <- RColorBrewer::brewer.pal(8, "Dark2")
## uncomment the next line to save this image to a pdf file
cairo_pdf("Ref_VS_DHT.pdf", height=4, width=12)
par(mfrow=c(1,2), family="serif")
ErrorProgress(rmse, colors=mycol, ignore=c("O2g", "pe", "Cl", "C"), log="y",
ylim=c(1E-12, 1E-3), las=1, main="Mean RMSE, reference vs. dht", metric="RMSE")
abline(h=10^-seq(11,2), col="grey", lwd=1, lty="dashed")
ErrorProgress(mae, colors=mycol, ignore=c("O2g", "pe", "Cl", "C"), log="y",
ylim=c(1E-11, 1E-2), las=1, main="Max Absolute Error, reference vs. dht", metric="MAE")
abline(h=10^-seq(10,3), col="grey", lwd=1, lty="dashed")
dev.off()
cairo_pdf("Scatter_Ref_VS_DHT.pdf", height=5, width=9)
par(family="serif")
PlotScatter(ref[[200]], dht[[200]],
which=c("Calcite", "Dolomite", "Ca", "Mg", "C", "pH"),
cols=3, labs=c("Reference", "DHT"), pch=".", cex=2)
dev.off()
time_ref <- readRDS("KtzDol_6p200_1_nodht_180/timings.rds")
time_dht <- readRDS("KtzDol_6p200_1_dhtlog_180/timings.rds")
## export to paraview
resk <- lapply(res, function(x) return(data.matrix(x$C)))
ExportToParaview("template_vtu_ketzinlarge.vtu", "KtzDol_6p200_1/paraview", results=resk)

92
src/Test_Validation.R Normal file
View File

@ -0,0 +1,92 @@
## Example script for assessing simulation results with "RFun_Eval.R"
## Time-stamp: "Last modified 2020-01-27 17:23:25 delucia"
## Read all the needed functions
source("RFun_Eval.R")
## Read all .rds in a given directory
trds <- ReadRTSims("TestDHT_Log")
## Read all .dht in a given directory
tdht <- ReadAllDHT("TestDHT_Log")
## control the content of a .dht
rc <- trds[[1]]$C
rt <- trds[[1]]$T
dht <- tdht[[1]][, 10:18]
## extract the unique rows of the first iteration (3 rows expected, as per dht)
rtu <- mgcv::uniquecombs(rt)
rcu <- mgcv::uniquecombs(rc)
all.equal(attr(rtu,"index"), attr(rcu,"index"))
colnames(dht) <- colnames(rcu)
## compare _Log _NoLog and reference
## First read all the simulations
rlog <- ReadRTSims("TestDHT_Log") ## all default values, np 4, with --dht-log
nlog <- ReadRTSims("TestDHT_NoLog") ## all default values, np 4, without --dht-log
ref <- ReadRTSims("TestNoDHT") ## all default values, np 4
## Compute absolute RMSE for each variable
rmse_lnl <- ComputeErrors(rlog, nlog, FUN=RMSE)
## Compute relative RMSE for each variable using sd as norm
rel_sd_rmse_lnl <- ComputeErrors(rlog, nlog, FUN=RsdRMSE)
## Compute relative RMSE for each variable using MAX as norm
rel_max_rmse_lnl <- ComputeErrors(rlog, nlog, FUN=RmaxRMSE)
## Visualize the 2 computed relative errors
## start by defining a nice color palette using RColorBrewer
mycol <- RColorBrewer::brewer.pal(8, "Dark2")
## uncomment the next line to save this image to a pdf file
## cairo_pdf("RelErr_Dol_dhtlog_vs_nolog.pdf", height=9, width=12)
par(mfrow=c(1,2))
ErrorProgress(rel_sd_rmse_lnl, colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-log vs dht, norm=SD", metric="RMSE_SD")
## add a grid
abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
ErrorProgress(rel_max_rmse_lnl, colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-log vs dht, norm=MAX", metric="RMSE_MAX")
abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
## uncomment the next line when saving to pdf
## dev.off()
## Visualize Scatter Plot of each variable
PlotScatter(rlog[[20]], nlog[[20]], labs=c("DHT LOG", "DHT"))
PlotScatter(rlog[[4]], nlog[[4]], labs=c("DHT LOG", "DHT"))
## Same as before but between dht-log and ref
rmse_logref <- ComputeErrors(rlog, ref, FUN=RMSE)
rel_sd_rmse_logref <- ComputeErrors(rlog, ref, FUN=RsdRMSE)
rel_max_rmse_logref <- ComputeErrors(rlog, ref, FUN=RmaxRMSE)
cairo_pdf("RelErr_Dol_ref_vs_dhtlog.pdf", height=9, width=12)
par(mfrow=c(1,2))
ErrorProgress(rel_sd_rmse_logref, colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-log vs Ref, norm=SD", metric="RMSE_SD")
abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
ErrorProgress(rel_max_rmse_logref, colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-log vs Ref, norm=MAX", metric="RMSE_MAX")
abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
dev.off()
x11(); PlotScatter(rlog[[20]]$C, ref[[20]]$C, labs=c("DHT log", "ref"))
rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RMSE)
rel_sd_rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RsdRMSE)
ErrorProgress(rel_sd_rmse_nlogref, log="y")
PlotScatter(nlog[[20]]$C, ref[[20]]$C, labs=c("DHT no log", "ref"))
## Same as before but between dht-nolog and ref
rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RMSE)
rel_sd_rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RsdRMSE)
rel_max_rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RmaxRMSE)
cairo_pdf("RelErr_Dol_ref_vs_dht.pdf", height=9, width=12)
par(mfrow=c(1,2))
ErrorProgress(rel_sd_rmse_nlogref, colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-nolog vs Ref, norm=SD", metric="RMSE_SD")
abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
ErrorProgress(rel_max_rmse_nlogref, colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-nolog vs Ref, norm=MAX", metric="RMSE_MAX")
abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
dev.off()

432
src/argh.h Normal file
View File

@ -0,0 +1,432 @@
#pragma once
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <cassert>
namespace argh
{
// Terminology:
// A command line is composed of 2 types of args:
// 1. Positional args, i.e. free standing values
// 2. Options: args beginning with '-'. We identify two kinds:
// 2.1: Flags: boolean options => (exist ? true : false)
// 2.2: Parameters: a name followed by a non-option value
#if !defined(__GNUC__) || (__GNUC__ >= 5)
using string_stream = std::istringstream;
#else
// Until GCC 5, istringstream did not have a move constructor.
// stringstream_proxy is used instead, as a workaround.
class stringstream_proxy
{
public:
stringstream_proxy() = default;
// Construct with a value.
stringstream_proxy(std::string const& value) :
stream_(value)
{}
// Copy constructor.
stringstream_proxy(const stringstream_proxy& other) :
stream_(other.stream_.str())
{
stream_.setstate(other.stream_.rdstate());
}
void setstate(std::ios_base::iostate state) { stream_.setstate(state); }
// Stream out the value of the parameter.
// If the conversion was not possible, the stream will enter the fail state,
// and operator bool will return false.
template<typename T>
stringstream_proxy& operator >> (T& thing)
{
stream_ >> thing;
return *this;
}
// Get the string value.
std::string str() const { return stream_.str(); }
std::stringbuf* rdbuf() const { return stream_.rdbuf(); }
// Check the state of the stream.
// False when the most recent stream operation failed
operator bool() const { return !!stream_; }
~stringstream_proxy() = default;
private:
std::istringstream stream_;
};
using string_stream = stringstream_proxy;
#endif
class parser
{
public:
enum Mode { PREFER_FLAG_FOR_UNREG_OPTION = 1 << 0,
PREFER_PARAM_FOR_UNREG_OPTION = 1 << 1,
NO_SPLIT_ON_EQUALSIGN = 1 << 2,
SINGLE_DASH_IS_MULTIFLAG = 1 << 3,
};
parser() = default;
parser(std::initializer_list<char const* const> pre_reg_names)
{ add_params(pre_reg_names); }
parser(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
{ parse(argv, mode); }
parser(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
{ parse(argc, argv, mode); }
void add_param(std::string const& name);
void add_params(std::initializer_list<char const* const> init_list);
void parse(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);
void parse(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);
std::multiset<std::string> const& flags() const { return flags_; }
std::map<std::string, std::string> const& params() const { return params_; }
std::vector<std::string> const& pos_args() const { return pos_args_; }
// begin() and end() for using range-for over positional args.
std::vector<std::string>::const_iterator begin() const { return pos_args_.cbegin(); }
std::vector<std::string>::const_iterator end() const { return pos_args_.cend(); }
size_t size() const { return pos_args_.size(); }
//////////////////////////////////////////////////////////////////////////
// Accessors
// flag (boolean) accessors: return true if the flag appeared, otherwise false.
bool operator[](std::string const& name) const;
// multiple flag (boolean) accessors: return true if at least one of the flag appeared, otherwise false.
bool operator[](std::initializer_list<char const* const> init_list) const;
// returns positional arg string by order. Like argv[] but without the options
std::string const& operator[](size_t ind) const;
// returns a std::istream that can be used to convert a positional arg to a typed value.
string_stream operator()(size_t ind) const;
// same as above, but with a default value in case the arg is missing (index out of range).
template<typename T>
string_stream operator()(size_t ind, T&& def_val) const;
// parameter accessors, give a name get an std::istream that can be used to convert to a typed value.
// call .str() on result to get as string
string_stream operator()(std::string const& name) const;
// accessor for a parameter with multiple names, give a list of names, get an std::istream that can be used to convert to a typed value.
// call .str() on result to get as string
// returns the first value in the list to be found.
string_stream operator()(std::initializer_list<char const* const> init_list) const;
// same as above, but with a default value in case the param was missing.
// Non-string def_val types must have an operator<<() (output stream operator)
// If T only has an input stream operator, pass the string version of the type as in "3" instead of 3.
template<typename T>
string_stream operator()(std::string const& name, T&& def_val) const;
// same as above but for a list of names. returns the first value to be found.
template<typename T>
string_stream operator()(std::initializer_list<char const* const> init_list, T&& def_val) const;
private:
string_stream bad_stream() const;
std::string trim_leading_dashes(std::string const& name) const;
bool is_number(std::string const& arg) const;
bool is_option(std::string const& arg) const;
bool got_flag(std::string const& name) const;
bool is_param(std::string const& name) const;
private:
std::vector<std::string> args_;
std::map<std::string, std::string> params_;
std::vector<std::string> pos_args_;
std::multiset<std::string> flags_;
std::set<std::string> registeredParams_;
std::string empty_;
};
//////////////////////////////////////////////////////////////////////////
inline void parser::parse(const char * const argv[], int mode)
{
int argc = 0;
for (auto argvp = argv; *argvp; ++argc, ++argvp);
parse(argc, argv, mode);
}
//////////////////////////////////////////////////////////////////////////
inline void parser::parse(int argc, const char* const argv[], int mode /*= PREFER_FLAG_FOR_UNREG_OPTION*/)
{
// convert to strings
args_.resize(argc);
std::transform(argv, argv + argc, args_.begin(), [](const char* const arg) { return arg; });
// parse line
for (auto i = 0u; i < args_.size(); ++i)
{
if (!is_option(args_[i]))
{
pos_args_.emplace_back(args_[i]);
continue;
}
auto name = trim_leading_dashes(args_[i]);
if (!(mode & NO_SPLIT_ON_EQUALSIGN))
{
auto equalPos = name.find('=');
if (equalPos != std::string::npos)
{
params_.insert({ name.substr(0, equalPos), name.substr(equalPos + 1) });
continue;
}
}
// if the option is unregistered and should be a multi-flag
if (1 == (args_[i].size() - name.size()) && // single dash
argh::parser::SINGLE_DASH_IS_MULTIFLAG & mode && // multi-flag mode
!is_param(name)) // unregistered
{
std::string keep_param;
if (!name.empty() && is_param(std::string(1ul, name.back()))) // last char is param
{
keep_param += name.back();
name.resize(name.size() - 1);
}
for (auto const& c : name)
{
flags_.emplace(std::string{ c });
}
if (!keep_param.empty())
{
name = keep_param;
}
else
{
continue; // do not consider other options for this arg
}
}
// any potential option will get as its value the next arg, unless that arg is an option too
// in that case it will be determined a flag.
if (i == args_.size() - 1 || is_option(args_[i + 1]))
{
flags_.emplace(name);
continue;
}
// if 'name' is a pre-registered option, then the next arg cannot be a free parameter to it is skipped
// otherwise we have 2 modes:
// PREFER_FLAG_FOR_UNREG_OPTION: a non-registered 'name' is determined a flag.
// The following value (the next arg) will be a free parameter.
//
// PREFER_PARAM_FOR_UNREG_OPTION: a non-registered 'name' is determined a parameter, the next arg
// will be the value of that option.
assert(!(mode & argh::parser::PREFER_FLAG_FOR_UNREG_OPTION)
|| !(mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION));
bool preferParam = mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION;
if (is_param(name) || preferParam)
{
params_.insert({ name, args_[i + 1] });
++i; // skip next value, it is not a free parameter
continue;
}
else
{
flags_.emplace(name);
}
};
}
//////////////////////////////////////////////////////////////////////////
inline string_stream parser::bad_stream() const
{
string_stream bad;
bad.setstate(std::ios_base::failbit);
return bad;
}
//////////////////////////////////////////////////////////////////////////
inline bool parser::is_number(std::string const& arg) const
{
// inefficient but simple way to determine if a string is a number (which can start with a '-')
std::istringstream istr(arg);
double number;
istr >> number;
return !(istr.fail() || istr.bad());
}
//////////////////////////////////////////////////////////////////////////
inline bool parser::is_option(std::string const& arg) const
{
assert(0 != arg.size());
if (is_number(arg))
return false;
return '-' == arg[0];
}
//////////////////////////////////////////////////////////////////////////
inline std::string parser::trim_leading_dashes(std::string const& name) const
{
auto pos = name.find_first_not_of('-');
return std::string::npos != pos ? name.substr(pos) : name;
}
//////////////////////////////////////////////////////////////////////////
inline bool argh::parser::got_flag(std::string const& name) const
{
return flags_.end() != flags_.find(trim_leading_dashes(name));
}
//////////////////////////////////////////////////////////////////////////
inline bool argh::parser::is_param(std::string const& name) const
{
return registeredParams_.count(name);
}
//////////////////////////////////////////////////////////////////////////
inline bool parser::operator[](std::string const& name) const
{
return got_flag(name);
}
//////////////////////////////////////////////////////////////////////////
inline bool parser::operator[](std::initializer_list<char const* const> init_list) const
{
return std::any_of(init_list.begin(), init_list.end(), [&](char const* const name) { return got_flag(name); });
}
//////////////////////////////////////////////////////////////////////////
inline std::string const& parser::operator[](size_t ind) const
{
if (ind < pos_args_.size())
return pos_args_[ind];
return empty_;
}
//////////////////////////////////////////////////////////////////////////
inline string_stream parser::operator()(std::string const& name) const
{
auto optIt = params_.find(trim_leading_dashes(name));
if (params_.end() != optIt)
return string_stream(optIt->second);
return bad_stream();
}
//////////////////////////////////////////////////////////////////////////
inline string_stream parser::operator()(std::initializer_list<char const* const> init_list) const
{
for (auto& name : init_list)
{
auto optIt = params_.find(trim_leading_dashes(name));
if (params_.end() != optIt)
return string_stream(optIt->second);
}
return bad_stream();
}
//////////////////////////////////////////////////////////////////////////
template<typename T>
string_stream parser::operator()(std::string const& name, T&& def_val) const
{
auto optIt = params_.find(trim_leading_dashes(name));
if (params_.end() != optIt)
return string_stream(optIt->second);
std::ostringstream ostr;
ostr << def_val;
return string_stream(ostr.str()); // use default
}
//////////////////////////////////////////////////////////////////////////
// same as above but for a list of names. returns the first value to be found.
template<typename T>
string_stream parser::operator()(std::initializer_list<char const* const> init_list, T&& def_val) const
{
for (auto& name : init_list)
{
auto optIt = params_.find(trim_leading_dashes(name));
if (params_.end() != optIt)
return string_stream(optIt->second);
}
std::ostringstream ostr;
ostr << def_val;
return string_stream(ostr.str()); // use default
}
//////////////////////////////////////////////////////////////////////////
inline string_stream parser::operator()(size_t ind) const
{
if (pos_args_.size() <= ind)
return bad_stream();
return string_stream(pos_args_[ind]);
}
//////////////////////////////////////////////////////////////////////////
template<typename T>
string_stream parser::operator()(size_t ind, T&& def_val) const
{
if (pos_args_.size() <= ind)
{
std::ostringstream ostr;
ostr << def_val;
return string_stream(ostr.str());
}
return string_stream(pos_args_[ind]);
}
//////////////////////////////////////////////////////////////////////////
inline void parser::add_param(std::string const& name)
{
registeredParams_.insert(trim_leading_dashes(name));
}
//////////////////////////////////////////////////////////////////////////
inline void parser::add_params(std::initializer_list<char const* const> init_list)
{
for (auto& name : init_list)
registeredParams_.insert(trim_leading_dashes(name));
}
}

175
src/dht_wrapper.cpp Normal file
View File

@ -0,0 +1,175 @@
#include "dht_wrapper.h"
#include <openssl/md5.h>
/*init globals*/
bool dht_enabled;
int dht_snaps;
int dht_strategy;
int dht_significant_digits;
std::string dht_file;
std::vector<int> dht_significant_digits_vector;
std::vector<string> prop_type_vector;
bool dht_logarithm;
uint64_t dht_size_per_process;
uint64_t dht_hits, dht_miss, dht_collision;
RInside *R_DHT;
std::vector<bool> dht_flags;
DHT *dht_object;
double *fuzzing_buffer;
bool dt_differ;
/*functions*/
uint64_t get_md5(int key_size, void *key) {
MD5_CTX ctx;
unsigned char sum[MD5_DIGEST_LENGTH];
uint64_t retval, *v1, *v2;
MD5_Init(&ctx);
MD5_Update(&ctx, key, key_size);
MD5_Final(sum, &ctx);
v1 = (uint64_t *)&sum[0];
v2 = (uint64_t *)&sum[8];
retval = *v1 ^ *v2;
return retval;
}
double Round_off(RInside &R, double N, double n) {
double result;
R["roundsig"] = n;
R["roundin"] = N;
result = R.parseEval("signif(roundin, digits=roundsig)");
return result;
}
/*
* Stores fuzzed version of key in fuzzing_buffer
*/
void fuzz_for_dht(RInside &R, int var_count, void *key, double dt) {
unsigned int i = 0;
//introduce fuzzing to allow more hits in DHT
for (i = 0; i < (unsigned int)var_count; i++) {
if (prop_type_vector[i] == "act") {
//with log10
if (dht_logarithm) {
if (((double *)key)[i] < 0)
cerr << "dht_wrapper.cpp::fuzz_for_dht(): Warning! Negative value at key!" << endl;
else if (((double *)key)[i] == 0)
fuzzing_buffer[i] = 0;
else
//fuzzing_buffer[i] = Round_off(R, std::log10(((double *)key)[i]), dht_significant_digits_vector[i] - 1);
fuzzing_buffer[i] = ROUND(-(std::log10(((double *)key)[i])), dht_significant_digits_vector[i]);
} else {
//without log10
//fuzzing_buffer[i] = Round_off(R, ((double *)key)[i], dht_significant_digits_vector[i]);
fuzzing_buffer[i] = ROUND((((double *)key)[i]), dht_significant_digits_vector[i]);
}
} else if (prop_type_vector[i] == "logact") {
//fuzzing_buffer[i] = Round_off(R, ((double *)key)[i], dht_significant_digits_vector[i]);
fuzzing_buffer[i] = ROUND((((double *)key)[i]), dht_significant_digits_vector[i]);
} else if (prop_type_vector[i] == "ignore") {
fuzzing_buffer[i] = 0;
} else {
cerr << "dht_wrapper.cpp::fuzz_for_dht(): Warning! Probably wrong prop_type!" << endl;
}
}
if (dt_differ)
fuzzing_buffer[var_count] = dt;
}
void check_dht(RInside &R, int length, std::vector<bool> &out_result_index, double *work_package) {
void *key;
int res;
int var_count = prop_type_vector.size();
double dt;
dt = R.parseEval("mysetup$dt");
for (int i = 0; i < length; i++) {
key = (void *)&(work_package[i * var_count]);
//fuzz data (round, logarithm etc.)
fuzz_for_dht(R, var_count, key, dt);
//overwrite input with data from DHT, IF value is found in DHT
res = DHT_read(dht_object, fuzzing_buffer, key);
if (res == DHT_SUCCESS) {
//flag that this line is replaced by DHT-value, do not simulate!!
out_result_index[i] = false;
dht_hits++;
} else if (res == DHT_READ_ERROR) {
//this line is untouched, simulation is needed
out_result_index[i] = true;
dht_miss++;
} else {
//MPI ERROR ... WHAT TO DO NOW?
//RUNNING CIRCLES WHILE SCREAMING
}
}
}
void fill_dht(RInside &R, int length, std::vector<bool> &result_index, double *work_package, double *results) {
void *key;
void *data;
int res;
int var_count = prop_type_vector.size();
double dt;
dt = R.parseEval("mysetup$dt");
for (int i = 0; i < length; i++) {
key = (void *)&(work_package[i * var_count]);
data = (void *)&(results[i * var_count]);
if (result_index[i]) {
//If true -> was simulated, needs to be inserted into dht
//fuzz data (round, logarithm etc.)
fuzz_for_dht(R, var_count, key, dt);
res = DHT_write(dht_object, fuzzing_buffer, data);
if (res != DHT_SUCCESS) {
if (res == DHT_WRITE_SUCCESS_WITH_COLLISION) {
dht_collision++;
} else {
//MPI ERROR ... WHAT TO DO NOW?
//RUNNING CIRCLES WHILE SCREAMING
}
}
}
}
}
void print_statistics() {
int res;
res = DHT_print_statistics(dht_object);
if (res != DHT_SUCCESS) {
//MPI ERROR ... WHAT TO DO NOW?
//RUNNING CIRCLES WHILE SCREAMING
}
}
int table_to_file(char *filename) {
int res = DHT_to_file(dht_object, filename);
return res;
}
int file_to_table(char *filename) {
int res = DHT_from_file(dht_object, filename);
if (res != DHT_SUCCESS)
return res;
DHT_print_statistics(dht_object);
return DHT_SUCCESS;
}

50
src/dht_wrapper.h Normal file
View File

@ -0,0 +1,50 @@
#pragma once
#include <RInside.h>
#include <string>
#include <vector>
#include <math.h>
#include "DHT.h"
using namespace std;
using namespace Rcpp;
/*Functions*/
uint64_t get_md5(int key_size, void* key);
void fuzz_for_dht(RInside &R, int var_count, void *key, double dt);
void check_dht(RInside &R, int length, std::vector<bool> &out_result_index, double *work_package);
void fill_dht(RInside &R, int length, std::vector<bool> &result_index, double *work_package, double *results);
void print_statistics();
int table_to_file(char* filename);
int file_to_table(char* filename);
/*globals*/
extern bool dht_enabled;
extern int dht_snaps;
extern std::string dht_file;
extern bool dt_differ;
//Default DHT Size per process in Byte (defaults to 1 GiB)
#define DHT_SIZE_PER_PROCESS 1073741824
//sets default dht access and distribution strategy
#define DHT_STRATEGY 0
// 0 -> DHT is on workers, access from workers only
// 1 -> DHT is on workers + master, access from master only !NOT IMPLEMENTED YET!
#define ROUND(value,signif) (((int) (pow(10.0, (double) signif) * value)) * pow(10.0, (double) -signif))
extern int dht_strategy;
extern int dht_significant_digits;
extern std::vector<int> dht_significant_digits_vector;
extern std::vector<string> prop_type_vector;
extern bool dht_logarithm;
extern uint64_t dht_size_per_process;
//global DHT object, can be NULL if not initialized, check strategy
extern DHT* dht_object;
//DHT Performance counter
extern uint64_t dht_hits, dht_miss, dht_collision;
extern double* fuzzing_buffer;
extern std::vector<bool> dht_flags;

9
src/global_buffer.h Normal file
View File

@ -0,0 +1,9 @@
#pragma once
#define BUFFER_OFFSET 5
/*Globals*/
extern double* mpi_buffer;
extern double* mpi_buffer_results;
extern uint32_t work_package_size;

818
src/kin.cpp Normal file
View File

@ -0,0 +1,818 @@
#include <string>
#include <vector>
#include <iostream>
#include <cstring>
#include <RInside.h> // for the embedded R via RInside
#include <mpi.h> // mpi header file
#include "argh.h" // Argument handler https://github.com/adishavit/argh BSD-licenced
#include "DHT.h" // MPI-DHT Implementation
#include "worker.h"
#include "r_utils.h"
#include "dht_wrapper.h"
#include "global_buffer.h"
using namespace std;
using namespace Rcpp;
double* mpi_buffer;
double* mpi_buffer_results;
uint32_t work_package_size;
#define WORK_PACKAGE_SIZE_DEFAULT 5
bool store_result;
std::set<std::string> paramList() {
std::set<std::string> options;
//global
options.insert("work-package-size");
//only DHT
options.insert("dht-signif");
options.insert("dht-strategy");
options.insert("dht-size");
options.insert("dht-snaps");
options.insert("dht-file");
return options;
}
std::set<std::string> flagList() {
std::set<std::string> options;
//global
options.insert("ignore-result");
//only DHT
options.insert("dht");
options.insert("dht-log");
return options;
}
std::list<std::string> checkOptions(argh::parser cmdl) {
std::list<std::string> retList;
std::set<std::string> flist = flagList();
std::set<std::string> plist = paramList();
for (auto& flag: cmdl.flags()) {
if (!(flist.find(flag) != flist.end())) retList.push_back(flag);
}
for (auto& param: cmdl.params()) {
if (!(plist.find(param.first) != plist.end())) retList.push_back(param.first);
}
return retList;
}
typedef struct
{
char has_work;
double* send_addr;
} worker_struct;
int main(int argc, char *argv[]) {
double sim_start, sim_b_transport, sim_a_transport, sim_b_chemistry, sim_a_chemistry,
sim_end;
double cummul_transport = 0.f;
double cummul_chemistry = 0.f;
double cummul_workers = 0.f;
double cummul_chemistry_master = 0.f;
double cummul_master_seq_pre_loop = 0.f;
double cummul_master_seq_loop = 0.f;
double master_idle = 0.f;
double master_send_a, master_send_b;
double cummul_master_send = 0.f;
double master_recv_a, master_recv_b;
double cummul_master_recv = 0.f;
double sim_a_seq, sim_b_seq, sim_c_seq, sim_d_seq;
double idle_a, idle_b;
double sim_c_chemistry, sim_d_chemistry;
double sim_e_chemistry, sim_f_chemistry;
argh::parser cmdl(argv);
// cout << "CPP: Start Init (MPI)" << endl;
MPI_Init( &argc, &argv );
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
/*Create custom Communicator with all processes except 0 (the master) for DHT storage*/
//only needed if strategy == 0, but done anyway
MPI_Group group_world;
MPI_Group dht_group;
MPI_Comm dht_comm;
int* process_ranks;
// make a list of processes in the new communicator
process_ranks= (int*) malloc(world_size*sizeof(int));
for(int I = 1; I < world_size; I++)
process_ranks[I-1] = I;
//get the group under MPI_COMM_WORLD
MPI_Comm_group(MPI_COMM_WORLD, &group_world);
//create the new group
MPI_Group_incl(group_world, world_size-1, process_ranks, &dht_group);
// create the new communicator
MPI_Comm_create(MPI_COMM_WORLD, dht_group, &dht_comm);
free (process_ranks); //cleanup
// cout << "Done";
if (cmdl[{"help", "h"}]) {
if (world_rank == 0) {
cout << "Todo" << endl <<
"See README.md for further information." << endl;
}
MPI_Finalize();
return EXIT_SUCCESS;
}
/*INIT is now done separately in an R file provided here as argument!*/
if (!cmdl(2)) {
if (world_rank == 0) {
cerr << "ERROR. Kin needs 2 positional arguments: " << endl <<
"1) the R script defining your simulation and" << endl <<
"2) the directory prefix where to save results and profiling" << endl;
}
MPI_Finalize();
return EXIT_FAILURE;
}
std::list<std::string> optionsError = checkOptions(cmdl);
if (!optionsError.empty()) {
if (world_rank == 0) {
cerr << "Unrecognized option(s):\n" << endl;
for (auto option: optionsError) {
cerr << option << endl;
}
cerr << "\nMake sure to use available options. Exiting!" << endl;
}
MPI_Finalize();
return EXIT_FAILURE;
}
/*Parse DHT arguments*/
dht_enabled = cmdl["dht"];
// cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n';
if (dht_enabled) {
cmdl("dht-strategy", 0) >> dht_strategy;
// cout << "CPP: DHT strategy is " << dht_strategy << endl;
cmdl("dht-signif", 5) >> dht_significant_digits;
// cout << "CPP: DHT significant digits = " << dht_significant_digits << endl;
dht_logarithm = cmdl["dht-log"];
// cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" : "OFF" ) << endl;
cmdl("dht-size", DHT_SIZE_PER_PROCESS) >> dht_size_per_process;
// cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << endl;
cmdl("dht-snaps", 0) >> dht_snaps;
cmdl("dht-file") >> dht_file;
}
/*Parse work package size*/
cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> work_package_size;
/*Parse output options*/
store_result = !cmdl["ignore-result"];
if (world_rank==0) {
cout << "CPP: Complete results storage is " << ( store_result ? "ON" : "OFF" ) << endl;
cout << "CPP: Work Package Size: " << work_package_size << endl;
cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n';
if (dht_enabled) {
cout << "CPP: DHT strategy is " << dht_strategy << endl;
cout << "CPP: DHT key default digits (ignored if 'signif_vector' is defined) = " << dht_significant_digits << endl;
cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" : "OFF" ) << endl;
cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << endl;
cout << "CPP: DHT save snapshots is " << dht_snaps << endl;
cout << "CPP: DHT load file is " << dht_file << endl;
}
}
cout << "CPP: R Init (RInside) on process " << world_rank << endl;
RInside R(argc, argv);
// if local_rank == 0 then master else worker
R["local_rank"] = world_rank;
/*Loading Dependencies*/
std::string r_load_dependencies =
"suppressMessages(library(Rmufits));"
"suppressMessages(library(RedModRphree));"
"source('kin_r_library.R');"
"source('parallel_r_library.R');";
R.parseEvalQ(r_load_dependencies);
std::string filesim;
cmdl(1) >> filesim; // <- first positional argument
R["filesim"] = wrap(filesim); // assign a char* (string) to 'filesim'
R.parseEvalQ("source(filesim)"); // eval the init string, ignoring any returns
std::string out_dir;
if (world_rank == 0) { // only rank 0 initializes goes through the whole initialization
cmdl(2) >> out_dir; // <- second positional argument
R["fileout"] = wrap(out_dir); // assign a char* (string) to 'fileout'
// Note: R::sim_init() checks if the directory already exists,
// if not it makes it
// pass the boolean "store_result" to the R process
R["store_result"] = store_result;
//get timestep vector from grid_init function ...
std::string master_init_code = "mysetup <- master_init(setup=setup)";
R.parseEval(master_init_code);
dt_differ = R.parseEval("mysetup$dt_differ");
MPI_Bcast(&dt_differ, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
} else { // workers will only read the setup DataFrame defined by input file
R.parseEval("mysetup <- setup");
MPI_Bcast(&dt_differ, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
}
if (world_rank==0) {
cout << "CPP: R init done on process with rank " << world_rank << endl;
}
//initialize chemistry on all processes
std::string init_chemistry_code = "mysetup <- init_chemistry(setup=mysetup)";
R.parseEval(init_chemistry_code);
/* Retrieve state_C from R context for MPI buffer generation */
Rcpp::DataFrame state_C = R.parseEval("mysetup$state_C");
/* Init Parallel helper functions */
R["n_procs"] = world_size-1; /* worker count */
R["work_package_size"] = work_package_size;
// Removed additional field for ID in previous versions
if (world_rank == 0)
{
mpi_buffer = (double*) calloc(state_C.nrow() * (state_C.ncol()), sizeof(double));
} else
{
mpi_buffer = (double*) calloc((work_package_size * (state_C.ncol())) + BUFFER_OFFSET, sizeof(double));
mpi_buffer_results = (double*) calloc(work_package_size * (state_C.ncol()), sizeof(double));
}
if (world_rank==0) {
cout << "CPP: parallel init completed (buffers allocated)!" << endl;
}
// MDL: pass to R the DHT stuff (basically, only for storing of
// simulation parameters). These 2 variables are always defined:
R["dht_enabled"] = dht_enabled;
R["dht_log"] = dht_logarithm;
if (dht_enabled)
{
//cout << "\nCreating DHT\n";
//determine size of dht entries
int dht_data_size = state_C.ncol() * sizeof(double);
int dht_key_size = state_C.ncol() * sizeof(double) + (dt_differ * sizeof(double));
//determine bucket count for preset memory usage
//bucket size is key + value + 1 byte for status
int dht_buckets_per_process = dht_size_per_process / (1 + dht_data_size + dht_key_size);
// MDL : following code moved here from worker.cpp
/*Load significance vector from R setup file (or set default)*/
bool signif_vector_exists = R.parseEval("exists('signif_vector')");
if (signif_vector_exists)
{
dht_significant_digits_vector = as<std::vector<int>>(R["signif_vector"]);
} else
{
dht_significant_digits_vector.assign(dht_object->key_size / sizeof(double), dht_significant_digits);
}
/*Load property type vector from R setup file (or set default)*/
bool prop_type_vector_exists = R.parseEval("exists('prop_type')");
if (prop_type_vector_exists)
{
prop_type_vector = as<std::vector<string>>(R["prop_type"]);
} else
{
prop_type_vector.assign(dht_object->key_size / sizeof(double), "act");
}
if(world_rank == 0)
{
//print only on master, values are equal on all workes
cout << "CPP: dht_data_size: " << dht_data_size << "\n";
cout << "CPP: dht_key_size: " << dht_key_size << "\n";
cout << "CPP: dht_buckets_per_process: " << dht_buckets_per_process << endl;
// MDL: new output on signif_vector and prop_type
if (signif_vector_exists) {
cout << "CPP: using problem-specific rounding digits: " << endl;
R.parseEval("print(data.frame(prop=prop, type=prop_type, digits=signif_vector))");
} else
{
cout << "CPP: using DHT default rounding digits = " << dht_significant_digits << endl;
}
// MDL: pass to R the DHT stuff. These variables exist
// only if dht_enabled is true
R["dht_final_signif"] = dht_significant_digits_vector;
R["dht_final_proptype"] = prop_type_vector;
}
if (dht_strategy == 0)
{
if(world_rank != 0) {
dht_object = DHT_create(dht_comm, dht_buckets_per_process, dht_data_size, dht_key_size, get_md5);
//storing for access from worker and callback functions
fuzzing_buffer = (double *) malloc (dht_key_size);
}
} else {
dht_object = DHT_create(MPI_COMM_WORLD, dht_buckets_per_process, dht_data_size, dht_key_size, get_md5);
}
if (world_rank==0) {
cout << "CPP: DHT successfully created!" << endl;
}
}
// MDL: store all parameters
if (world_rank==0) {
cout << "CPP: Calling R Function to store calling parameters" << endl;
R.parseEvalQ("StoreSetup(setup=mysetup)");
}
MPI_Barrier(MPI_COMM_WORLD);
if (world_rank == 0)
{ /* This is executed by the master */
Rcpp::NumericVector master_send;
Rcpp::NumericVector master_recv;
sim_a_seq = MPI_Wtime();
worker_struct* workerlist = (worker_struct*) calloc(world_size-1, sizeof(worker_struct));
int need_to_receive;
MPI_Status probe_status;
double* timings;
uint64_t* dht_perfs = NULL;
int local_work_package_size;
// a temporary send buffer
double* send_buffer;
send_buffer = (double*) calloc((work_package_size * (state_C.ncol() )) + BUFFER_OFFSET, sizeof(double));
// helper variables
int iteration;
double dt, current_sim_time;
int n_wp = 1; // holds the actual number of wp which is
// computed later in R::distribute_work_packages()
std::vector<int> wp_sizes_vector; // vector with the sizes of
// each package
sim_start = MPI_Wtime();
//Iteration Count is dynamic, retrieving value from R (is only needed by master for the following loop)
uint32_t maxiter = R.parseEval("mysetup$maxiter");
sim_b_seq = MPI_Wtime();
cummul_master_seq_pre_loop += sim_b_seq - sim_a_seq;
/*SIMULATION LOOP*/
for(uint32_t iter = 1; iter < maxiter+1; iter++)
{
sim_a_seq = MPI_Wtime();
cummul_master_send = 0.f;
cummul_master_recv = 0.f;
cout << "CPP: Evaluating next time step" << endl;
R.parseEvalQ("mysetup <- master_iteration_setup(mysetup)");
/*displaying iteration number, with C++ and R iterator*/
cout << "CPP: Going through iteration " << iter << endl;
cout << "CPP: R's $iter: "<< ((uint32_t) (R.parseEval("mysetup$iter"))) <<". Iteration" << endl;
cout << "CPP: Calling Advection" << endl;
sim_b_transport = MPI_Wtime();
R.parseEvalQ("mysetup <- master_advection(setup=mysetup)");
sim_a_transport = MPI_Wtime();
cout << "CPP: Chemistry" << endl;
/*Fallback for sequential execution*/
sim_b_chemistry = MPI_Wtime();
if(world_size == 1)
{
// MDL : the transformation of values into pH and pe
// takes now place in master_advection() so the
// following line is unneeded
// R.parseEvalQ("mysetup$state_T <- RedModRphree::Act2pH(mysetup$state_T)");
R.parseEvalQ("result <- slave_chemistry(setup=mysetup, data=mysetup$state_T)");
R.parseEvalQ("mysetup <- master_chemistry(setup=mysetup, data=result)");
} else { /*send work to workers*/
// NEW: only in the first iteration we call
// R::distribute_work_packages()!!
if (iter==1)
{
R.parseEvalQ("wp_ids <- distribute_work_packages(len=nrow(mysetup$state_T), package_size=work_package_size)");
// we only sort once the vector
R.parseEvalQ("ordered_ids <- order(wp_ids)");
R.parseEvalQ("wp_sizes_vector <- compute_wp_sizes(wp_ids)");
n_wp = (int) R.parseEval("length(wp_sizes_vector)");
wp_sizes_vector = as<std::vector<int>>(R["wp_sizes_vector"]);
cout << "CPP: Total number of work packages: " << n_wp << endl;
R.parseEval("stat_wp_sizes(wp_sizes_vector)");
}
/* shuffle and extract data
MDL: we now apply :Act2pH directly in master_advection
*/
// R.parseEval("tmp <- shuffle_field(RedModRphree::Act2pH(mysetup$state_T), ordered_ids)");
R.parseEval("tmp <- shuffle_field(mysetup$state_T, ordered_ids)");
Rcpp::DataFrame chemistry_data = R.parseEval("tmp");
convert_R_Dataframe_2_C_buffer(mpi_buffer, chemistry_data);
// cout << "CPP: shuffle_field() done" << endl;
/* send and receive work; this is done by counting
* the wp */
int pkg_to_send = n_wp;
int pkg_to_recv = n_wp;
size_t colCount = chemistry_data.ncol();
int free_workers = world_size-1;
double* work_pointer = mpi_buffer;
sim_c_chemistry = MPI_Wtime();
/* visual progress */
float progress = 0.0;
int barWidth = 70;
//retrieve data from R runtime
iteration = (int) R.parseEval("mysetup$iter");
dt = (double) R.parseEval("mysetup$requested_dt");
current_sim_time = (double) R.parseEval("mysetup$simulation_time-mysetup$requested_dt");
int count_pkgs = 0;
sim_b_seq = MPI_Wtime();
sim_c_chemistry = MPI_Wtime();
while (pkg_to_recv > 0) // start dispatching work packages
{
/* visual progress */
progress = (float) (count_pkgs+1)/n_wp;
cout << "[";
int pos = barWidth * progress;
for (int iprog = 0; iprog < barWidth; ++iprog) {
if (iprog < pos)
cout << "=";
else if (iprog == pos)
cout << ">";
else
cout << " ";
}
std::cout << "] " << int(progress * 100.0) << " %\r";
std::cout.flush();
/* end visual progress */
if (pkg_to_send > 0) {
master_send_a = MPI_Wtime();
/*search for free workers and send work*/
for (int p = 0; p < world_size-1; p++) {
if (workerlist[p].has_work == 0 && pkg_to_send > 0) /* worker is free */ {
// to enable different work_package_size, set local copy of work_package_size to
// either global work_package size or remaining 'to_send' packages
// to_send >= work_package_size ? local_work_package_size = work_package_size : local_work_package_size = to_send;
local_work_package_size = (int) wp_sizes_vector[count_pkgs];
count_pkgs++;
// cout << "CPP: sending pkg n. " << count_pkgs << " with size " << local_work_package_size << endl;
/*push pointer forward to next work package, after taking the current one*/
workerlist[p].send_addr = work_pointer;
int end_of_wp = local_work_package_size * colCount;
work_pointer = &(work_pointer[end_of_wp]);
// fill send buffer starting with work_package ...
std::memcpy(send_buffer, workerlist[p].send_addr, (end_of_wp) * sizeof(double));
// followed by: work_package_size
send_buffer[end_of_wp] = (double) local_work_package_size;
// current iteration of simulation
send_buffer[end_of_wp + 1] = (double) iteration;
// size of timestep in seconds
send_buffer[end_of_wp + 2] = dt;
// current time of simulation (age) in seconds
send_buffer[end_of_wp + 3] = current_sim_time;
// placeholder for work_package_count
send_buffer[end_of_wp + 4] = 0.;
/* ATTENTION Worker p has rank p+1 */
MPI_Send(send_buffer, end_of_wp + BUFFER_OFFSET, MPI_DOUBLE, p+1, TAG_WORK, MPI_COMM_WORLD);
workerlist[p].has_work = 1;
free_workers--;
pkg_to_send -= 1;
}
}
master_send_b = MPI_Wtime();
cummul_master_send += master_send_b - master_send_a;
}
/*check if there are results to receive and receive them*/
need_to_receive = 1;
master_recv_a = MPI_Wtime();
while(need_to_receive && pkg_to_recv > 0)
{
if (pkg_to_send > 0 && free_workers > 0)
MPI_Iprobe(MPI_ANY_SOURCE, TAG_WORK, MPI_COMM_WORLD, &need_to_receive, &probe_status);
else {
idle_a = MPI_Wtime();
MPI_Probe(MPI_ANY_SOURCE, TAG_WORK, MPI_COMM_WORLD, &probe_status);
idle_b = MPI_Wtime();
master_idle += idle_b - idle_a;
}
if(need_to_receive)
{
int p = probe_status.MPI_SOURCE;
int size;
MPI_Get_count(&probe_status, MPI_DOUBLE, &size);
MPI_Recv(workerlist[p-1].send_addr, size, MPI_DOUBLE, p, TAG_WORK, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
workerlist[p-1].has_work = 0;
pkg_to_recv -= 1;
free_workers++;
}
}
master_recv_b = MPI_Wtime();
cummul_master_recv += master_recv_b - master_recv_a;
}
sim_c_seq = MPI_Wtime();
// don't overwrite last progress
cout << endl;
sim_d_chemistry = MPI_Wtime();
cummul_workers += sim_d_chemistry - sim_c_chemistry;
convert_C_buffer_2_R_Dataframe(mpi_buffer, chemistry_data);
R["chemistry_data"] = chemistry_data;
/* unshuffle results */
R.parseEval("result <- unshuffle_field(chemistry_data, ordered_ids)");
/* do master stuff */
sim_e_chemistry = MPI_Wtime();
R.parseEvalQ("mysetup <- master_chemistry(setup=mysetup, data=result)");
sim_f_chemistry = MPI_Wtime();
cummul_chemistry_master += sim_f_chemistry - sim_e_chemistry;
}
sim_a_chemistry = MPI_Wtime();
// MDL master_iteration_end just writes on disk state_T and
// state_C after every iteration if the cmdline option
// --ignore-results is not given (and thus the R variable
// store_result is TRUE)
R.parseEvalQ("mysetup <- master_iteration_end(setup=mysetup)");
cummul_transport += sim_a_transport - sim_b_transport;
cummul_chemistry += sim_a_chemistry - sim_b_chemistry;
cout << endl << "CPP: End of *coupling* iteration "<< iter <<"/" << maxiter << endl << endl;
if (dht_enabled) {
for (int i=1; i < world_size; i++) {
MPI_Send(NULL, 0, MPI_DOUBLE, i, TAG_DHT_STATS, MPI_COMM_WORLD);
}
// MPI_Barrier(MPI_COMM_WORLD);
if (dht_snaps == 2) {
std::stringstream outfile;
outfile << out_dir << "/iter_" << std::setfill('0') << std::setw(3) << iter << ".dht";
for (int i=1; i < world_size; i++) {
MPI_Send(outfile.str().c_str(), outfile.str().size(), MPI_CHAR, i, TAG_DHT_STORE, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
}
}
sim_d_seq = MPI_Wtime();
cummul_master_seq_loop += ((sim_b_seq - sim_a_seq) - (sim_a_transport - sim_b_transport)) + (sim_d_seq - sim_c_seq);
master_send.push_back(cummul_master_send, "it_" + to_string(iter));
master_recv.push_back(cummul_master_recv, "it_" + to_string(iter));
} // END SIMULATION LOOP
sim_end = MPI_Wtime();
if (dht_enabled && dht_snaps > 0) {
cout << "CPP: Master: Instruct workers to write DHT to file ..." << endl;
std::string outfile;
outfile = out_dir + ".dht";
for (int i=1; i < world_size; i++) {
MPI_Send(outfile.c_str(), outfile.size(), MPI_CHAR, i, TAG_DHT_STORE, MPI_COMM_WORLD);
}
MPI_Barrier(MPI_COMM_WORLD);
cout << "CPP: Master: ... done" << endl;
}
Rcpp::NumericVector phreeqc_time;
Rcpp::NumericVector dht_get_time;
Rcpp::NumericVector dht_fill_time;
Rcpp::IntegerVector phreeqc_counts;
Rcpp::NumericVector idle_worker;
int phreeqc_tmp;
timings = (double*) calloc(3, sizeof(double));
if (dht_enabled) {
dht_hits = 0;
dht_miss = 0;
dht_collision = 0;
dht_perfs = (uint64_t*) calloc(3, sizeof(uint64_t));
}
double idle_worker_tmp;
for (int p = 0; p < world_size-1; p++)
{
/* ATTENTION Worker p has rank p+1 */
/* Send termination message to worker */
MPI_Send(NULL, 0, MPI_DOUBLE, p+1, TAG_FINISH, MPI_COMM_WORLD);
MPI_Recv(timings, 3, MPI_DOUBLE, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
phreeqc_time.push_back(timings[0], "w" + to_string(p+1));
MPI_Recv(&phreeqc_tmp, 1, MPI_INT, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
phreeqc_counts.push_back(phreeqc_tmp, "w" + to_string(p+1));
MPI_Recv(&idle_worker_tmp, 1, MPI_DOUBLE, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
idle_worker.push_back(idle_worker_tmp, "w" + to_string(p+1));
if (dht_enabled)
{
dht_get_time.push_back(timings[1], "w" + to_string(p+1));
dht_fill_time.push_back(timings[2], "w" + to_string(p+1));
MPI_Recv(dht_perfs, 3, MPI_UNSIGNED_LONG_LONG, p+1, TAG_DHT_PERF, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
dht_hits += dht_perfs[0];
dht_miss += dht_perfs[1];
dht_collision += dht_perfs[2];
}
}
R.parseEvalQ("profiling <- list()");
R["simtime"] = sim_end - sim_start;
R.parseEvalQ("profiling$simtime <- simtime");
R["simtime_transport"] = cummul_transport;
R.parseEvalQ("profiling$simtime_transport <- simtime_transport");
R["simtime_chemistry"] = cummul_chemistry;
R.parseEvalQ("profiling$simtime_chemistry <- simtime_chemistry");
R["simtime_workers"] = cummul_workers;
R.parseEvalQ("profiling$simtime_workers <- simtime_workers");
R["simtime_chemistry_master"] = cummul_chemistry_master;
R.parseEvalQ("profiling$simtime_chemistry_master <- simtime_chemistry_master");
R["seq_master_prep"] = cummul_master_seq_pre_loop;
R.parseEvalQ("profiling$seq_master_prep <- seq_master_prep");
R["seq_master_loop"] = cummul_master_seq_loop;
R.parseEvalQ("profiling$seq_master_loop <- seq_master_loop");
// R["master_send"] = master_send;
// R.parseEvalQ("profiling$master_send <- master_send");
// R["master_recv"] = master_recv;
// R.parseEvalQ("profiling$master_recv <- master_recv");
R["idle_master"] = master_idle;
R.parseEvalQ("profiling$idle_master <- idle_master");
R["idle_worker"] = idle_worker;
R.parseEvalQ("profiling$idle_worker <- idle_worker");
R["phreeqc_time"] = phreeqc_time;
R.parseEvalQ("profiling$phreeqc <- phreeqc_time");
R["phreeqc_count"] = phreeqc_counts;
R.parseEvalQ("profiling$phreeqc_count <- phreeqc_count");
if (dht_enabled)
{
R["dht_hits"] = dht_hits;
R.parseEvalQ("profiling$dht_hits <- dht_hits");
R["dht_miss"] = dht_miss;
R.parseEvalQ("profiling$dht_miss <- dht_miss");
R["dht_collision"] = dht_collision;
R.parseEvalQ("profiling$dht_collisions <- dht_collision");
R["dht_get_time"] = dht_get_time;
R.parseEvalQ("profiling$dht_get_time <- dht_get_time");
R["dht_fill_time"] = dht_fill_time;
R.parseEvalQ("profiling$dht_fill_time <- dht_fill_time");
}
free(workerlist);
free(timings);
if (dht_enabled)
free(dht_perfs);
cout << "CPP: Done! Results are stored as R objects into <" << out_dir << "/timings.rds>" << endl;
/*exporting results and profiling data*/
std::string r_vis_code;
r_vis_code = "saveRDS(profiling, file=paste0(fileout,'/timings.rds'));";
R.parseEval(r_vis_code);
} else { /*This is executed by the workers*/
if (!dht_file.empty()) {
int res = file_to_table((char *) dht_file.c_str());
if (res != DHT_SUCCESS) {
if (res == DHT_WRONG_FILE) {
if (world_rank == 2) cerr << "CPP: Worker: Wrong File" << endl;
} else {
if (world_rank == 2) cerr << "CPP: Worker: Error in loading current state of DHT from file" << endl;
}
return EXIT_FAILURE;
} else {
if (world_rank == 2) cout << "CPP: Worker: Successfully loaded state of DHT from file " << dht_file << endl;
std::cout.flush();
}
}
worker_function(R);
free(mpi_buffer_results);
}
cout << "CPP: finished, cleanup of process " << world_rank << endl;
if (dht_enabled)
{
if (dht_strategy == 0)
{
if(world_rank != 0) {
DHT_free(dht_object, NULL, NULL);
}
} else {
DHT_free(dht_object, NULL, NULL);
}
}
free(mpi_buffer);
MPI_Finalize();
if (world_rank==0) {
cout << "CPP: done, bye!" << endl;
}
exit(0);
}

662
src/kin_r_library.R Normal file
View File

@ -0,0 +1,662 @@
## Simple function to check file extension. It is needed to check if
## the GridFile is SUM (MUFITS format) or rds/RData
FileExt <- function (x)
{
pos <- regexpr("\\.([[:alnum:]]+)$", x)
ifelse(pos > -1L, substring(x, pos + 1L), "")
}
### This function is only called by the master process. It sets up
### the data structures for the coupled simulations and performs the
### first timestep or "iteration zero", possibly involving the phreeqc
### calculation of the initial state (if it is not already provided as
### matrix) and the first Advection iteration
master_init <- function(setup)
{
msgm("Process with rank 0 reading GRID and MUFITS_sims")
## MDL TODO: actually grid and all the MUFITS snapshots should be
## read and stored only by the master process!!
## Setup the directory where we will store the results
verb <- FALSE
if (local_rank==0) {
verb <- TRUE ## verbosity loading MUFITS results
if (!dir.exists(fileout)) {
dir.create(fileout)
msgm("created directory ", fileout)
} else {
msgm("dir ", fileout," already exists, I will overwrite!")
}
if (!exists("store_result"))
msgm("store_result doesn't exist!")
else
msgm("store_result is ", store_result)
} else {
}
## to enhance flexibility, gridfile can be now given in SUM or,
## already processed, in rds format. We check for extension first.
gridext <- FileExt(setup$gridfile)
if (gridext=="SUM") {
msgm("Generating grid from MUFITS SUM file...")
mufits_grid <- ReadGrid(setup$gridfile, verbose=verb)
} else {
msgm("Reading grid from rds file...")
mufits_grid <- readRDS(setup$gridfile)
}
## load all the snapshots at once. The setup argument "snapshots"
## now can be either a *directory* where the .SUM files are stored
## or a rds file.
if (dir.exists(setup$snapshots)) {
msgm("<snapshots> points to a directory; reading from SUM files in there...")
mufits_sims <- LoadMufitsSumRes(dir=setup$snapshots, verbose=verb)
} else {
msgm("<snapshots> points to a file. Reading as rds...")
mufits_sims <- readRDS(setup$snapshots)
}
## Since this function is evaluated by the R process called from
## the C++ program, we need to make available all these variables
## to the R parent frame!
assign("mufits_grid", mufits_grid, pos=parent.frame())
assign("mufits_sims", mufits_sims, pos=parent.frame())
## cat(local_rank, "assignement complete\n")
## we calculate the *coupling* iterations
nstep <- length(mufits_sims)
msgm("using", nstep,"flow snapshots")
## cat(local_rank, "nstep:", nstep, "\n")
## cat(local_rank, "names:", paste(names(mufits_sims), collate=","), "\n")
## we have these snapshots; we output the results of the coupling
## after each timesteps
timesteps <- diff(sapply(mufits_sims, function(x) {return(x$info)}))
## cat(local_rank, "timesteps:", paste0(timesteps, collate=" "), "\n")
dt_differ <- abs(max(timesteps) - min(timesteps)) != 0
maxiter <- length(timesteps)
## steady state after last flow snapshot? It is controlled by
## "setup$prolong", containing an integer which represents the
## number of further iterations
setup$steady <- FALSE
if ("prolong" %in% names(setup)) {
last_dt <- timesteps[length(timesteps)]
timesteps <- c(timesteps, rep(last_dt, setup$prolong))
msgm("simulation prolonged for", setup$prolong, "additional iterations")
## we set this flag to TRUE to check afterwards which snapshot we need to use at each iteration
setup$steady <- TRUE
setup$last_snapshot <- maxiter
maxiter <- maxiter + setup$prolong
}
## now that we know how many iterations we're gonna have, we can
## setup the optional output
if (local_rank==0) {
if (is.null(setup$iter_output)) {
## "iter_output" is not specified: store all iterations
setup$out_save <- seq(1,maxiter)
msgm("setup$iter_output unspecified, storing all iterations")
} else if (setup$iter_output=="all") {
## "iter_output" is "all": store all iterations
setup$out_save <- seq(1,maxiter)
msgm("storing all iterations")
} else if (is.numeric(setup$iter_output)) {
msgm("storing iterations:", paste(setup$iter_output, collapse=", "))
setup$out_save <- as.integer(setup$iter_output)
} else if (setup$iter_output=="last") {
msgm("storing only the last iteration")
setup$out_save <- maxiter
} else {## fallback to "all"
setup$out_save <- seq(1,maxiter)
msgm("invalid setup$iter_output: storing all iterations")
}
}
setup$iter <- 1
setup$maxiter <- maxiter
setup$timesteps <- timesteps
setup$simulation_time <- 0
setup$dt_differ <- dt_differ
if (nrow(setup$bound)==1) {
boundmatAct <- t(RedModRphree::pH2Act(setup$bound))
msg("formed correct matrix from setup$bound:")
print(boundmatAct)
} else {
boundmatAct <- RedModRphree::pH2Act(setup$bound)
}
setup$boundmatAct <- boundmatAct
return(setup)
}
## This function is called by all processes.
## Since the worker processes need state_C in worker.cpp -> worker_function()
## even the worker need to run this code.
## TODO: worker.cpp -> worker_function() --> state_C is only needed to obtain headers
## and size of dataframe ... if this will be adjusted, this function will be
## unnecessary for workers
init_chemistry <- function(setup)
{
## setup the chemistry
if (!is.matrix(setup$initsim)) {
msgm("initial state defined through PHREEQC simulation, assuming homogeneous medium!")
tmpfirstrun <- RunPQC(setup$initsim, second=TRUE)
## if (local_rank==0){
## print(tmpfirstrun)
## }
remcolnames <- colnames(tmpfirstrun)
if (nrow(tmpfirstrun) > 1) {
## msgm("Iter 0 selecting second row")
firstrun <- matrix(tmpfirstrun[2,], nrow=1, byrow=TRUE)
colnames(firstrun) <- remcolnames
} else {
firstrun <- tmpfirstrun
}
state_C <- matrix(rep(firstrun,setup$n), byrow=TRUE, ncol=length(setup$prop))
colnames(state_C) <- colnames(firstrun)
## if (local_rank==0)
## saveRDS(state_C, "initstate.rds")
} else {
msgm("given initial state")
state_C <- setup$initsim
}
## save state_T and state_C
setup$state_C <- state_C
return(setup)
}
## relic code, may be useful in future
finit <- function(setup)
{
## saved <- setup$saved
## state_C <- setup$state_C
## timesteps <- setup$timesteps
## out_save <- setup$out_save
## to_save <- setup$to_save
## if (out_save) {
## msgm("saving <saved>")
## attr(saved,"savedtimesteps") <- timesteps[save]
## attr(saved,"timesteps") <- timesteps
## return(saved)
## } else {
## attr(saved,"timesteps") <- timesteps
## return(state_C)
## }
}
## This function, called only by the master, computes the *inner*
## timesteps for transport given the requested simulation timestep and
## the current snapshot (through the Courant Fredrich Levy condition)
## NB: we always iterate: 1)T 2)C
master_iteration_setup <- function(setup)
{
## in this version, "grid" and "mufits_sims" are variables already
## living in the environment of the R process with local_rank 0
## if (local_rank == 0) {
## cat("Process ", local_rank, "iteration_start\n")
## } else {
## cat("Process ", local_rank, "SHOULD NOT call iteration_start!\n")
## }
## in C++, "iter" starts from 1
iter <- setup$iter
next_dt <- setup$timesteps[iter]
## setting the current flow snapshot - we have to check if
## we need prolongation or not
if (setup$steady) {
if (iter > setup$last_snapshot)
snap <- mufits_sims[[setup$last_snapshot]]
else
snap <- mufits_sims[[iter]]
} else
snap <- mufits_sims[[iter]]
msgm("Current simulation time:", round(setup$simulation_time),"[s] or ", round(setup$simulation_time/3600/24,2), "[day]")
msgm("Requested time step for current iteration:", round(next_dt),"[s] or ", round(next_dt/3600/24,2), "[day]")
setup$simulation_time <- setup$simulation_time+next_dt
msgm("Target simulation time:", round(setup$simulation_time),"[s] or ", round((setup$simulation_time)/3600/24,2), "[day]")
## since the phase and density may change in MUFITS
## simulations/snapshots, we MUST tell their name at setup. Here
## we match the right column for both variables
nphase <- match(setup$phase, colnames(snap$conn))
ndensity <- match(setup$density, colnames(snap$cell))
## the "new cfl"
flux <- snap$conn[, nphase]
cellporvol <- mufits_grid$cell$PORO * mufits_grid$cell$CELLVOL
## in MUFITS, positive flux means from CELLID1 to CELLID2
pos_flux <- ifelse(flux>0, snap$conn$CONNID1, snap$conn$CONNID2)
poro <- mufits_grid$cell$PORO[pos_flux]
vol <- mufits_grid$cell$CELLVOL[snap$conn$CONNID1]
## extract the right density for the right fluxes
density <- snap$cell[pos_flux, ndensity]
## transform flux from Mufits ton/day to m3/s. This is a VOLUMETRIC FLUX
fluxv <- flux/3600/24*1000/density
## now we want the darcy velocity
excl0 <- which(abs(fluxv)<.Machine$double.eps)
## msgm("excl0"); print(excl0)
## msgm("length(excl0)"); print(length(excl0))
## The CFL condition is expressed in terms of total flux and total
## pore volume
cfl <- as.numeric(min(abs(vol*poro/fluxv)[-excl0]))
if (!is.finite(cfl))
stop(msgm("CFL is ", cfl,"; quitting"))
allowed_dt <- setup$Cf*cfl
requested_dt <- next_dt ## target_time - setup$simulation_time
msgm("CFL allows dt of <", round(allowed_dt, 2)," [s]; multiplicator is ", setup$Cf,
"; requested_dt is: ", round(requested_dt, 2))
if (requested_dt > allowed_dt) {
inniter <- requested_dt%/%allowed_dt ## integer division
inner_timesteps <- c(rep(allowed_dt, inniter), requested_dt%%allowed_dt)
## was: inner_timesteps <- c(rep(allowed_dt, inniter), requested_dt - allowed_dt * inniter)
} else {
inner_timesteps <- requested_dt
inniter <- 1
}
msgm("Performing ", inniter, " inner iterations")
setup$inner_timesteps <- inner_timesteps
setup$requested_dt <- requested_dt
setup$allowed_dt <- allowed_dt
setup$inniter <- inniter
setup$iter <- iter
## TODO these 3 can be actually spared
setup$fluxv <- fluxv
## setup$no_transport_conn <- excl0
setup$cellporvol <- cellporvol
return(setup)
}
## This function, called only by master, stores on disk the last
## calculated time step if store_result is TRUE and increments the
## iteration counter
master_iteration_end <- function(setup) {
iter <- setup$iter
## MDL Write on disk state_T and state_C after every iteration
## comprised in setup$out_save
if (store_result) {
if (iter %in% setup$out_save) {
nameout <- paste0(fileout, '/iter_', sprintf('%03d', iter), '.rds')
info <- list(tr_req_dt = as.integer(setup$requested_dt),
tr_allow_dt = setup$allowed_dt,
tr_inniter = as.integer(setup$inniter)
)
saveRDS(list(T=setup$state_T, C=setup$state_C,
simtime=as.integer(setup$simulation_time),
tr_info=info), file=nameout)
msgm("results stored in <", nameout, ">")
}
}
msgm("done iteration", iter, "/", setup$maxiter)
setup$iter <- setup$iter + 1
return(setup)
}
## master: compute advection
master_advection <- function(setup) {
msgm("requested dt=", setup$requested_dt)
concmat <- RedModRphree::pH2Act(setup$state_C)
inner_timesteps <- setup$inner_timesteps
Cf <- setup$Cf
iter <- setup$iter
## MDL: not used at the moment, so commenting it out
## excl <- setup$no_transport_conn
## msgm("excl"); print(excl)
immobile <- setup$immobile
boundmat <- setup$boundmatAct
## setting the current flow snapshot - we have to check if
## we are in "prolongation" or not
if (setup$steady) {
if (iter > setup$last_snapshot)
snap <- mufits_sims[[setup$last_snapshot]]
else
snap <- mufits_sims[[iter]]
} else
snap <- mufits_sims[[iter]]
## conc is a matrix with colnames
if (is.matrix(concmat)) {
if (length(immobile)>0)
val <- concmat[,-immobile]
else
val <- concmat
## sort the columns of the matrix matching the names of boundary
sptr <- colnames(val)
inflow <- boundmat[, colnames(boundmat)!="cbound", drop=FALSE]
spin <- colnames(inflow)
ind <- match(spin,sptr)
if (TRUE %in% is.na(ind)) {
msgm("Missing boundary conditions for some species")
val <- cbind(val,matrix(0,ncol=sum(is.na(ind)),nrow=nrow(val) ))
colnames(val) <- c(sptr,spin[is.na(ind)])
}
sptr <- colnames(val)
ind <- match(sptr,spin)
## msgm("ind"); print(ind)
cnew <- val
msgm("Computing transport of ", ncol(val), " species")
## if (local_rank==0) {
## saveRDS(list(conc=newconc, times=times, activeCells=grid$cell$CELLID[-exclude_cell], fluxv=fluxv),
## file=paste0("TranspUpwind_", local_rank, ".RData"))
## }
## cnew[ boundmat[, 1] ] <- boundmat[, 2]
## MDL 20200227: Here was the bug: "excl" refers to
## CONNECTIONS and not GRID_CELLS!!
## cells where we won't update the concentrations: union of
## boundary and no-flow cells
## if (length(excl) == 0)
## exclude_cell <- sort(c(boundmat[, 1]))
## else
## exclude_cell <- sort(c(boundmat[, 1], excl))
exclude_cell <- sort(c(boundmat[, 1]))
## msgm("mufits_grid$cell$CELLID[-exclude_cell]:")
## print(mufits_grid$cell$CELLID[-exclude_cell])
for (i in seq(1, ncol(val))) {
## form a 2-column matrix with cell id and boundary
## concentration for those elements
newboundmat <- boundmat[,c(1, ind[i]+1)]
## vector with the old concentrations
concv <- val[,i]
## apply boundary conditions to the concentration vector
## (they should stay constant but better safe than sorry)
concv[newboundmat[, 1]] <- newboundmat[, 2]
## call the function
cnew[,i] <- CppTransportUpwindIrr(concv = concv,
times = inner_timesteps,
activeCells = mufits_grid$cell$CELLID[-exclude_cell],
fluxv = setup$fluxv,
listconn = mufits_grid$listconn,
porVol = setup$cellporvol)
}
colnames(cnew) <- colnames(val)
if ( length(immobile) > 0) {
res <- cbind(cnew, concmat[,immobile])
} else {
res <- cnew
}
## check for negative values. This SHOULD NOT OCCUR and may be
## the result of numerical dispersion (or bug in my transport
## code!)
if (any(res <0 )) {
rem_neg <- which(res<0, arr.ind=TRUE)
a <- nrow(rem_neg)
res[res < 0 ] <- 0
msgm("-> ", a, "concentrations were negative")
print(rem_neg)
}
## msgm("state_T after iteration", setup$iter, ":")
## print(head(res))
} else {
msgm("state_C at iteration", setup$iter, " is not a Matrix, doing nothing!")
}
## retransform concentrations H+ and e- into pH and pe
state_T <- RedModRphree::Act2pH(res)
setup$state_T <- state_T
return(setup)
}
## function for the workers to compute chemistry through PHREEQC
slave_chemistry <- function(setup, data)
{
base <- setup$base
first <- setup$first
prop <- setup$prop
immobile <- setup$immobile
kin <- setup$kin
ann <- setup$ann
if (local_rank == 0) {
iter <- setup$iter
timesteps <- setup$timesteps
dt <- timesteps[iter]
}
state_T <- data ## not the global field, but the work-package
## treat special H+/pH, e-/pe cases
state_T <- RedModRphree::Act2pH(state_T)
## reduction of the problem
if(setup$reduce)
reduced <- ReduceStateOmit(state_T, omit=setup$ann)
else
reduced <- state_T
## if (local_rank==1) {
## msg("worker", local_rank,"; iter=", iter, "dt=", dt)
## msg("reduce is", setup$reduce)
## msg("data:")
## print(reduced)
## msg("base:")
## print(base)
## msg("first:")
## print(first)
## msg("ann:")
## print(ann)
## msg("prop:")
## print(prop)
## msg("immobile:")
## print(immobile)
## msg("kin:")
## print(kin)
## }
## form the PHREEQC input script for the current work package
inplist <- splitMultiKin(data=reduced, procs=1, base=base, first=first,
ann=ann, prop=prop, minerals=immobile, kin=kin, dt=dt)
## if (local_rank==1 & iter==1)
## RPhreeWriteInp("FirstInp", inplist)
tmpC <- RunPQC(inplist, procs=1, second=TRUE)
## recompose after the reduction
if (setup$reduce)
state_C <- RecomposeState(tmpC, reduced)
else {
state_C <- tmpC
}
## the next line is needed since we don't need all columns of
## PHREEQC output
return(state_C[, prop])
}
## This function, called by master
master_chemistry <- function(setup, data)
{
state_T <- setup$state_T
msgm(" chemistry iteration", setup$iter)
## treat special H+/pH, e-/pe cases
state_T <- RedModRphree::Act2pH(state_T)
## reduction of the problem
if(setup$reduce)
reduced <- ReduceStateOmit(state_T, omit=setup$ann)
else
reduced <- state_T
### inject data from workers
res_C <- data
rownames(res_C) <- NULL
## print(res_C)
if (nrow(res_C) > nrow(reduced)) {
res_C <- res_C[seq(2,nrow(res_C), by=2),]
}
## recompose after the reduction
if (setup$reduce)
state_C <- RecomposeState(res_C, reduced)
else {
state_C <- res_C
}
setup$state_C <- state_C
setup$reduced <- reduced
return(setup)
}
## Adapted version for "reduction"
ReduceStateOmit <- function (data, omit=NULL, sign=6)
{
require(mgcv)
rem <- colnames(data)
if (is.list(omit)) {
indomi <- match(names(omit), colnames(data))
datao <- data[, -indomi]
} else datao <- data
datao <- signif(datao, sign)
red <- mgcv::uniquecombs(datao)
inds <- attr(red, "index")
now <- ncol(red)
## reattach the omitted column(s)
## FIXME: control if more than one ann is present
if (is.list(omit)) {
red <- cbind(red, rep( data[ 1, indomi], nrow(red)))
colnames(red)[now+1] <- names(omit)
ret <- red[, colnames(data)]
} else {
ret <- red
}
rownames(ret) <- NULL
attr(ret, "index") <- inds
return(ret)
}
## Attach the name of the calling function to the message displayed on
## R's stdout
msgm <- function (...)
{
if (local_rank==0) {
fname <- as.list(sys.call(-1))[[1]]
prefix <- paste0("R: ", fname, " ::")
cat(paste(prefix, ..., "\n"))
}
invisible()
}
## Function called by master R process to store on disk all relevant
## parameters for the simulation
StoreSetup <- function(setup) {
to_store <- vector(mode="list", length=5)
names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT")
## read the setup R file, which is sourced in kin.cpp
tmpbuff <- file(filesim, "r")
setupfile <- readLines(tmpbuff)
close.connection(tmpbuff)
to_store$Sim <- setupfile
to_store$Flow <- list(
snapshots = setup$snapshots,
gridfile = setup$gridfile,
phase = setup$phase,
density = setup$density,
dt_differ = setup$dt_differ,
prolong = setup$prolong,
maxiter = setup$maxiter,
saved_iter = setup$iter_output,
out_save = setup$out_save )
to_store$Transport <- list(
boundary = setup$bound,
Cf = setup$Cf,
prop = setup$prop,
immobile = setup$immobile,
reduce = setup$reduce )
to_store$Chemistry <- list(
nprocs = n_procs,
wp_size = work_package_size,
base = setup$base,
first = setup$first,
init = setup$initsim,
db = db,
kin = setup$kin,
ann = setup$ann)
if (dht_enabled) {
to_store$DHT <- list(
enabled = dht_enabled,
log = dht_log,
signif = dht_final_signif,
proptype = dht_final_proptype)
} else {
to_store$DHT <- FALSE
}
saveRDS(to_store, file=paste0(fileout,'/setup.rds'))
msgm("initialization stored in ", paste0(fileout,'/setup.rds'))
}

View File

@ -0,0 +1,584 @@
## Simple function to check file extension. It is needed to check if
## the GridFile is SUM (MUFITS format) or rds/RData
FileExt <- function (x)
{
pos <- regexpr("\\.([[:alnum:]]+)$", x)
ifelse(pos > -1L, substring(x, pos + 1L), "")
}
### This function is only called by the master process. It sets up
### the data structures for the coupled simulations and performs the
### first timestep or "iteration zero", possibly involving the phreeqc
### calculation of the initial state (if it is not already provided as
### matrix) and the first Advection iteration
master_init <- function(setup)
{
msgm("Process with rank 0 reading GRID and MUFITS_sims")
## MDL TODO: actually grid and all the MUFITS snapshots should be
## read and stored only by the master process!!
## Setup the directory where we will store the results
verb <- FALSE
if (local_rank==0) {
verb <- TRUE ## verbosity loading MUFITS results
if (!dir.exists(fileout)) {
dir.create(fileout)
msgm("created directory ", fileout)
} else {
msgm("dir ", fileout," already exists, I will overwrite!")
}
if (!exists("store_result"))
msgm("store_result doesn't exist!")
else
msgm("store_result is ", store_result)
} else {
}
## to enhance flexibility, gridfile can be now given in SUM or,
## already processed, in rds format. We check for extension first.
gridext <- FileExt(setup$gridfile)
if (gridext=="SUM") {
msgm("Generating grid from MUFITS SUM file...")
mufits_grid <- ReadGrid(setup$gridfile, verbose=verb)
} else {
msgm("Reading grid from rds file...")
mufits_grid <- readRDS(setup$gridfile)
}
## load all the snapshots at once. The setup argument "snapshots"
## now can be either a *directory* where the .SUM files are stored
## or a rds file.
if (dir.exists(setup$snapshots)) {
msgm("<snapshots> points to a directory; reading from SUM files in there...")
mufits_sims <- LoadMufitsSumRes(dir=setup$snapshots, verbose=verb)
} else {
msgm("<snapshots> points to a file. Reading as rds...")
mufits_sims <- readRDS(setup$snapshots)
}
## Since this function is evaluated by the R process called from
## the C++ program, we need to make available all these variables
## to the R parent frame!
assign("mufits_grid", mufits_grid, pos=parent.frame())
assign("mufits_sims", mufits_sims, pos=parent.frame())
## cat(local_rank, "assignement complete\n")
## we calculate the *coupling* iterations
nstep <- length(mufits_sims)
msgm("using", nstep,"flow snapshots")
## cat(local_rank, "nstep:", nstep, "\n")
## cat(local_rank, "names:", paste(names(mufits_sims), collate=","), "\n")
## we have these snapshots; we output the results of the coupling
## after each timesteps
timesteps <- diff(sapply(mufits_sims, function(x) {return(x$info)}))
## cat(local_rank, "timesteps:", paste0(timesteps, collate=" "), "\n")
maxiter <- length(timesteps)
## steady case not treated in this version!!!!
steady <- FALSE
setup$iter <- 1
setup$maxiter <- maxiter
setup$timesteps <- timesteps
setup$simulation_time <- 0
setup$steady <- steady
if (nrow(setup$bound)==1) {
boundmatAct <- t(RedModRphree::pH2Act(setup$bound))
msg("formed correct matrix from setup$bound:")
print(boundmatAct)
} else {
boundmatAct <- RedModRphree::pH2Act(setup$bound)
}
setup$boundmatAct <- boundmatAct
return(setup)
}
## This function is called by all processes.
## Since the worker processes need state_C in worker.cpp -> worker_function()
## even the worker need to run this code.
## TODO: worker.cpp -> worker_function() --> state_C is only needed to obtain headers
## and size of dataframe ... if this will be adjusted, this function will be
## unnecessary for workers
init_chemistry <- function(setup)
{
## setup the chemistry
if (!is.matrix(setup$initsim)) {
msgm("initial state defined through PHREEQC simulation, assuming homogeneous medium!")
tmpfirstrun <- RunPQC_Warnings(setup$initsim, second=TRUE)
## if (local_rank==0){
## print(tmpfirstrun)
## }
remcolnames <- colnames(tmpfirstrun)
if (nrow(tmpfirstrun) > 1) {
## msgm("Iter 0 selecting second row")
firstrun <- matrix(tmpfirstrun[2,], nrow=1, byrow=TRUE)
colnames(firstrun) <- remcolnames
} else {
firstrun <- tmpfirstrun
}
state_C <- matrix(rep(firstrun,setup$n), byrow=TRUE, ncol=length(setup$prop))
colnames(state_C) <- colnames(firstrun)
## if (local_rank==0)
## saveRDS(state_C, "initstate.rds")
} else {
msgm("given initial state")
state_C <- setup$initsim
}
## save state_T and state_C
setup$state_C <- state_C
return(setup)
}
## relic code, may be useful in future
finit <- function(setup)
{
## saved <- setup$saved
## state_C <- setup$state_C
## timesteps <- setup$timesteps
## out_save <- setup$out_save
## to_save <- setup$to_save
## if (out_save) {
## msgm("saving <saved>")
## attr(saved,"savedtimesteps") <- timesteps[save]
## attr(saved,"timesteps") <- timesteps
## return(saved)
## } else {
## attr(saved,"timesteps") <- timesteps
## return(state_C)
## }
}
## This function, called only by the master, computes the *inner*
## timesteps for transport given the requested simulation timestep and
## the current snapshot (through the Courant Fredrich Levy condition)
## NB: we always iterate: 1)T 2)C
master_iteration_setup <- function(setup)
{
## in this version, "grid" and "mufits_sims" are variables already
## living in the environment of the R process with local_rank 0
## if (local_rank == 0) {
## cat("Process ", local_rank, "iteration_start\n")
## } else {
## cat("Process ", local_rank, "SHOULD NOT call iteration_start!\n")
## }
## in C++, "iter" starts from 1
iter <- setup$iter
next_dt <- setup$timesteps[iter]
## setting the current snapshot
snap <- mufits_sims[[iter]]
msgm("Current simulation time:", round(setup$simulation_time),"[s] or ", round(setup$simulation_time/3600/24,2), "[day]")
msgm("Time step for current iteration:", round(next_dt),"[s] or ", round(next_dt/3600/24,2), "[day]")
setup$simulation_time <- setup$simulation_time+next_dt
msgm("Target simulation time:", round(setup$simulation_time),"[s] or ", round((setup$simulation_time)/3600/24,2), "[day]")
## since the phase and density may change in MUFITS
## simulations/snapshots, we MUST tell their name at setup. Here
## we match the right column for both variables
nphase <- match(setup$phase, colnames(snap$conn))
ndensity <- match(setup$density, colnames(snap$cell))
## the "new cfl"
flux <- snap$conn[, nphase]
cellporvol <- mufits_grid$cell$PORO * mufits_grid$cell$CELLVOL
## in MUFITS, positive flux means from CELLID1 to CELLID2
pos_flux <- ifelse(flux>0, snap$conn$CONNID1, snap$conn$CONNID2)
poro <- mufits_grid$cell$PORO[pos_flux]
vol <- mufits_grid$cell$CELLVOL[snap$conn$CONNID1]
## extract the right density for the right fluxes
density <- snap$cell[pos_flux, ndensity]
## transform flux from Mufits ton/day to m3/s. This is a VOLUMETRIC FLUX
fluxv <- flux/3600/24*1000/density
## now we want the darcy velocity
excl0 <- which(abs(fluxv)<.Machine$double.eps)
## msgm("excl0"); print(excl0)
## msgm("length(excl0)"); print(length(excl0))
## The CFL condition is expressed in terms of total flux and total
## pore volume
cfl <- as.numeric(min(abs(vol*poro/fluxv)[-excl0]))
if (!is.finite(cfl))
stop(msgm("CFL is ", cfl,"; quitting"))
allowed_dt <- setup$Cf*cfl
requested_dt <- next_dt ## target_time - setup$simulation_time
msgm("CFL allows dt of <", round(cfl, 2)," [s]; multiplicator is ", setup$Cf,
"; requested_dt is: ", round(requested_dt, 2))
if (requested_dt > allowed_dt) {
inniter <- floor(requested_dt/allowed_dt)
inner_timesteps <- c(rep(allowed_dt, inniter), requested_dt - allowed_dt * inniter)
msgm("Performing ", inniter, " inner iterations")
} else {
inner_timesteps <- requested_dt
inniter <- 1
}
setup$inner_timesteps <- inner_timesteps
setup$requested_dt <- requested_dt
setup$allowed_dt <- allowed_dt
setup$inniter <- inniter
setup$iter <- iter
setup$fluxv <- fluxv
setup$no_transport_cells <- excl0
setup$cellporvol <- cellporvol
return(setup)
}
## This function, called only by master, stores on disk the last
## calculated time step if store_result is TRUE and increments the
## iteration counter
master_iteration_end <- function(setup) {
iter <- setup$iter
## MDL Write on disk state_T and state_C after every iteration
if (store_result) {
nameout <- paste0(fileout, '/iter_', sprintf('%03d', iter), '.rds')
saveRDS(list(T=setup$state_T, C=setup$state_C), file=nameout)
msgm("results stored in <", nameout, ">")
}
msgm("done iteration", iter, "/", setup$maxiter)
setup$iter <- setup$iter + 1
return(setup)
}
## master: compute advection
master_advection <- function(setup) {
msgm("requested dt=", setup$requested_dt)
concmat <- RedModRphree::pH2Act(setup$state_C)
inner_timesteps <- setup$inner_timesteps
Cf <- setup$Cf
iter <- setup$iter
excl <- setup$no_transport_cells
## msgm("excl"); print(excl)
immobile <- setup$immobile
boundmat <- setup$boundmatAct
snap <- mufits_sims[[setup$iter]]
## conc is a matrix with colnames
if (is.matrix(concmat)) {
if (length(immobile)>0)
val <- concmat[,-immobile]
else
val <- concmat
## sort the columns of the matrix matching the names of boundary
sptr <- colnames(val)
inflow <- boundmat[, colnames(boundmat)!="cbound", drop=FALSE]
spin <- colnames(inflow)
ind <- match(spin,sptr)
if (TRUE %in% is.na(ind)) {
msgm("Missing boundary conditions for some species")
val <- cbind(val,matrix(0,ncol=sum(is.na(ind)),nrow=nrow(val) ))
colnames(val) <- c(sptr,spin[is.na(ind)])
}
sptr <- colnames(val)
ind <- match(sptr,spin)
## msgm("ind"); print(ind)
cnew <- val
msgm("Computing transport of ", ncol(val), " species")
## if (local_rank==0) {
## saveRDS(list(conc=newconc, times=times, activeCells=grid$cell$CELLID[-exclude_cell], fluxv=fluxv),
## file=paste0("TranspUpwind_", local_rank, ".RData"))
## }
## cnew[ boundmat[, 1] ] <- boundmat[, 2]
## cells where we won't update the concentrations: union of
## boundary and no-flow cells
if (length(excl) == 0)
exclude_cell <- sort(c(boundmat[, 1]))
else
exclude_cell <- sort(c(boundmat[, 1], excl))
## msgm("mufits_grid$cell$CELLID[-excludecell]:")
## print(mufits_grid$cell$CELLID[-exclude_cell])
for (i in seq(1, ncol(val))) {
## form a 2-column matrix with cell id and boundary
## concentration for those elements
newboundmat <- boundmat[,c(1, ind[i]+1)]
## vector with the old concentrations
concv <- val[,i]
## apply boundary conditions to the concentration vector
## (they should stay constant but better safe than sorry)
concv[newboundmat[, 1]] <- newboundmat[, 2]
## call the function
cnew[,i] <- CppTransportUpwindIrr(concv = concv,
times = inner_timesteps,
activeCells = mufits_grid$cell$CELLID[-exclude_cell],
fluxv = setup$fluxv,
listconn = mufits_grid$listconn,
porVol = setup$cellporvol)
}
colnames(cnew) <- colnames(val)
if ( length(immobile) > 0) {
res <- cbind(cnew, concmat[,immobile])
} else {
res <- cnew
}
## check for negative values. This SHOULD NOT OCCUR and may be
## the result of numerical dispersion (or bug in my transport
## code!)
if (any(res <0 )) {
a <- length(res[res < 0 ])
res[res < 0 ] <- 0
msgm("-> ", a, "concentrations were negative")
}
## msgm("state_T after iteration", setup$iter, ":")
## print(head(res))
} else {
msgm("state_C at iteration", setup$iter, " is not a Matrix, doing nothing!")
}
setup$state_T <- res
return(setup)
}
## function for the workers to compute chemistry through PHREEQC
slave_chemistry <- function(setup, data)
{
base <- setup$base
first <- setup$first
prop <- setup$prop
immobile <- setup$immobile
kin <- setup$kin
ann <- setup$ann
if (local_rank == 0) {
iter <- setup$iter
timesteps <- setup$timesteps
dt <- timesteps[iter]
}
state_T <- data ## not the global field, but the work-package
## treat special H+/pH, e-/pe cases
state_T <- RedModRphree::Act2pH(state_T)
## reduction of the problem
if(setup$reduce)
reduced <- ReduceStateOmit(state_T, omit=setup$ann)
else
reduced <- state_T
## if (local_rank==1) {
## msg("worker", local_rank,"; iter=", iter, "dt=", dt)
## msg("reduce is", setup$reduce)
## msg("data:")
## print(reduced)
## msg("base:")
## print(base)
## msg("first:")
## print(first)
## msg("ann:")
## print(ann)
## msg("prop:")
## print(prop)
## msg("immobile:")
## print(immobile)
## msg("kin:")
## print(kin)
## }
## form the PHREEQC input script for the current work package
inplist <- splitMultiKin(data=reduced, procs=1, base=base, first=first,
ann=ann, prop=prop, minerals=immobile, kin=kin, dt=dt)
## if (local_rank==1 & iter==1)
## RPhreeWriteInp("FirstInp", inplist)
tmpC <- RunPQC_Warnings(inplist, procs=1, second=TRUE)
## recompose after the reduction
if (setup$reduce)
state_C <- RecomposeState(tmpC, reduced)
else {
state_C <- tmpC
}
## the next line is needed since we don't need all columns of
## PHREEQC output
return(state_C[, prop])
}
## This function, called by master
master_chemistry <- function(setup, data)
{
state_T <- setup$state_T
msgm(" chemistry iteration", setup$iter)
## treat special H+/pH, e-/pe cases
state_T <- RedModRphree::Act2pH(state_T)
## reduction of the problem
if(setup$reduce)
reduced <- ReduceStateOmit(state_T, omit=setup$ann)
else
reduced <- state_T
### inject data from workers
res_C <- data
rownames(res_C) <- NULL
## print(res_C)
if (nrow(res_C) > nrow(reduced)) {
res_C <- res_C[seq(2,nrow(res_C), by=2),]
}
## recompose after the reduction
if (setup$reduce)
state_C <- RecomposeState(res_C, reduced)
else {
state_C <- res_C
}
setup$state_C <- state_C
setup$reduced <- reduced
return(setup)
}
## Adapted version for "reduction"
ReduceStateOmit <- function (data, omit=NULL, sign=6)
{
require(mgcv)
rem <- colnames(data)
if (is.list(omit)) {
indomi <- match(names(omit), colnames(data))
datao <- data[, -indomi]
} else datao <- data
datao <- signif(datao, sign)
red <- mgcv::uniquecombs(datao)
inds <- attr(red, "index")
now <- ncol(red)
## reattach the omitted column(s)
## FIXME: control if more than one ann is present
if (is.list(omit)) {
red <- cbind(red, rep( data[ 1, indomi], nrow(red)))
colnames(red)[now+1] <- names(omit)
ret <- red[, colnames(data)]
} else {
ret <- red
}
rownames(ret) <- NULL
attr(ret, "index") <- inds
return(ret)
}
## Attach the name of the calling function to the message displayed on
## R's stdout
msgm <- function (...)
{
if (local_rank==0) {
fname <- as.list(sys.call(-1))[[1]]
prefix <- paste0("R: ", fname, " ::")
cat(paste(prefix, ..., "\n"))
}
invisible()
}
RunPQC_Warnings <- function (input, procs = 1, second = TRUE)
{
.runPQC <- function(input, onlysecond) {
require(phreeqc)
phreeqc::phrSetErrorStringsOn(TRUE)
error_count <- phreeqc::phrRunString(input)
warnings <- phreeqc::phrGetWarningStrings()
checked_warn <- CheckWarnings(warnings)
if (is.null(error_count)) {
out <- phreeqc::phrGetSelectedOutput()[[1]]
nso <- colnames(out)
nso <- sub("..mol.kgw.", "", nso, fixed = TRUE)
nso <- sub(".mol.kgw.", "", nso, fixed = TRUE)
nso <- sub("^k_", "", nso)
nso <- sub(".g.", "(g)", nso, fixed = TRUE)
if (second)
out <- out[seq(2, nrow(out), by = 2), ]
colnames(out) <- nso
return(data.matrix(out))
} else {
msgm("Error in simmulation!")
cat(phrGetErrorStrings(), sep="\n")
}
}
if (!is.list(input)) {
if (is.character(input)) {
res <- .runPQC(input, onlysecond = second)
} else {
stopmsg("something wrong with the input, dying!")
}
}
else {
res <- foreach(i = seq_along(input), .combine = rbind) %dopar%
.runPQC(input[[i]], onlysecond = second)
}
return(res)
}
CheckWarnings <- function(warnings)
{
## get rid of "Negative moles in solution" warnings
recover <- grep("Negative moles", warnings, fixed=TRUE)
warnings <- warnings[-recover]
lw <- length(warnings)
if (lw>0) {
## nfile <- paste0("warnings_", local_rank, ".txt")
nfile <- "Warnings.txt"
cat(warnings, file=nfile, sep="\n", append=TRUE)
}
return(lw)
}

2625
src/mdl_quint_kin.dat Normal file

File diff suppressed because it is too large Load Diff

43
src/parallel_r_library.R Normal file
View File

@ -0,0 +1,43 @@
distribute_work_packages <- function(len, package_size)
{
## Check if work_package is a divisor of grid length and act
## accordingly
if ((len %% package_size) == 0) {
n_packages <- len/package_size
} else {
n_packages <- floor(len/package_size) + 1
}
ids <- rep(1:n_packages, times=package_size, each = 1)[1:len]
return(ids)
}
compute_wp_sizes <- function(ids)
{
as.integer(table(ids))
}
shuffle_field <- function(data, send_ord)
{
shf <- data[send_ord,]
return(shf)
}
unshuffle_field <- function(data, send_ord)
{
data[send_ord,] <- data
rownames(data) <- NULL
return(data)
}
stat_wp_sizes <- function(sizes)
{
res <- as.data.frame(table(sizes))
if (nrow(res)>1) {
msgm("Chosen work_package_size is not a divisor of grid length. ")
colnames(res) <- c("Size", "N")
print(res)
} else {
msgm("All work packages of length ", sizes[1])
}
}

44
src/r_utils.cpp Normal file
View File

@ -0,0 +1,44 @@
#include "r_utils.h"
/* This function converts a pure double dataframe into a double array.
buffer <- double array, needs to be allocated before
df <- reference to input dataframe
*/
void convert_R_Dataframe_2_C_buffer(double* buffer, Rcpp::DataFrame &df)
{
size_t rowCount = df.nrow();
size_t colCount = df.ncol();
for (size_t i = 0; i < rowCount; i++)
{
for (size_t j = 0; j < colCount; j++)
{
/* Access column vector j and extract value of line i */
Rcpp::DoubleVector col = df[j];
buffer[i * colCount + j] = col[i];
}
}
}
/* This function converts a double array into a double dataframe.
buffer <- input double array
df <- reference to output dataframe, needs to be of fitting size, structure will be taken from it
*/
void convert_C_buffer_2_R_Dataframe(double* buffer, Rcpp::DataFrame &df)
{
size_t rowCount = df.nrow();
size_t colCount = df.ncol();
for (size_t i = 0; i < rowCount; i++)
{
for (size_t j = 0; j < colCount; j++)
{
/* Access column vector j and extract value of line i */
Rcpp::DoubleVector col = df[j];
col[i] = buffer[i * colCount + j];
}
}
}

6
src/r_utils.h Normal file
View File

@ -0,0 +1,6 @@
#pragma once
#include <RInside.h>
/*Functions*/
void convert_R_Dataframe_2_C_buffer(double* buffer, Rcpp::DataFrame &df);
void convert_C_buffer_2_R_Dataframe(double* buffer, Rcpp::DataFrame &df);

277
src/worker.cpp Normal file
View File

@ -0,0 +1,277 @@
#include "worker.h"
#include "dht_wrapper.h"
#include "global_buffer.h"
#include "r_utils.h"
#include <mpi.h>
#include <iostream>
void worker_function(RInside& R)
{
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Status probe_status;
int count;
int local_work_package_size;
int iteration;
double dt, current_sim_time;
double idle_a, idle_b;
double cummul_idle = 0.f;
double dht_get_start=0, dht_get_end=0;
double dht_fill_start=0, dht_fill_end=0;
double phreeqc_time_start=0, phreeqc_time_end=0;
int phreeqc_count = 0;
//timing[0] -> phreeqc
//timing[1] -> dht_get
//timing[2] -> dht_fill
double timing[3];
timing[0] = 0.0;
timing[1] = 0.0;
timing[2] = 0.0;
//dht_perf[0] -> hits
//dht_perf[1] -> miss
//dht_perf[2] -> collisions
uint64_t dht_perf[3];
if (dht_enabled)
{
dht_flags.resize(work_package_size, true); //set size
dht_flags.assign(work_package_size, true); //assign all elements to true (default)
dht_hits = 0;
dht_miss = 0;
dht_collision = 0;
// MDL: This code has now been moved to kin.cpp
// /*Load significance vector from R setup file (or set default)*/
// bool signif_vector_exists = R.parseEval("exists('signif_vector')");
// if (signif_vector_exists)
// {
// dht_significant_digits_vector = as<std::vector<int>>(R["signif_vector"]);
// } else
// {
// dht_significant_digits_vector.assign(dht_object->key_size / sizeof(double), dht_significant_digits);
// }
// /*Load property type vector from R setup file (or set default)*/
// bool prop_type_vector_exists = R.parseEval("exists('prop_type')");
// if (prop_type_vector_exists)
// {
// prop_type_vector = as<std::vector<string>>(R["prop_type"]);
// } else
// {
// prop_type_vector.assign(dht_object->key_size / sizeof(double), "normal");
// }
}
//initialization of helper variables
iteration = 0;
dt = 0;
current_sim_time = 0;
local_work_package_size = 0;
/*worker loop*/
while(1)
{
/*Wait for Message*/
idle_a = MPI_Wtime();
MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &probe_status);
idle_b = MPI_Wtime();
if (probe_status.MPI_TAG == TAG_WORK)
{ /* do work */
cummul_idle += idle_b - idle_a;
/* get number of doubles sent */
MPI_Get_count(&probe_status, MPI_DOUBLE, &count);
/* receive */
MPI_Recv(mpi_buffer, count, MPI_DOUBLE, 0, TAG_WORK, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
//decrement count of work_package by BUFFER_OFFSET
count -= BUFFER_OFFSET;
//check for changes on all additional variables given by the 'header' of mpi_buffer
if (mpi_buffer[count] != local_work_package_size) { //work_package_size
local_work_package_size = mpi_buffer[count];
R["work_package_size"] = local_work_package_size;
R.parseEvalQ("mysetup$work_package_size <- work_package_size");
}
if (mpi_buffer[count+1] != iteration) { //current iteration of simulation
iteration = mpi_buffer[count+1];
R["iter"] = iteration;
R.parseEvalQ("mysetup$iter <- iter");
}
if (mpi_buffer[count+2] != dt) { //current timestep size
dt = mpi_buffer[count+2];
R["dt"] = dt;
R.parseEvalQ("mysetup$dt <- dt");
}
if (mpi_buffer[count+3] != current_sim_time) { //current simulation time ('age' of simulation)
current_sim_time = mpi_buffer[count+3];
R["simulation_time"] = current_sim_time;
R.parseEvalQ("mysetup$simulation_time <- simulation_time");
}
/* 4th double value is currently a placeholder */
// if (mpi_buffer[count+4] != placeholder) {
// placeholder = mpi_buffer[count+4];
// R["mysetup$placeholder"] = placeholder;
// }
/* get df with right structure to fill in work package */
R.parseEvalQ("tmp2 <- head(mysetup$state_C, work_package_size)");
// R.parseEval("print(rownames(tmp2)[1:5])");
// R.parseEval("print(head(tmp2, 2))");
// R.parseEvalQ("tmp2$id <- as.double(rownames(tmp2))");
Rcpp::DataFrame buffer = R.parseEval("tmp2");
if (dht_enabled)
{
// DEBUG
// cout << "RANK " << world_rank << " start checking DHT\n";
//resize helper vector dht_flags of work_package_size changes
if ((int) dht_flags.size() != local_work_package_size) {
dht_flags.resize(local_work_package_size, true); //set size
dht_flags.assign(local_work_package_size, true); //assign all elements to true (default)
}
dht_get_start = MPI_Wtime();
check_dht(R, local_work_package_size, dht_flags, mpi_buffer);
dht_get_end = MPI_Wtime();
//DEBUG
//cout << "RANK " << world_rank << " checking DHT complete \n";
R["dht_flags"] = as<LogicalVector>(wrap(dht_flags));
//R.parseEvalQ("print(head(dht_flags))");
}
/* work */
convert_C_buffer_2_R_Dataframe(mpi_buffer, buffer);
R["work_package_full"] = buffer;
//R["work_package"] = buffer;
//DEBUG
//R.parseEvalQ("print(head(work_package_full))");
//R.parseEvalQ("print( c(length(dht_flags), nrow(work_package_full)) )");
if (dht_enabled)
{
R.parseEvalQ("work_package <- work_package_full[dht_flags,]");
} else {
R.parseEvalQ("work_package <- work_package_full");
}
//DEBUG
// R.parseEvalQ("print(head(work_package),2)");
// R.parseEvalQ("rownames(work_package) <- work_package$id");
// R.parseEval("print(paste('id %in% colnames(work_package)', 'id' %in% colnames(work_package)");
// R.parseEvalQ("id_store <- rownames(work_package)"); //"[, ncol(work_package)]");
// R.parseEvalQ("work_package$id <- NULL");
R.parseEvalQ("work_package <- as.matrix(work_package)");
unsigned int nrows = R.parseEval("nrow(work_package)");
if (nrows > 0)
{
/*Single Line error Workaround*/
if (nrows <=1)
{
//duplicate line to enable correct simmulation
R.parseEvalQ("work_package <- work_package[rep(1:nrow(work_package), times = 2), ]");
}
phreeqc_count++;
phreeqc_time_start = MPI_Wtime();
// MDL
// R.parseEvalQ("print('Work_package:\n'); print(head(work_package , 2)); cat('RCpp: worker_function:', local_rank, ' \n')");
R.parseEvalQ("result <- as.data.frame(slave_chemistry(setup=mysetup, data = work_package))");
phreeqc_time_end = MPI_Wtime();
// R.parseEvalQ("result$id <- id_store");
} else
{
//cout << "Work-Package is empty, skipping phreeqc!" << endl;
}
if (dht_enabled)
{
R.parseEvalQ("result_full <- work_package_full");
if (nrows > 0)
R.parseEvalQ("result_full[dht_flags,] <- result");
} else {
R.parseEvalQ("result_full <- result");
}
Rcpp::DataFrame result = R.parseEval("result_full");
convert_R_Dataframe_2_C_buffer(mpi_buffer_results, result);
/* send results to master */
MPI_Request send_req;
MPI_Isend(mpi_buffer_results, count, MPI_DOUBLE, 0, TAG_WORK, MPI_COMM_WORLD, &send_req);
if (dht_enabled)
{
dht_fill_start = MPI_Wtime();
fill_dht(R, local_work_package_size, dht_flags, mpi_buffer, mpi_buffer_results);
dht_fill_end = MPI_Wtime();
timing[1] += dht_get_end - dht_get_start;
timing[2] += dht_fill_end - dht_fill_start;
}
timing[0] += phreeqc_time_end - phreeqc_time_start;
MPI_Wait(&send_req,MPI_STATUS_IGNORE);
} else if (probe_status.MPI_TAG == TAG_FINISH)
{ /* recv and die */
/* before death, submit profiling/timings to master*/
MPI_Recv(NULL, 0, MPI_DOUBLE, 0, TAG_FINISH, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
//timings
MPI_Send(timing, 3, MPI_DOUBLE, 0, TAG_TIMING, MPI_COMM_WORLD);
MPI_Send(&phreeqc_count, 1, MPI_INT, 0, TAG_TIMING, MPI_COMM_WORLD);
MPI_Send(&cummul_idle, 1, MPI_DOUBLE, 0, TAG_TIMING, MPI_COMM_WORLD);
if(dht_enabled)
{
//dht_perf
dht_perf[0] = dht_hits;
dht_perf[1] = dht_miss;
dht_perf[2] = dht_collision;
MPI_Send(dht_perf, 3, MPI_UNSIGNED_LONG_LONG, 0, TAG_DHT_PERF, MPI_COMM_WORLD);
}
break;
} else if ((probe_status.MPI_TAG == TAG_DHT_STATS)) {
MPI_Recv(NULL, 0, MPI_DOUBLE, 0, TAG_DHT_STATS, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
print_statistics();
MPI_Barrier(MPI_COMM_WORLD);
} else if ((probe_status.MPI_TAG == TAG_DHT_STORE)) {
char* outdir;
MPI_Get_count(&probe_status, MPI_CHAR, &count);
outdir = (char *) calloc(count + 1, sizeof(char));
MPI_Recv(outdir, count, MPI_CHAR, 0, TAG_DHT_STORE, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
int res = table_to_file((char *) outdir);
if (res != DHT_SUCCESS) {
if (world_rank == 2) cerr << "CPP: Worker: Error in writing current state of DHT to file (TAG_DHT_STORE)" << endl;
} else {
if (world_rank == 2) cout << "CPP: Worker: Successfully written DHT to file " << outdir << endl;
}
free(outdir);
MPI_Barrier(MPI_COMM_WORLD);
}
}
}

17
src/worker.h Normal file
View File

@ -0,0 +1,17 @@
#pragma once
#include <RInside.h>
using namespace std;
using namespace Rcpp;
/*Functions*/
void worker_function(RInside &R);
/*Globals*/
#define TAG_WORK 42
#define TAG_FINISH 43
#define TAG_TIMING 44
#define TAG_DHT_PERF 45
#define TAG_DHT_STATS 46
#define TAG_DHT_STORE 47