Initial commit

2025-12-15 20:38:23 +01:00 · 2020-11-18 14:24:12 +01:00 · 2020-11-18 14:24:12 +01:00 · 8f687a18c1
commit 8f687a18c1
24 changed files with 7319 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,63 @@
+
+<!--
+    Time-stamp: "Last modified 2020-02-01 18:14:13 delucia"
+-->
+
+# install libraries from MDL
+
+    library(devtools)
+    devtools::install_git("https://gitext.gfz-potsdam.de/delucia/RedModRphree.git")
+    devtools::install_git("https://gitext.gfz-potsdam.de/delucia/Rmufits.git")
+  
+# USAGE
+
+mpirun ./kin <OPTIONS> <simfile.R> <DIRECTORY>
+
+OPTIONS:
+
+--work-package-size=<1-n>     ... size of work packages (default 5)
+
+--ignore-result         ... disables store of simulation resuls
+
+DHT:
+
+--dht                   ... enable dht (default is off)
+
+--dht-log               ... enable logarithm application before rounding (default is off)
+
+--dht-signif=<1-n>      ... set rounding to number of significant digits (default 5) 
+                            (only used if no vector is given in setup file)
+                            (for individual values per column use R vector "signif_vector" in setup file)
+
+--dht-strategy=<0-1>    ... change dht strategy, not implemented yet (default 0, dht on workers)
+
+--dht-size=<1-n>        ... size of dht per process involved (see dht-strategy) in byte (default 1GiB)
+
+--dht-snaps=<0-2>	... enable or disable storage of DHT snapshots
+			    0 = snapshots are disabled
+			    1 = only stores snapshot at the end of the simulation with name <DIRECTORY>.dht
+			    2 = stores snapshot at the end and after each iteration
+			        iteration snapshot files are stored in <DIRECTORY>/iter<n>.dht
+
+--dht-file=<snapshot> 	... initializes DHT with the given snapshot file
+
+###############################################################################
+
+
+# about the usage of MPI_Wtime()
+From the OpenMPI Man Page:
+
+For example, on platforms that support it, the clock_gettime() function will be used 
+to obtain a monotonic clock value with whatever precision is supported on that platform (e.g., nanoseconds). 
+
+# External Libraries
+Cmdline Parsing -> https://github.com/adishavit/argh
+
+
+# Examples included (more to come)
+1) SimDol2D.R     ... simple chemistry (Calcite/Dolomite) on a 50x50 2D grid, 20 time steps
+2) SimDolKtz.R    ... simple chemistry (Calcite/Dolomite) on Ketzin grid (~650k elements), 20 time steps
+                      The flow snapshots are NOT INCLUDED in svn but must be provided separately
+
+
+
--- a/src/DHT.cpp
+++ b/src/DHT.cpp
@ -0,0 +1,423 @@
+#include "DHT.h"
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+
+/*
+ * determining destination rank and index by hash of key
+ *
+ * return values by reference
+ */
+static void determine_dest(uint64_t hash, int comm_size, unsigned int table_size, unsigned int *dest_rank, unsigned int *index, unsigned int index_count) {
+    uint64_t tmp;
+    int char_hop = 9-index_count;
+    unsigned int i;
+    for (i = 0; i < index_count ; i++) {
+        tmp = 0;
+        memcpy(&tmp,(unsigned char *)&hash+i, char_hop);
+        index[i] = (unsigned int) (tmp % table_size);
+    }
+    *dest_rank = (unsigned int) (hash % comm_size);
+}
+
+/**
+ * set write flag to 1
+ */
+static void set_flag(char* flag_byte) {
+    *flag_byte = 0;
+    *flag_byte |= (1 << 0);
+} 
+
+/**
+ * return 1 if write flag is set
+ * else 0
+ */
+static int read_flag(char flag_byte) {
+    if ((flag_byte & 0x01) == 0x01) {
+        return 1;
+    } else return 0;
+}
+
+/*
+ * allocating memory for DHT object and buckets.
+ * creating MPI window for OSC
+ * filling DHT object with passed by parameters, window, 2 counters for R/W errors and 2 pointers with allocated memory for further use
+ * return DHT object
+ */
+DHT* DHT_create(MPI_Comm comm, unsigned int size, int data_size, int key_size, uint64_t(*hash_func) (int, void*)) {
+    DHT *object;
+    MPI_Win window;
+    void* mem_alloc;
+    int comm_size, tmp;
+
+    tmp = (int) ceil(log2(size));
+    if (tmp%8 != 0) tmp = tmp + (8-(tmp%8));
+
+    object = (DHT*) malloc(sizeof(DHT));
+    if (object == NULL) return NULL;
+
+    //every memory allocation has 1 additional byte for flags etc.
+    if (MPI_Alloc_mem(size * (1 + data_size + key_size), MPI_INFO_NULL, &mem_alloc) != 0) return NULL;
+    if (MPI_Comm_size(comm, &comm_size) != 0) return NULL;
+
+    memset(mem_alloc, '\0', size * (1 + data_size + key_size));
+
+    if (MPI_Win_create(mem_alloc, size * (1 + data_size + key_size), (1 + data_size + key_size), MPI_INFO_NULL, comm, &window) != 0) return NULL;
+
+    object->data_size = data_size;
+    object->key_size = key_size;
+    object->table_size = size;
+    object->window = window;
+    object->hash_func = hash_func;
+    object->comm_size = comm_size;
+    object->communicator = comm;
+    object->read_misses = 0;
+    object->collisions = 0;
+    object->recv_entry = malloc(1 + data_size + key_size);
+    object->send_entry = malloc(1 + data_size + key_size);
+    object->index_count = 9-(tmp/8);
+    object->index = (unsigned int*) malloc((9-(tmp/8))*sizeof(int));
+    object->mem_alloc = mem_alloc;
+
+    DHT_stats *stats;
+
+    stats = (DHT_stats*) malloc(sizeof(DHT_stats));
+    if (stats == NULL) return NULL;
+
+    object->stats = stats;
+    object->stats->writes_local = (int*) calloc(comm_size, sizeof(int));
+    object->stats->old_writes = 0;
+    object->stats->read_misses = 0;
+    object->stats->collisions = 0;
+    object->stats->w_access = 0;
+    object->stats->r_access = 0;
+
+    return object;
+}
+
+/*
+ * puts passed by data with key to DHT
+ *
+ * returning DHT_MPI_ERROR = -1 if MPI error occurred
+ * else  DHT_SUCCESS = 0 if success
+ */
+int DHT_write(DHT *table, void* send_key, void* send_data) {
+    unsigned int dest_rank, i;
+    int result = DHT_SUCCESS;
+
+    table->stats->w_access++;
+
+    //determine destination rank and index by hash of key
+    determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, table->table_size, &dest_rank, table->index, table->index_count);
+
+    //concatenating key with data to write entry to DHT
+    set_flag((char *) table->send_entry);
+    memcpy((char *) table->send_entry + 1, (char *) send_key, table->key_size);
+    memcpy((char *) table->send_entry + table->key_size + 1, (char *) send_data, table->data_size);
+
+    //locking window of target rank with exclusive lock
+    if (MPI_Win_lock(MPI_LOCK_EXCLUSIVE, dest_rank, 0, table->window) != 0)
+        return DHT_MPI_ERROR;
+    for (i = 0; i < table->index_count; i++)
+    {
+        if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
+        if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
+
+        //increment collision counter if receiving key doesn't match sending key
+        //,entry has write flag + last index is reached
+
+        if (read_flag(*(char *)table->recv_entry)) {
+            if (memcmp(send_key, (char *) table->recv_entry + 1, table->key_size) != 0) {
+                if (i == (table->index_count)-1) {
+                    table->collisions += 1;
+                    table->stats->collisions += 1;
+                    result = DHT_WRITE_SUCCESS_WITH_COLLISION;
+                    break;
+                }
+            } else break;
+        } else {
+            table->stats->writes_local[dest_rank]++;
+            break;
+        }
+    }
+
+    //put data to DHT
+    if (MPI_Put(table->send_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
+    //unlock window of target rank
+    if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
+
+    return result;
+}
+
+/*
+ * gets data from the DHT by key
+ *
+ * return  DHT_SUCCESS = 0 if success
+ * DHT_MPI_ERROR = -1 if MPI error occurred
+ * DHT_READ_ERROR = -2 if receiving key doesn't match sending key
+ */
+int DHT_read(DHT *table, void* send_key, void* destination) {
+    unsigned int dest_rank, i;
+
+    table->stats->r_access++;
+
+    //determine destination rank and index by hash of key
+    determine_dest(table->hash_func(table->key_size, send_key), table->comm_size, table->table_size, &dest_rank, table->index, table->index_count);
+
+    //locking window of target rank with shared lock
+    if (MPI_Win_lock(MPI_LOCK_SHARED, dest_rank, 0, table->window) != 0) return DHT_MPI_ERROR;
+    //receive data
+    for (i = 0; i < table->index_count; i++) {
+        if (MPI_Get(table->recv_entry, 1 + table->data_size + table->key_size, MPI_BYTE, dest_rank, table->index[i], 1 + table->data_size + table->key_size, MPI_BYTE, table->window) != 0) return DHT_MPI_ERROR;
+        if (MPI_Win_flush(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
+        
+        //increment read error counter if write flag isn't set or key doesn't match passed by key + last index reached
+        //else copy data to dereference of passed by destination pointer
+
+        if ((read_flag(*(char *) table->recv_entry)) == 0) {
+            table->read_misses += 1;
+            table->stats->read_misses += 1;
+            if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
+            return DHT_READ_ERROR;
+        }
+
+        if (memcmp(((char*)table->recv_entry) + 1, send_key, table->key_size) != 0) {
+            if (i == (table->index_count)-1) {
+                table->read_misses += 1;
+                table->stats->read_misses += 1;
+                if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
+                return DHT_READ_ERROR;
+            }
+        } else break;
+    }
+
+    //unlock window of target rank
+    if (MPI_Win_unlock(dest_rank, table->window) != 0) return DHT_MPI_ERROR;
+
+    memcpy((char *) destination, (char *) table->recv_entry + table->key_size + 1, table->data_size);
+
+    return DHT_SUCCESS;
+}
+
+int DHT_to_file(DHT* table, char* filename) {
+    //open file
+    MPI_File file;
+    if (MPI_File_open(table->communicator, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &file) != 0) return DHT_FILE_IO_ERROR;
+
+    int rank;
+    MPI_Comm_rank(table->communicator, &rank);
+
+    //write header (key_size and data_size)
+    if (rank == 0) {
+        if (MPI_File_write(file, &table->key_size, 1, MPI_INT, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
+        if (MPI_File_write(file, &table->data_size, 1, MPI_INT, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
+    }
+
+    if (MPI_File_seek_shared(file, DHT_HEADER_SIZE, MPI_SEEK_SET) != 0) return DHT_FILE_IO_ERROR;
+
+    char* ptr;
+    int bucket_size = table->key_size + table->data_size + 1;
+
+    //iterate over local memory
+    for (unsigned int i = 0; i < table->table_size; i++) {
+        ptr = (char *) table->mem_alloc + (i * bucket_size);
+        //if bucket has been written to (checked by written_flag)...
+        if (read_flag(*ptr)) {
+            //write key and data to file
+            if (MPI_File_write_shared(file, ptr + 1, bucket_size - 1, MPI_BYTE, MPI_STATUS_IGNORE) != 0) return DHT_FILE_WRITE_ERROR;
+        }
+    }
+    //close file
+    if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR;
+
+    return DHT_SUCCESS;  
+}
+
+int DHT_from_file(DHT* table, char* filename) {
+    MPI_File file;
+    MPI_Offset f_size;
+    int e_size, m_size, cur_pos, rank, offset;
+    char* buffer;
+    void* key;
+    void* data;
+
+    if (MPI_File_open(table->communicator, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &file) != 0) return DHT_FILE_IO_ERROR;
+
+    if (MPI_File_get_size(file, &f_size) != 0) return DHT_FILE_IO_ERROR;
+
+    MPI_Comm_rank(table->communicator, &rank);
+
+    e_size = table->key_size + table->data_size;
+    m_size = e_size > DHT_HEADER_SIZE ? e_size : DHT_HEADER_SIZE;
+    buffer = (char *) malloc(m_size);
+
+    if (MPI_File_read(file, buffer, DHT_HEADER_SIZE, MPI_BYTE, MPI_STATUS_IGNORE) != 0) return DHT_FILE_READ_ERROR;
+
+    if (*(int *) buffer != table->key_size) return DHT_WRONG_FILE;
+    if (*(int *) (buffer + 4) != table->data_size) return DHT_WRONG_FILE;
+
+    offset = e_size*table->comm_size;
+
+    if (MPI_File_seek(file, DHT_HEADER_SIZE, MPI_SEEK_SET) != 0) return DHT_FILE_IO_ERROR;
+    cur_pos = DHT_HEADER_SIZE + (rank * e_size);
+
+    while(cur_pos < f_size) {
+        if (MPI_File_seek(file, cur_pos, MPI_SEEK_SET) != 0) return DHT_FILE_IO_ERROR;
+        MPI_Offset tmp;
+        MPI_File_get_position(file, &tmp);
+        if (MPI_File_read(file, buffer, e_size, MPI_BYTE, MPI_STATUS_IGNORE) != 0) return DHT_FILE_READ_ERROR;
+        key = buffer;
+        data = (buffer+table->key_size);
+        if (DHT_write(table, key, data) == DHT_MPI_ERROR) return DHT_MPI_ERROR;
+
+        cur_pos += offset;
+    }
+
+    free (buffer);
+    if (MPI_File_close(&file) != 0) return DHT_FILE_IO_ERROR; 
+
+    return DHT_SUCCESS;
+}
+
+/*
+ * frees up memory and accumulate counter
+ */
+int DHT_free(DHT* table, int* collision_counter, int* readerror_counter) {
+    int buf;
+
+    if (collision_counter != NULL) {
+        buf = 0;
+        if (MPI_Reduce(&table->collisions, &buf, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+        *collision_counter = buf;
+    }
+    if (readerror_counter != NULL) {
+        buf = 0;
+        if (MPI_Reduce(&table->read_misses, &buf, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+        *readerror_counter = buf;
+    }
+    if (MPI_Win_free(&(table->window)) != 0) return DHT_MPI_ERROR;
+    if (MPI_Free_mem(table->mem_alloc) != 0) return DHT_MPI_ERROR;
+    free(table->recv_entry);
+    free(table->send_entry);
+    free(table->index);
+
+    free(table->stats->writes_local);
+    free(table->stats);
+
+    free(table);
+
+    return DHT_SUCCESS;
+}
+
+/*
+ * prints a table with statistics about current use of DHT 
+ * for each participating process and summed up results containing:
+    *  1. occupied buckets (in respect to the memory of this process)
+    *  2. free buckets (in respect to the memory of this process)
+    *  3. calls of DHT_write (w_access)
+    *  4. calls of DHT_read (r_access)
+    *  5. read misses (see DHT_READ_ERROR)
+    *  6. collisions (see DHT_WRITE_SUCCESS_WITH_COLLISION)
+ * 3-6 will reset with every call of this function
+ * finally the amount of new written entries is printed out (in relation to last call of this funtion)
+ */
+int DHT_print_statistics(DHT *table) {
+    int *written_buckets;
+    int *read_misses, sum_read_misses;
+    int *collisions, sum_collisions;
+    int sum_w_access, sum_r_access, *w_access, *r_access;
+    int rank;
+
+    MPI_Comm_rank(table->communicator, &rank);
+
+    //disable possible warning of unitialized variable, which is not the case
+    //since we're using MPI_Gather to obtain all values only on rank 0
+    #pragma GCC diagnostic push
+    #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+
+    //obtaining all values from all processes in the communicator
+    if (rank == 0) read_misses = (int*) malloc(table->comm_size*sizeof(int));
+    if (MPI_Gather(&table->stats->read_misses, 1, MPI_INT, read_misses, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+    if (MPI_Reduce(&table->stats->read_misses, &sum_read_misses, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+    table->stats->read_misses = 0;
+    
+    if (rank == 0) collisions = (int*) malloc(table->comm_size*sizeof(int));
+    if (MPI_Gather(&table->stats->collisions, 1, MPI_INT, collisions, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+    if (MPI_Reduce(&table->stats->collisions, &sum_collisions, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+    table->stats->collisions = 0;
+
+    if (rank == 0) w_access = (int*) malloc(table->comm_size*sizeof(int));
+    if (MPI_Gather(&table->stats->w_access, 1, MPI_INT, w_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+    if (MPI_Reduce(&table->stats->w_access, &sum_w_access, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+    table->stats->w_access = 0;
+
+    if (rank == 0) r_access = (int*) malloc(table->comm_size*sizeof(int));
+    if (MPI_Gather(&table->stats->r_access, 1, MPI_INT, r_access, 1, MPI_INT, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+    if (MPI_Reduce(&table->stats->r_access, &sum_r_access, 1, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+    table->stats->r_access = 0;
+    
+    if (rank == 0) written_buckets = (int*) calloc(table->comm_size, sizeof(int));
+    if (MPI_Reduce(table->stats->writes_local, written_buckets, table->comm_size, MPI_INT, MPI_SUM, 0, table->communicator) != 0) return DHT_MPI_ERROR;
+
+    if (rank == 0) { //only process with rank 0 will print out results as a table
+        int sum_written_buckets = 0;
+
+        for (int i=0; i < table->comm_size; i++) {
+            sum_written_buckets += written_buckets[i];
+        }
+
+        int members = 7;
+        int padsize = (members*12)+1;
+        char pad[padsize+1];
+
+        memset(pad, '-', padsize*sizeof(char));
+        pad[padsize]= '\0';
+        printf("\n");
+        printf("%-35s||resets with every call of this function\n", " ");
+        printf("%-11s|%-11s|%-11s||%-11s|%-11s|%-11s|%-11s\n", 
+            "rank", 
+            "occupied", 
+            "free", 
+            "w_access",
+            "r_access",
+            "read misses",
+            "collisions");
+        printf("%s\n", pad);
+        for (int i = 0; i < table->comm_size; i++) {
+            printf("%-11d|%-11d|%-11d||%-11d|%-11d|%-11d|%-11d\n", 
+                i, 
+                written_buckets[i], 
+                table->table_size-written_buckets[i], 
+                w_access[i],
+                r_access[i],
+                read_misses[i],
+                collisions[i]);
+        }
+        printf("%s\n", pad);
+        printf("%-11s|%-11d|%-11d||%-11d|%-11d|%-11d|%-11d\n", 
+            "sum", 
+            sum_written_buckets, 
+            (table->table_size*table->comm_size)-sum_written_buckets, 
+            sum_w_access,
+            sum_r_access,
+            sum_read_misses,
+            sum_collisions);
+
+        printf("%s\n", pad);
+        printf("%s %d\n", 
+            "new entries:", 
+            sum_written_buckets - table->stats->old_writes);
+
+        printf("\n");
+        fflush(stdout);
+
+        table->stats->old_writes = sum_written_buckets;
+    }
+
+    //enable warning again
+    #pragma GCC diagnostic pop
+
+    MPI_Barrier(table->communicator);
+    return DHT_SUCCESS;
+}
--- a/src/DHT.h
+++ b/src/DHT.h
@ -0,0 +1,112 @@
+/*
+ * File:   DHT.h
+ * Author: max luebke
+ *
+ * Created on 16. November 2017, 09:14
+ */
+
+#ifndef DHT_H
+#define DHT_H
+
+#include <mpi.h>
+#include <stdint.h>
+
+#define DHT_MPI_ERROR -1
+#define DHT_READ_ERROR -2
+#define DHT_SUCCESS 0
+#define DHT_WRITE_SUCCESS_WITH_COLLISION 1
+
+#define DHT_WRONG_FILE 11
+#define DHT_FILE_IO_ERROR 12
+#define DHT_FILE_READ_ERROR 13
+#define DHT_FILE_WRITE_ERROR 14
+
+#define DHT_HEADER_SIZE 8
+
+typedef struct {;
+    int *writes_local, old_writes;
+    int read_misses, collisions;
+    int w_access, r_access;
+} DHT_stats;
+
+typedef struct {
+    MPI_Win window;
+    int data_size;
+    int key_size;
+    unsigned int table_size;
+    MPI_Comm communicator;
+    int comm_size;
+    uint64_t(*hash_func) (int, void*);
+    void* recv_entry;
+    void* send_entry;
+    void* mem_alloc;
+    int read_misses;
+    int collisions;
+    unsigned int *index;
+    unsigned int index_count;
+    DHT_stats *stats;
+} DHT;
+
+
+
+/*
+ * parameters:
+ * MPI_Comm comm - communicator of processes that are holding the DHT
+ * int size_per_process - number of buckets each process will create
+ * int data_size - size of data in bytes
+ * int key_size - size of key in bytes
+ * *hash_func - pointer to hashfunction
+ *
+ * return:
+ * NULL if error during initialization
+ * DHT* if success
+ */
+extern DHT* DHT_create(MPI_Comm comm, unsigned int size_per_process, int data_size, int key_size, uint64_t(*hash_func)(int, void*));
+
+/*
+ * parameters:
+ * DHT *table - DHT_object created by DHT_create
+ * void* data - pointer to data
+ * void* - pointer to key
+ *
+ * return:
+ * error value (see above)
+ */
+extern int DHT_write(DHT *table, void* key, void* data);
+
+/*
+ * parameters:
+ * DHT *table - DHT_object created by DHT_create
+ * void* key - pointer to key
+ * void* destination - pointer which will hold the resulting data from DHT
+ *
+ * return:
+ * error value (see above)
+ */
+extern int DHT_read(DHT *table, void* key, void* destination);
+
+extern int DHT_to_file(DHT *table, char* filename);
+
+extern int DHT_from_file(DHT *table, char* filename);
+
+/*
+ * parameters:
+ * DHT *table - DHT_object created by DHT_create
+ * int* collision_counter - pointer which will hold the total count of collisions
+ * int* readerror_counter - pointer which will hold the total count of read errors
+ *
+ * return:
+ * error value (see above)
+ */
+extern int DHT_free(DHT *table, int* collision_counter, int* readerror_counter);
+
+/*
+ * parameters:
+ * DHT *table - DHT_object created by DHT_create
+ * 
+ * return:
+ * error value (DHT_SUCCESS or DHT_MPI_ERROR)
+ */
+extern int DHT_print_statistics(DHT *table);
+
+#endif /* DHT_H */
--- a/src/Makefile
+++ b/src/Makefile
@ -0,0 +1,51 @@
+## Simple Makefile for MPI use of RInside
+
+## comment this out if you need a different version of R, 
+## and set R_HOME accordingly as an environment variable
+R_HOME := 		$(shell R RHOME)
+
+sources := 		$(wildcard *.cpp)
+programs := 		kin
+
+
+# OpenMPI header and libraries
+MPICPPFLAGS :=		$(shell mpic++ -showme:compile)
+MPILIBS :=		$(shell mpic++ -showme:link)
+
+## include headers and libraries for R 
+RCPPFLAGS := 		$(shell $(R_HOME)/bin/R CMD config --cppflags)
+RLDFLAGS := 		$(shell $(R_HOME)/bin/R CMD config --ldflags)
+RBLAS := 		$(shell $(R_HOME)/bin/R CMD config BLAS_LIBS)
+RLAPACK := 		$(shell $(R_HOME)/bin/R CMD config LAPACK_LIBS)
+
+
+## include headers and libraries for Rcpp interface classes
+## note that RCPPLIBS will be empty with Rcpp (>= 0.11.0) and can be omitted
+RCPPINCL := 		$(shell echo 'Rcpp:::CxxFlags()' | $(R_HOME)/bin/R --vanilla --slave)
+RCPPLIBS := 		$(shell echo 'Rcpp:::LdFlags()'  | $(R_HOME)/bin/R --vanilla --slave)
+
+
+## include headers and libraries for RInside embedding classes
+RINSIDEINCL := 		$(shell echo 'RInside:::CxxFlags()' | $(R_HOME)/bin/R --vanilla --slave)
+RINSIDELIBS := 		$(shell echo 'RInside:::LdFlags()'  | $(R_HOME)/bin/R --vanilla --slave)
+
+
+## compiler etc settings used in default make rules
+CXX := 			$(shell $(R_HOME)/bin/R CMD config CXX)
+CPPFLAGS := 		-D STRICT_R_HEADERS -Wall -ggdb -O3 $(shell $(R_HOME)/bin/R CMD config CPPFLAGS)
+CXXFLAGS := 		$(MPICPPFLAGS) $(RCPPFLAGS) $(RCPPINCL) $(RINSIDEINCL) $(shell $(R_HOME)/bin/R CMD config CXXFLAGS)
+LDLIBS := 		$(MPILIBS) $(RLDFLAGS) $(RBLAS) $(RLAPACK) $(RCPPLIBS) $(RINSIDELIBS) -lcrypto ## -fpermissive
+
+default: all
+
+kin :			DHT.cpp worker.cpp dht_wrapper.cpp r_utils.cpp kin.cpp
+
+all : 			$(programs)
+			@test -x /usr/bin/strip && strip $^
+
+run :			$(programs)
+			@test -x /usr/bin/mpirun && for p in $(programs); do echo; echo "Running $$p:"; mpirun -n 4 ./$$p; done
+
+clean:
+			rm -vf $(programs)
+
--- a/src/RFun_Eval.R
+++ b/src/RFun_Eval.R
@ -0,0 +1,214 @@
+## Simple library of functions to assess and visualize the results of the coupled simulations
+
+## Time-stamp: "Last modified 2020-02-04 23:21:37 delucia"
+
+require(RedModRphree)
+require(Rmufits)  ## essentially for PlotCartCellData 
+
+
+## function which reads all simulation results in a given directory
+ReadRTSims <- function(dir) {
+    files_full <- list.files(dir, pattern="iter.*rds", full.names=TRUE)
+    files_name <- list.files(dir, pattern="iter.*rds", full.names=FALSE)
+    res <- lapply(files_full, readRDS)
+    names(res) <- gsub(".rds","",files_name, fixed=TRUE)
+    return(res)
+}
+
+## function which reads all successive DHT stored in a given directory
+ReadAllDHT <- function(dir) {
+    files_full <- list.files(dir, pattern="iter.*dht", full.names=TRUE)
+    files_name <- list.files(dir, pattern="iter.*dht", full.names=FALSE)
+    res <- lapply(files_full, ReadDHT)
+    names(res) <- gsub(".rds","",files_name, fixed=TRUE)
+    return(res)
+}
+
+## function which reads one .dht file and gives a matrix
+ReadDHT <- function(file)
+{
+    conn <- file(file, "rb")  ## open for reading in binary mode
+    if (!isSeekable(conn))
+        stop("Connection not seekable")
+
+    ## we first reposition ourselves to the end of the file...
+    tmp <- seek(conn, where=0, origin = "end")
+    ## ... and then back to the origin so to store the length in bytes
+    flen <- seek(conn, where=0, origin = "start")
+
+    ## we read the first 2 integers (4 bytes each) containing dimensions in bytes
+    dims <- readBin(conn, what="integer", n=2)
+
+    ## compute dimensions of the data
+    tots <- sum(dims)
+    ncol <- tots/8
+    nrow <- (flen - 8)/tots ## 8 here is 2*sizeof("int")
+    buff <- readBin(conn, what="double", n=ncol*nrow)
+    ## close connection
+    close(conn)
+    res <- matrix(buff, nrow=nrow, ncol=ncol, byrow=TRUE)
+    return(res)
+}
+
+## Scatter plots of each variable in the iteration 
+PlotScatter <- function(sam1, sam2, which=NULL, labs=c("NO DHT", "DHT"), pch=".", cols=3, ...)
+{
+    if ((!is.data.frame(sam1)) & ("T" %in% names(sam1)))
+        sam1 <- sam1$C
+    if ((!is.data.frame(sam2)) & ("T" %in% names(sam2)))
+        sam2 <- sam2$C
+    if (is.numeric(which))
+        inds <- which
+    else if (is.character(which))
+        inds <- match(which, colnames(sam1))
+    else if (is.null(which))
+        inds <- seq_along(colnames(sam1))
+
+    rows <- nrow(matrix(seq_along(inds), ncol=cols))
+    par(mfrow=c(rows, cols))
+    a <- lapply(inds, function(x) {
+        plot(sam1[,x], sam2[,x], main=colnames(sam1)[x],  xlab=labs[1], ylab=labs[2], pch=pch, col="red", ...)
+        abline(0,1, col="grey", cex=1.5)
+    })
+    invisible()
+}
+
+##### Some metrics for relative comparison
+
+## Using range as norm
+RranRMSE <- function (pred, obs)
+    sqrt(mean((pred - obs)^2))/abs(max(pred) - min(pred))
+
+## Using max val as norm
+RmaxRMSE <- function (pred, obs)
+    sqrt(mean((pred - obs)^2)/abs(max(pred)))
+
+## Using sd as norm
+RsdRMSE <- function (pred, obs)
+    sqrt(mean((pred - obs)^2))/sd(pred)
+
+## Using mean as norm
+RmeanRMSE <- function (pred, obs)
+    sqrt(mean((pred - obs)^2))/mean(pred)
+
+## Using mean as norm
+RAEmax <- function (pred, obs)
+    mean(abs(pred - obs))/max(pred)
+
+## Max absolute error
+MAE <- function (pred, obs)
+    max(abs(pred - obs))
+
+## workhorse function for ComputeErrors and its use with mapply
+AppliedFun <- function(a, b, .fun) mapply(.fun, as.list(a$C), as.list(b$C))
+
+## Compute the diffs between two simulation, iter by iter,
+## with a given metric (passed in form of function name to this function)
+ComputeErrors <- function(sim1, sim2, FUN=RMSE)
+{
+    if (length(sim1)!= length(sim2)) {
+        cat("The simulations do not have the same length, subsetting to the shortest\n")
+        a <- min(length(sim1), length(sim2))
+        sim1 <- sim1[1:a]
+        sim2 <- sim2[1:a]
+    }
+    if (!is.function(match.fun(FUN))) {
+        cat("Invalid function\n")
+
+    }
+
+    t(mapply(AppliedFun, sim1, sim2, MoreArgs=list(.fun=FUN)))
+}
+
+## Function to display the error progress between 2 simulations
+ErrorProgress <- function(mat, ignore, colors, metric, ...)
+{
+    if (missing(colors))
+        colors <- sample(rainbow(ncol(mat)))
+
+    if (missing(metric))
+        metric <- "Metric"
+
+    ## if the optional argument "ignore" (a character vector) is
+    ## passed, we remove the matching column names
+    if (!missing(ignore)) {
+        to_remove <- match(ignore, colnames(mat))
+        mat <- mat[, -to_remove]
+        colors <- colors[-to_remove]
+    }
+    
+    yc <- mat[nrow(mat),]
+    par(mar=c(5,4,2,6))
+    matplot(mat, type="l", lty=1, lwd=2, col=colors, xlab="iteration", ylab=metric, ...)
+    mtext(colnames(mat), side = 4, line = 2, outer = FALSE, at = yc, adj = 0.5, col = colors, las=2)
+}
+
+## Function which exports all simulations to ParaView's .vtu Requires
+## package RcppVTK
+ExportToParaview <- function(vtu, nameout, results){
+    require(RcppVTK)
+    n <- length(results)
+    vars <- colnames(results[[1]])
+    ## strip eventually present ".vtu" from nameout
+    nameout <- sub(".vtu", "", nameout, fixed=TRUE)
+    namesteps <- paste0(nameout,  ".", sprintf("%04d",seq(1,n)), ".vtu")
+    for (step in seq_along(results)) {
+        file.copy(from=vtu, to=namesteps[step], overwrite = TRUE)
+        cat(paste("Saving step ", step, " in file ", namesteps[step], "\n"))
+        ret <- ExportMatrixToVTU (fin=vtu, fout=namesteps[step], names=colnames(results[[step]]), mat=results[[step]])
+    }
+    invisible(ret)
+}
+
+
+## Version of Rmufits::PlotCartCellData with the ability to fix the
+## "breaks" for color coding of 2D simulations
+Plot2DCellData <- function (data, grid, nx, ny, contour = TRUE,
+                            nlevels = 12, breaks, palette = "heat.colors",
+                            rev.palette = TRUE, scale = TRUE, ...) 
+{
+    if (!missing(grid)) {
+        xc <- unique(sort(grid$cell$XCOORD))
+        yc <- unique(sort(grid$cell$YCOORD))
+        nx <- length(xc)
+        ny <- length(yc)
+        if (!length(data) == nx * ny) 
+            stop("Wrong nx, ny or grid")
+    }
+    else {
+        xc <- seq(1, nx)
+        yc <- seq(1, ny)
+    }
+    z <- matrix(round(data, 6), ncol = nx, nrow = ny, byrow = TRUE)
+    pp <- t(z[rev(seq(1, nrow(z))), ])
+
+    if (missing(breaks)) {
+        breaks <- pretty(data, n = nlevels)
+    }
+    
+    breakslen <- length(breaks)
+    colors <- do.call(palette, list(n = breakslen - 1))
+    if (rev.palette) 
+        colors <- rev(colors)
+    if (scale) {
+        par(mfrow = c(1, 2))
+        nf <- layout(matrix(c(1, 2), 1, 2, byrow = TRUE), widths = c(4, 
+            1))
+    }
+    par(las = 1, mar = c(5, 5, 3, 1))
+    image(xc, yc, pp, xlab = "X [m]", ylab = "Y[m]", las = 1, 
+        asp = 1, breaks = breaks, col = colors, axes = FALSE, 
+        ...)
+    axis(1)
+    axis(2)
+    if (contour) 
+        contour(unique(sort(xc)), unique(sort(yc)), pp, breaks = breaks, 
+            add = TRUE)
+    if (scale) {
+        par(las = 1, mar = c(5, 1, 5, 5))
+        PlotImageScale(data, breaks = breaks, add.axis = FALSE, 
+            axis.pos = 4, col = colors)
+        axis(4, at = breaks)
+    }
+    invisible(pp)
+}
--- a/src/SimComp2D.R
+++ b/src/SimComp2D.R
@ -0,0 +1,128 @@
+## chemical database
+db <- RPhreeFile("mdl_quint_kin.dat", is.db=TRUE)
+phreeqc::phrLoadDatabaseString(db)
+
+## only the directory
+demodir <- system.file("extdata", "demo_rtwithmufits", package="Rmufits")
+
+prop <- c("Al", "C","Ca","Cl","Fe", "K", "Mg","Na", "Si", "pH", ## "pe",
+          "Albite", "Calcite", "Chlorite", "Illite", "Kaolinite")
+
+signif_vector <- c(7,7,7,7,7,7,7,7,7,6, 5,5,5,5,5)
+prop_type <- rep("normal", length(signif_vector))
+
+base <- c("SOLUTION 1",
+          "units  mol/kgw",
+          "pH     6.77",
+          "temp  35",
+          "-water 1",
+          "Al  8.06386e-09",
+          "C   0.0006108294",
+          "Ca  0.09709463",
+          "Cl  4.340042",
+          "Fe  1.234357e-05",
+          "K   0.01117434",
+          "Mg  0.0406959",
+          "Na  4.189209",
+          "Si  0.0001935754",
+          "INCREMENTAL_REACTIONS true",
+          "KINETICS 1 ",
+          "-steps 86400",
+          "-bad_step_max 10000",
+          "-cvode  true",
+          "Albite",
+          "-m      8.432165", ## 1540.0",
+          "-parms  01.54  100",
+          "Calcite",
+          "-m   0.0",
+          "-parms  10  100",
+          "Chlorite",
+          "-m      1.106585", ## 202.100",
+          "-parms  64.84  100",
+          "Illite",
+          "-m      0.9549153", ## 174.400",
+          "-parms  43.38  100",
+          "Kaolinite",
+          "-m      0.0",
+          "-parms  29.17  100",
+          "END")
+
+selout <- c("KNOBS",
+            "-convergence_tolerance 1E-6",
+            "SELECTED_OUTPUT",
+            "-reset            false",
+            "USER_PUNCH",
+            "-head  Al C Ca Cl Fe K Mg Na Si pH Albite Calcite Chlorite Illite Kaolinite", ## pe
+            "10 PUNCH TOT(\"Al\"), TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Fe\"), TOT(\"K\"), TOT(\"Mg\"), TOT(\"Na\"), TOT(\"Si\"), -LA(\"H+\"), KIN(\"Albite\"), KIN(\"Calcite\"), KIN(\"Chlorite\"), KIN(\"Illite\"), KIN(\"Kaolinite\")" )
+
+## Define initial conditions as equilibrium with primary minerals
+ipr <- c(Al  = 8.689e-10,
+         C   = 0.0006108,
+         Ca  = 0.09709,
+         Cl  = 4.34,
+         Fe  = 1.802e-06,
+         K   = 0.01131,
+         Mg  = 0.04074,
+         Na  = 4.189,
+         Si  = 7.653e-05,
+         pH  = 6.889,
+         Albite    =  5.0,
+         Calcite   =  0.0,
+         Chlorite  = 10.0,
+         Illite    =  2.0,
+         Kaolinite =  0.0
+         )
+
+initstate <- matrix(rep(ipr, 2500), byrow=TRUE, ncol=length(ipr))
+colnames(initstate) <- names(ipr)
+
+vecinj <- c(Al= 8.694e-10,
+            C = 8.182e-01,
+            Ca= 9.710e-02,
+            Cl= 4.340e+00,
+            Fe= 1.778e-06,
+            K = 1.131e-02,
+            Mg= 4.074e-02,
+            Na= 4.189e+00,
+            Si= 7.652e-05,
+            pH= 2.556228)
+
+
+## setup boundary conditions for transport - we have already read the
+## GRID with the following code:
+## grid <- Rmufits::ReadGrid(paste0(demodir,"/d2ascii.run.GRID.SUM"))
+## cbound <- which(grid$cell$ACTNUM == 2)
+## dput(cbound)
+cbound <- c(1L, 50L, 100L, 150L, 200L, 250L, 300L, 350L, 400L, 450L, 500L,
+            550L, 600L, 650L, 700L, 750L, 800L, 850L, 900L, 950L, 1000L,
+            1050L, 1100L, 1150L, 1200L, 1250L, 1300L, 1350L, 1400L, 1450L,
+            1500L, 1550L, 1600L, 1650L, 1700L, 1750L, 1800L, 1850L, 1900L,
+            1950L, 2000L, 2050L, 2100L, 2150L, 2200L, 2250L, 2300L, 2350L,
+            2400L, 2450L, 2451L, 2452L, 2453L, 2454L, 2455L, 2456L, 2457L,
+            2458L, 2459L, 2460L, 2461L, 2462L, 2463L, 2464L, 2465L, 2466L,
+            2467L, 2468L, 2469L, 2470L, 2471L, 2472L, 2473L, 2474L, 2475L,
+            2476L, 2477L, 2478L, 2479L, 2480L, 2481L, 2482L, 2483L, 2484L,
+            2485L, 2486L, 2487L, 2488L, 2489L, 2490L, 2491L, 2492L, 2493L,
+            2494L, 2495L, 2496L, 2497L, 2498L, 2499L, 2500L)
+
+boundary_matrix <- matrix(rep(ipr[1:10], length(cbound)), byrow=TRUE, nrow=length(cbound))
+colnames(boundary_matrix) <- names(ipr[1:10])
+boundary_matrix[1, ] <- vecinj
+
+boundary_matrix <- cbind(cbound,boundary_matrix)
+
+setup <- list(n = 2500,
+              bound = boundary_matrix,
+              base  = base,
+              first = selout,
+              initsim = initstate,
+              Cf    = 1,
+              prop  = prop,
+              immobile = seq(11,15),
+              kin      = seq(11,15),
+              phase   = "FLUX1",
+              density = "DEN1",
+              reduce = FALSE,
+              snapshots = demodir, ## directory where we will read MUFITS SUM files
+              gridfile  = paste0(demodir,"/d2ascii.run.GRID.SUM")
+              )
--- a/src/SimCompKtz.R
+++ b/src/SimCompKtz.R
@ -0,0 +1,131 @@
+db <- RPhreeFile("mdl_quint_kin.dat", is.db=TRUE)
+phreeqc::phrLoadDatabaseString(db)
+
+## only the directory
+demodir <- "./snaps/"
+         
+base <- c("SOLUTION 1",
+          "units  mol/kgw",
+          "pH     6.77",
+          "temp  35",
+          "-water 1",
+          "Al  8.06386e-09",
+          "C   0.0006108294",
+          "Ca  0.09709463",
+          "Cl  4.340042",
+          "Fe  1.234357e-05",
+          "K   0.01117434", 
+          "Mg  0.0406959",
+          "Na  4.189209",
+          "Si  0.0001935754",
+          "INCREMENTAL_REACTIONS true",
+          "KINETICS 1 ",
+          "-steps 86400", 
+          "-bad_step_max 10000",
+          "-cvode  true",
+          "Albite",
+          "-m      8.432165", ## 1540.0",
+          "-parms  01.54  100",
+          "Calcite",
+          "-m   0.0",
+          "-parms  10  100",
+          "Chlorite",
+          "-m      1.106585", ## 202.100",
+          "-parms  64.84  100",
+          "Illite",
+          "-m      0.9549153", ## 174.400",
+          "-parms  43.38  100",
+          "Kaolinite",
+          "-m      0.0",
+          "-parms  29.17  100",
+          "END")
+
+selout <- c("KNOBS",
+            "-convergence_tolerance 1E-6",
+            "SELECTED_OUTPUT",
+            "-reset            false",
+            "USER_PUNCH",
+            "-head  Al C Ca Cl Fe K Mg Na Si pH Albite Calcite Chlorite Illite Kaolinite", ## pe 
+            "10 PUNCH TOT(\"Al\"), TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Fe\"), TOT(\"K\"), TOT(\"Mg\"), TOT(\"Na\"), TOT(\"Si\"), -LA(\"H+\"), KIN(\"Albite\"), KIN(\"Calcite\"), KIN(\"Chlorite\"), KIN(\"Illite\"), KIN(\"Kaolinite\")" )
+
+## Define initial conditions as equilibrium with primary minerals
+ipr <- c(Al  = 8.689e-10,
+         C   = 0.0006108,
+         Ca  = 0.09709,
+         Cl  = 4.34,
+         Fe  = 1.802e-06,
+         K   = 0.01131,
+         Mg  = 0.04074,
+         Na  = 4.189,
+         Si  = 7.653e-05,
+         pH  = 6.889,
+         Albite    =  5.0,
+         Calcite   =  0.0,
+         Chlorite  = 10.0,
+         Illite    =  2.0,
+         Kaolinite =  0.0
+         )
+
+initstate <- matrix(rep(ipr, 648420), byrow=TRUE, ncol=length(ipr))
+colnames(initstate) <- names(ipr)
+
+vecinj <- c(Al= 8.694e-10,
+            C = 8.182e-01,
+            Ca= 9.710e-02,
+            Cl= 4.340e+00,
+            Fe= 1.778e-06,
+            K = 1.131e-02,
+            Mg= 4.074e-02,
+            Na= 4.189e+00,
+            Si= 7.652e-05,
+            pH= 2.556228)
+
+            
+            
+
+prop <- c("Al", "C","Ca","Cl","Fe", "K", "Mg","Na", "Si", "pH", ## "pe",
+          "Albite", "Calcite", "Chlorite", "Illite", "Kaolinite")
+
+bound_elm <- c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
+               15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 
+               28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 
+               41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 
+               54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 
+               67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 
+               80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 
+               93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 
+               105L, 106L, 107L, 108L, 214L, 215L)
+
+inj_elm <- c(7426L, 18233L, 29040L, 39847L, 
+             50654L, 61461L, 72268L, 83075L, 93882L, 104689L, 115496L, 126303L, 
+             137110L, 147917L, 158724L, 169531L, 180338L, 191145L, 201952L, 
+             212759L, 223566L, 234373L, 245180L, 255987L, 266794L, 277601L, 
+             288408L, 299215L, 310022L, 320829L, 331636L, 342443L, 353250L, 
+             364057L, 374864L, 385671L, 396478L, 407285L, 418092L, 428899L, 
+             439706L, 450513L, 461320L, 472127L, 482934L, 493741L, 504548L, 
+             515355L)
+
+cbound <- inj_elm
+
+boundinit <- matrix(rep(vecinj, length(cbound)), ncol=length(vecinj), byrow=TRUE) 
+myboundmat <- cbind(cbound,boundinit)
+
+## distinguish between injection and real boundaries
+colnames(myboundmat) <- c("cbound", names(vecinj))
+
+setup <- list(n=648420,
+              base=base, 
+              bound=myboundmat, 
+              first=selout, 
+              initsim=initstate,  
+              Cf=1, 
+              prop=prop, 
+              immobile=seq(11,15),
+              kin= seq(11,15),
+              phase="FLUX1",
+              density="DENS",
+              reduce=TRUE,
+              snapshots="snaps/AllSnaps_cmp_v3.rds",
+              gridfile ="snaps/GridKtz_cmp_v3.rds")
+
+         
--- a/src/SimDol2D.R
+++ b/src/SimDol2D.R
@ -0,0 +1,118 @@
+## chemical database
+db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat",
+                             package="RedModRphree"), is.db=TRUE)
+
+phreeqc::phrLoadDatabaseString(db)
+
+## only the directory
+demodir <- system.file("extdata", "demo_rtwithmufits", package="Rmufits")
+
+prop <- c("C","Ca","Cl","Mg","pH","pe","O2g", "Calcite","Dolomite")
+
+signif_vector <- c(7,7,7,7,7,7,7,5,5)
+prop_type <- c("act","act","act","act","logact","logact","ignore","act","act")
+
+
+base <- c("SOLUTION 1",
+          "units mol/kgw",
+          "temp 25.0",
+          "water 1",
+          "pH 9.91 charge",
+          "pe 4.0",
+          "C   1.2279E-04",
+          "Ca  1.2279E-04",
+          "Mg 0.001",
+          "Cl 0.002",
+          "PURE 1",
+          "O2g -0.1675 10",
+          "KINETICS 1",
+          "-steps 100",
+          "-step_divide 100",
+          "-bad_step_max 2000",
+          "Calcite", "-m      0.000207",
+          "-parms  0.0032",
+          "Dolomite",
+          "-m      0.0",
+          "-parms  0.00032",
+          "END")
+
+selout <- c("SELECTED_OUTPUT", "-high_precision true", "-reset false",
+            "-time", "-soln", "-temperature true", "-water true",
+            "-pH", "-pe", "-totals C Ca Cl Mg",
+            "-kinetic_reactants Calcite Dolomite", "-equilibrium O2g")
+
+initsim <- c("SELECTED_OUTPUT", "-high_precision true",
+             "-reset false",
+             "USER_PUNCH",
+             "-head C Ca Cl Mg pH pe O2g Calcite Dolomite",
+             "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")",
+             "SOLUTION 1",
+             "units mol/kgw",
+             "temp 25.0", "water 1",
+             "pH 9.91 charge",
+             "pe 4.0",
+             "C   1.2279E-04",
+             "Ca  1.2279E-04",
+             "Cl 1E-12",
+             "Mg 1E-12",
+             "PURE 1",
+             "O2g     -0.6788 10.0",
+             "Calcite  0.0 2.07E-3",
+             "Dolomite 0.0 0.0",
+             "END")
+
+vecinj <- c("C"= 0,
+            "Ca"  = 0,
+            "Cl"  = 0.002,
+            "Mg"  = 0.001,
+            "pe"   = 4,
+            "pH"   = 7)
+
+init <- c("C(4)"= 1.2279E-4,
+          "Ca"  =1.2279E-4,
+          "Cl"  =0,        
+          "Mg"  =0,
+          "pe"  =4,
+          "pH"  =7,
+          "Calcite"= 2.07e-4,
+          "Dolomite"= 0)
+
+
+## setup boundary conditions for transport - we have already read the
+## GRID with the following code:
+## grid <- Rmufits::ReadGrid(paste0(demodir,"/d2ascii.run.GRID.SUM"))
+## cbound <- which(grid$cell$ACTNUM == 2)
+## dput(cbound)
+cbound <- c(1L, 50L, 100L, 150L, 200L, 250L, 300L, 350L, 400L, 450L, 500L, 
+            550L, 600L, 650L, 700L, 750L, 800L, 850L, 900L, 950L, 1000L, 
+            1050L, 1100L, 1150L, 1200L, 1250L, 1300L, 1350L, 1400L, 1450L, 
+            1500L, 1550L, 1600L, 1650L, 1700L, 1750L, 1800L, 1850L, 1900L, 
+            1950L, 2000L, 2050L, 2100L, 2150L, 2200L, 2250L, 2300L, 2350L, 
+            2400L, 2450L, 2451L, 2452L, 2453L, 2454L, 2455L, 2456L, 2457L, 
+            2458L, 2459L, 2460L, 2461L, 2462L, 2463L, 2464L, 2465L, 2466L, 
+            2467L, 2468L, 2469L, 2470L, 2471L, 2472L, 2473L, 2474L, 2475L, 
+            2476L, 2477L, 2478L, 2479L, 2480L, 2481L, 2482L, 2483L, 2484L, 
+            2485L, 2486L, 2487L, 2488L, 2489L, 2490L, 2491L, 2492L, 2493L, 
+            2494L, 2495L, 2496L, 2497L, 2498L, 2499L, 2500L)
+
+boundinit <- matrix(rep(init[-c(7,8)], length(cbound)), byrow=TRUE, nrow=length(cbound))
+myboundmat <- cbind(cbound,boundinit)
+myboundmat[cbound==1, c(2:7)] <- vecinj
+colnames(myboundmat) <- c("cbound", names(vecinj))
+
+setup <- list(n=2500,
+              bound=myboundmat,   
+              base=base,
+              first=selout,
+              initsim=initsim,
+              Cf=1,
+              prop=prop,
+              immobile=c(7,8,9),
+              kin= c(8,9),
+              ann=list(O2g=-0.1675), 
+              phase="FLUX1",
+              density="DEN1",
+              reduce=FALSE,
+              snapshots=demodir, ## directory where we will read MUFITS SUM files
+              gridfile=paste0(demodir,"/d2ascii.run.GRID.SUM")
+              )
--- a/src/SimDolKtz.R
+++ b/src/SimDolKtz.R
@ -0,0 +1,130 @@
+# library(RedModRphree)
+# library(Rmufits)
+# library(RcppVTK)
+
+db <- RPhreeFile(system.file("extdata", "phreeqc_kin.dat",
+                             package="RedModRphree"), is.db=TRUE)
+
+phreeqc::phrLoadDatabaseString(db)
+
+prop <- c("C","Ca","Cl","Mg","pH","pe","O2g", "Calcite","Dolomite")
+signif_vector <- c(7,7,7,7,7,7,7,5,5)
+prop_type <- c("act","act","act","act","logact","logact","ignore","act","act")
+
+
+base <- c("SOLUTION 1",
+          "units mol/kgw",
+          "temp 25.0",
+          "water 1",
+          "pH 9.91 charge",
+          "pe 4.0",
+          "C   1.2279E-04",
+          "Ca  1.2279E-04",
+          "Mg 0.001",
+          "Cl 0.002",
+          "PURE 1",
+          "O2g -0.1675 10",
+          "KINETICS 1",
+          "-steps 100",
+          "-step_divide 100",
+          "-bad_step_max 2000",
+          "Calcite", "-m      0.000207",
+          "-parms  0.0032",
+          "Dolomite",
+          "-m      0.0",
+          "-parms  0.00032",
+          "END")
+
+selout <- c("SELECTED_OUTPUT",
+            "-high_precision true",
+            "-reset false",
+            "-time",
+            "-soln",
+            "-temperature true",
+            "-water true",
+            "-pH",
+            "-pe",
+            "-totals C Ca Cl Mg",
+            "-kinetic_reactants Calcite Dolomite",
+            "-equilibrium O2g")
+
+initsim <- c("SELECTED_OUTPUT",
+             "-high_precision true",
+             "-reset false",
+             "USER_PUNCH",
+             "-head C Ca Cl Mg pH pe O2g Calcite Dolomite",
+             "10 PUNCH TOT(\"C\"), TOT(\"Ca\"), TOT(\"Cl\"), TOT(\"Mg\"), -LA(\"H+\"), -LA(\"e-\"), EQUI(\"O2g\"), EQUI(\"Calcite\"), EQUI(\"Dolomite\")",
+             "SOLUTION 1",
+             "units mol/kgw",
+             "temp 25.0", "water 1",
+             "pH 9.91 charge",
+             "pe 4.0",
+             "C   1.2279E-04",
+             "Ca  1.2279E-04",
+             "Cl 1E-12",
+             "Mg 1E-12",
+             "PURE 1",
+             "O2g     -0.6788 10.0",
+             "Calcite  0.0 2.07E-3",
+             "Dolomite 0.0 0.0",
+             "END")
+
+vecinj <- c("C"= 0,
+            "Ca"  = 0,
+            "Cl"  = 0.002,
+            "Mg"  = 0.001,
+            "pe"   = 4,
+            "pH"   = 7)
+
+init <- c("C(4)"= 1.2279E-4,
+          "Ca"  =1.2279E-4,
+          "Cl"  =0,        
+          "Mg"  =0,
+          "pe"  =4,
+          "pH"  =7,
+          "Calcite"= 2.07e-4,
+          "Dolomite"= 0)
+
+bound_elm <- c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 
+               15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 
+               28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 
+               41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 
+               54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 
+               67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 
+               80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 
+               93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 
+               105L, 106L, 107L, 108L, 214L, 215L)
+
+inj_elm <- c(7426L, 18233L, 29040L, 39847L, 
+             50654L, 61461L, 72268L, 83075L, 93882L, 104689L, 115496L, 126303L, 
+             137110L, 147917L, 158724L, 169531L, 180338L, 191145L, 201952L, 
+             212759L, 223566L, 234373L, 245180L, 255987L, 266794L, 277601L, 
+             288408L, 299215L, 310022L, 320829L, 331636L, 342443L, 353250L, 
+             364057L, 374864L, 385671L, 396478L, 407285L, 418092L, 428899L, 
+             439706L, 450513L, 461320L, 472127L, 482934L, 493741L, 504548L, 
+             515355L)
+
+cbound <- inj_elm
+
+boundinit <- matrix(rep(vecinj, length(cbound)), ncol=length(vecinj), byrow=TRUE) 
+myboundmat <- cbind(cbound,boundinit)
+
+## distinguish between injection and real boundaries
+colnames(myboundmat) <- c("cbound", names(vecinj))
+
+setup <- list(n=648420,
+              bound=myboundmat,
+              base=base,
+              first=selout,
+              initsim=initsim,
+              Cf=1,
+              prop=prop,
+              immobile=c(7,8,9),
+              kin= c(8,9),
+              ann=list(O2g=-0.1675),
+              phase="FLUX1",
+              density="DENS",
+              reduce=FALSE,
+              snapshots="snaps/AllSnaps_cmp_v3.rds",
+              gridfile ="snaps/GridKtz_cmp_v3.rds"
+              )
--- a/src/Test_Pro.R
+++ b/src/Test_Pro.R
@ -0,0 +1,115 @@
+## Example script for assessing simulation results with "RFun_Eval.R"
+## Time-stamp: "Last modified 2020-02-05 03:36:05 delucia"
+
+## Read all the needed functions
+source("RFun_Eval.R")
+
+
+
+## read the simulation results of a SimDol2D variant with setup$prolong
+sd <- ReadRTSims("test_prol4")
+
+simtimes <- sapply(sd, "[","simtime")
+
+
+## we still need to read the grid to perform 2D visualization
+demodir <- system.file("extdata", "demo_rtwithmufits", package="Rmufits")
+grid <- ReadGrid(paste0(demodir,"/d2ascii.run.GRID.SUM"))
+
+## fix the color scale
+br <- seq(0, 0.0012, length=13)
+
+
+## we want animation
+library(animation)
+
+## workhorse function to be used with package "animation"
+PlotAn <- function(tot, prop, grid, breaks)
+{
+    for (step in seq(1, length(tot))) {
+        snap <- tot[[step]]$C
+        time <- tot[[step]]$simtime/3600/24
+        ind <- match(prop, colnames(snap))
+        Plot2DCellData(snap[,ind], grid=grid, contour=FALSE, breaks=breaks, nlevels=length(breaks), scale=TRUE, main=paste0(prop," after ", time, "days"))
+    }
+}
+
+### simple animations ###
+
+## As gif
+saveGIF({
+    PlotAn(tot=sd, prop="Dolomite", grid=grid, breaks=br)
+      }, img.name = 'Dolomite_plot', movie.name="Dol2Dbreaks.gif",
+          interval = 0.5, nmax = length(sd))
+
+## as HTML
+saveHTML({
+    PlotAn(tot=sd, prop="Dolomite", grid=grid, breaks=br)
+      }, img.name = 'Dolomite_plot', htmlfile="dolomite.html",
+          interval = 0.5, nmax = length(sd))
+
+## For inclusion in latex
+saveLatex({
+    PlotAn(tot=sd, prop="Dolomite", grid=grid, breaks=br)
+      }, img.name = 'Dolomite_plot',
+          latex.filename = 'dolomite_prolong.tex',
+          interval = 0.5, nmax = length(sd))
+
+
+
+sd <- ReadRTSims("test_prol4")
+
+source("RFun_Eval.R")
+
+library(RedModRphree)
+library(Rmufits)
+library(RcppVTK)
+
+
+#### Evaluation of discrepancies of KtzDol_6p200_1_nodht_180 / KtzDol_6p200_1_dhtlog_180
+dht <- ReadRTSims("KtzDol_6p200_1_dhtlog_180")
+ref <- ReadRTSims("KtzDol_6p200_1_nodht_180")
+
+rmse <- ComputeErrors(dht, ref, FUN=RMSE)
+
+rae  <- ComputeErrors(dht, ref, FUN=RAEmax)
+
+mae  <- ComputeErrors(dht, ref, FUN=MAE)
+
+rel_max_rmse <- ComputeErrors(dht, ref, FUN=RmaxRMSE)
+
+## Visualize the 2 computed relative errors
+## start by defining a nice color palette using RColorBrewer
+mycol <- RColorBrewer::brewer.pal(8, "Dark2")
+
+## uncomment the next line to save this image to a pdf file
+cairo_pdf("Ref_VS_DHT.pdf", height=4, width=12)
+par(mfrow=c(1,2), family="serif")
+
+ErrorProgress(rmse,  colors=mycol, ignore=c("O2g", "pe", "Cl", "C"), log="y",
+              ylim=c(1E-12, 1E-3), las=1, main="Mean RMSE, reference vs. dht", metric="RMSE")
+abline(h=10^-seq(11,2), col="grey", lwd=1, lty="dashed")
+
+ErrorProgress(mae,  colors=mycol, ignore=c("O2g", "pe", "Cl", "C"), log="y",
+              ylim=c(1E-11, 1E-2), las=1, main="Max Absolute Error, reference vs. dht", metric="MAE")
+abline(h=10^-seq(10,3), col="grey", lwd=1, lty="dashed")
+dev.off()
+
+cairo_pdf("Scatter_Ref_VS_DHT.pdf", height=5, width=9)
+par(family="serif")
+PlotScatter(ref[[200]], dht[[200]],
+            which=c("Calcite", "Dolomite", "Ca", "Mg", "C", "pH"),
+            cols=3, labs=c("Reference", "DHT"), pch=".", cex=2)
+dev.off()
+
+time_ref <- readRDS("KtzDol_6p200_1_nodht_180/timings.rds")
+time_dht <- readRDS("KtzDol_6p200_1_dhtlog_180/timings.rds")
+
+
+## export to paraview
+resk <- lapply(res, function(x) return(data.matrix(x$C)))
+ExportToParaview("template_vtu_ketzinlarge.vtu", "KtzDol_6p200_1/paraview", results=resk)
+
+
+
+
--- a/src/Test_Validation.R
+++ b/src/Test_Validation.R
@ -0,0 +1,92 @@
+## Example script for assessing simulation results with "RFun_Eval.R"
+## Time-stamp: "Last modified 2020-01-27 17:23:25 delucia"
+
+## Read all the needed functions
+source("RFun_Eval.R")
+
+
+## Read all .rds in a given directory
+trds <- ReadRTSims("TestDHT_Log")
+## Read all .dht in a given directory
+tdht <- ReadAllDHT("TestDHT_Log")
+
+## control the content of a .dht
+rc <- trds[[1]]$C
+rt <- trds[[1]]$T
+dht <- tdht[[1]][, 10:18]
+## extract the unique rows of the first iteration (3 rows expected, as per dht)
+rtu <- mgcv::uniquecombs(rt)
+rcu <- mgcv::uniquecombs(rc)
+all.equal(attr(rtu,"index"), attr(rcu,"index"))
+colnames(dht) <- colnames(rcu)
+
+
+## compare _Log _NoLog and reference
+## First read all the simulations
+rlog <- ReadRTSims("TestDHT_Log")   ## all default values, np 4, with --dht-log
+nlog <- ReadRTSims("TestDHT_NoLog") ## all default values, np 4, without --dht-log
+ref  <- ReadRTSims("TestNoDHT")     ## all default values, np 4
+
+## Compute absolute RMSE for each variable
+rmse_lnl <- ComputeErrors(rlog, nlog, FUN=RMSE)
+## Compute relative RMSE for each variable using sd as norm
+rel_sd_rmse_lnl <- ComputeErrors(rlog, nlog, FUN=RsdRMSE)
+## Compute relative RMSE for each variable using MAX as norm
+rel_max_rmse_lnl <- ComputeErrors(rlog, nlog, FUN=RmaxRMSE)
+
+## Visualize the 2 computed relative errors
+## start by defining a nice color palette using RColorBrewer
+mycol <- RColorBrewer::brewer.pal(8, "Dark2")
+
+## uncomment the next line to save this image to a pdf file
+## cairo_pdf("RelErr_Dol_dhtlog_vs_nolog.pdf", height=9, width=12)
+par(mfrow=c(1,2))
+ErrorProgress(rel_sd_rmse_lnl,  colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-log vs dht, norm=SD", metric="RMSE_SD")
+## add a grid
+abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
+ErrorProgress(rel_max_rmse_lnl, colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-log vs dht, norm=MAX", metric="RMSE_MAX")
+abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
+
+## uncomment the next line when saving to pdf
+## dev.off()
+
+
+## Visualize Scatter Plot of each variable
+PlotScatter(rlog[[20]], nlog[[20]], labs=c("DHT LOG", "DHT"))
+
+PlotScatter(rlog[[4]], nlog[[4]], labs=c("DHT LOG", "DHT"))
+
+## Same as before but between dht-log and ref
+rmse_logref <- ComputeErrors(rlog, ref, FUN=RMSE)
+rel_sd_rmse_logref <- ComputeErrors(rlog, ref, FUN=RsdRMSE)
+rel_max_rmse_logref <- ComputeErrors(rlog, ref, FUN=RmaxRMSE)
+
+cairo_pdf("RelErr_Dol_ref_vs_dhtlog.pdf", height=9, width=12)
+par(mfrow=c(1,2))
+ErrorProgress(rel_sd_rmse_logref,  colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-log vs Ref, norm=SD", metric="RMSE_SD")
+abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
+ErrorProgress(rel_max_rmse_logref,  colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-log vs Ref, norm=MAX", metric="RMSE_MAX")
+abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
+
+dev.off()
+
+x11(); PlotScatter(rlog[[20]]$C, ref[[20]]$C, labs=c("DHT log", "ref"))
+
+rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RMSE)
+rel_sd_rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RsdRMSE)
+ErrorProgress(rel_sd_rmse_nlogref, log="y")
+PlotScatter(nlog[[20]]$C, ref[[20]]$C, labs=c("DHT no log", "ref"))
+
+## Same as before but between dht-nolog and ref
+rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RMSE)
+rel_sd_rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RsdRMSE)
+rel_max_rmse_nlogref <- ComputeErrors(nlog, ref, FUN=RmaxRMSE)
+
+cairo_pdf("RelErr_Dol_ref_vs_dht.pdf", height=9, width=12)
+par(mfrow=c(1,2))
+ErrorProgress(rel_sd_rmse_nlogref,  colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-nolog vs Ref, norm=SD", metric="RMSE_SD")
+abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
+ErrorProgress(rel_max_rmse_nlogref,  colors=mycol, ignore=c("O2g"), log="y", ylim=c(1E-08, 1E-3), las=1, main="Rel.err dht-nolog vs Ref, norm=MAX", metric="RMSE_MAX")
+abline(h=10^-seq(8,3), col="grey", lwd=0.5, lty="dashed")
+
+dev.off()
--- a/src/argh.h
+++ b/src/argh.h
@ -0,0 +1,432 @@
+#pragma once
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <cassert>
+
+namespace argh
+{
+   // Terminology:
+   // A command line is composed of 2 types of args:
+   // 1. Positional args, i.e. free standing values
+   // 2. Options: args beginning with '-'. We identify two kinds:
+   //    2.1: Flags: boolean options =>  (exist ? true : false)
+   //    2.2: Parameters: a name followed by a non-option value
+
+#if !defined(__GNUC__) || (__GNUC__ >= 5)
+   using string_stream = std::istringstream;
+#else
+    // Until GCC 5, istringstream did not have a move constructor.
+    // stringstream_proxy is used instead, as a workaround.
+   class stringstream_proxy
+   {
+   public:
+      stringstream_proxy() = default;
+
+      // Construct with a value.
+      stringstream_proxy(std::string const& value) :
+         stream_(value)
+      {}
+
+      // Copy constructor.
+      stringstream_proxy(const stringstream_proxy& other) :
+         stream_(other.stream_.str())
+      {
+         stream_.setstate(other.stream_.rdstate());
+      }
+
+      void setstate(std::ios_base::iostate state) { stream_.setstate(state); }
+
+      // Stream out the value of the parameter.
+      // If the conversion was not possible, the stream will enter the fail state,
+      // and operator bool will return false.
+      template<typename T>
+      stringstream_proxy& operator >> (T& thing)
+      {
+         stream_ >> thing;
+         return *this;
+      }
+
+
+      // Get the string value.
+      std::string str() const { return stream_.str(); }
+
+      std::stringbuf* rdbuf() const { return stream_.rdbuf(); }
+
+      // Check the state of the stream. 
+      // False when the most recent stream operation failed
+      operator bool() const { return !!stream_; }
+
+      ~stringstream_proxy() = default;
+   private:
+      std::istringstream stream_;
+   };
+   using string_stream = stringstream_proxy;
+#endif
+
+   class parser
+   {
+   public:
+      enum Mode { PREFER_FLAG_FOR_UNREG_OPTION = 1 << 0,
+                  PREFER_PARAM_FOR_UNREG_OPTION = 1 << 1,
+                  NO_SPLIT_ON_EQUALSIGN = 1 << 2,
+                  SINGLE_DASH_IS_MULTIFLAG = 1 << 3,
+                };
+
+      parser() = default;
+
+      parser(std::initializer_list<char const* const> pre_reg_names)
+      {  add_params(pre_reg_names); }
+
+      parser(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
+      {  parse(argv, mode); }
+
+      parser(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION)
+      {  parse(argc, argv, mode); }
+
+      void add_param(std::string const& name);
+      void add_params(std::initializer_list<char const* const> init_list);
+
+      void parse(const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);
+      void parse(int argc, const char* const argv[], int mode = PREFER_FLAG_FOR_UNREG_OPTION);
+
+      std::multiset<std::string>          const& flags()    const { return flags_;    }
+      std::map<std::string, std::string>  const& params()   const { return params_;   }
+      std::vector<std::string>            const& pos_args() const { return pos_args_; }
+
+      // begin() and end() for using range-for over positional args.
+      std::vector<std::string>::const_iterator begin() const { return pos_args_.cbegin(); }
+      std::vector<std::string>::const_iterator end()   const { return pos_args_.cend();   }
+      size_t size()                                    const { return pos_args_.size();   }
+
+      //////////////////////////////////////////////////////////////////////////
+      // Accessors
+
+      // flag (boolean) accessors: return true if the flag appeared, otherwise false.
+      bool operator[](std::string const& name) const;
+
+      // multiple flag (boolean) accessors: return true if at least one of the flag appeared, otherwise false.
+      bool operator[](std::initializer_list<char const* const> init_list) const;
+
+      // returns positional arg string by order. Like argv[] but without the options
+      std::string const& operator[](size_t ind) const;
+
+      // returns a std::istream that can be used to convert a positional arg to a typed value.
+      string_stream operator()(size_t ind) const;
+
+      // same as above, but with a default value in case the arg is missing (index out of range).
+      template<typename T>
+      string_stream operator()(size_t ind, T&& def_val) const;
+
+      // parameter accessors, give a name get an std::istream that can be used to convert to a typed value.
+      // call .str() on result to get as string
+      string_stream operator()(std::string const& name) const;
+
+      // accessor for a parameter with multiple names, give a list of names, get an std::istream that can be used to convert to a typed value.
+      // call .str() on result to get as string
+      // returns the first value in the list to be found.
+      string_stream operator()(std::initializer_list<char const* const> init_list) const;
+
+      // same as above, but with a default value in case the param was missing.
+      // Non-string def_val types must have an operator<<() (output stream operator)
+      // If T only has an input stream operator, pass the string version of the type as in "3" instead of 3.
+      template<typename T>
+      string_stream operator()(std::string const& name, T&& def_val) const;
+
+      // same as above but for a list of names. returns the first value to be found.
+      template<typename T>
+      string_stream operator()(std::initializer_list<char const* const> init_list, T&& def_val) const;
+
+   private:
+      string_stream bad_stream() const;
+      std::string trim_leading_dashes(std::string const& name) const;
+      bool is_number(std::string const& arg) const;
+      bool is_option(std::string const& arg) const;
+      bool got_flag(std::string const& name) const;
+      bool is_param(std::string const& name) const;
+
+   private:
+      std::vector<std::string> args_;
+      std::map<std::string, std::string> params_;
+      std::vector<std::string> pos_args_;
+      std::multiset<std::string> flags_;
+      std::set<std::string> registeredParams_;
+      std::string empty_;
+   };
+
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::parse(const char * const argv[], int mode)
+   {
+      int argc = 0;
+      for (auto argvp = argv; *argvp; ++argc, ++argvp);
+      parse(argc, argv, mode);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::parse(int argc, const char* const argv[], int mode /*= PREFER_FLAG_FOR_UNREG_OPTION*/)
+   {
+      // convert to strings
+      args_.resize(argc);
+      std::transform(argv, argv + argc, args_.begin(), [](const char* const arg) { return arg;  });
+
+      // parse line
+      for (auto i = 0u; i < args_.size(); ++i)
+      {
+         if (!is_option(args_[i]))
+         {
+            pos_args_.emplace_back(args_[i]);
+            continue;
+         }
+
+         auto name = trim_leading_dashes(args_[i]);
+
+         if (!(mode & NO_SPLIT_ON_EQUALSIGN))
+         {
+            auto equalPos = name.find('=');
+            if (equalPos != std::string::npos)
+            {
+               params_.insert({ name.substr(0, equalPos), name.substr(equalPos + 1) });
+               continue;
+            }
+         }
+
+         // if the option is unregistered and should be a multi-flag
+         if (1 == (args_[i].size() - name.size()) &&         // single dash
+            argh::parser::SINGLE_DASH_IS_MULTIFLAG & mode && // multi-flag mode
+            !is_param(name))                                  // unregistered
+         {
+            std::string keep_param; 
+            
+            if (!name.empty() && is_param(std::string(1ul, name.back()))) // last char is param
+            {
+               keep_param += name.back();
+               name.resize(name.size() - 1);
+            }
+
+            for (auto const& c : name)
+            {
+               flags_.emplace(std::string{ c });
+            }
+
+            if (!keep_param.empty())
+            {
+               name = keep_param;
+            }
+            else
+            {
+               continue; // do not consider other options for this arg
+            }
+         }
+
+         // any potential option will get as its value the next arg, unless that arg is an option too
+         // in that case it will be determined a flag.
+         if (i == args_.size() - 1 || is_option(args_[i + 1]))
+         {
+            flags_.emplace(name);
+            continue;
+         }
+
+         // if 'name' is a pre-registered option, then the next arg cannot be a free parameter to it is skipped
+         // otherwise we have 2 modes:
+         // PREFER_FLAG_FOR_UNREG_OPTION: a non-registered 'name' is determined a flag. 
+         //                               The following value (the next arg) will be a free parameter.
+         //
+         // PREFER_PARAM_FOR_UNREG_OPTION: a non-registered 'name' is determined a parameter, the next arg
+         //                                will be the value of that option.
+
+         assert(!(mode & argh::parser::PREFER_FLAG_FOR_UNREG_OPTION)
+             || !(mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION));
+
+         bool preferParam = mode & argh::parser::PREFER_PARAM_FOR_UNREG_OPTION;
+
+         if (is_param(name) || preferParam)
+         {
+            params_.insert({ name, args_[i + 1] });
+            ++i; // skip next value, it is not a free parameter
+            continue;
+         }
+         else
+         {
+            flags_.emplace(name);
+         }
+      };
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline string_stream parser::bad_stream() const
+   {
+      string_stream bad;
+      bad.setstate(std::ios_base::failbit);
+      return bad;
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool parser::is_number(std::string const& arg) const
+   {
+      // inefficient but simple way to determine if a string is a number (which can start with a '-')
+      std::istringstream istr(arg);
+      double number;
+      istr >> number;
+      return !(istr.fail() || istr.bad());
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool parser::is_option(std::string const& arg) const
+   {
+      assert(0 != arg.size());
+      if (is_number(arg))
+         return false;
+      return '-' == arg[0];
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline std::string parser::trim_leading_dashes(std::string const& name) const
+   {
+      auto pos = name.find_first_not_of('-');
+      return std::string::npos != pos ? name.substr(pos) : name;
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool argh::parser::got_flag(std::string const& name) const
+   {
+      return flags_.end() != flags_.find(trim_leading_dashes(name));
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool argh::parser::is_param(std::string const& name) const
+   {
+      return registeredParams_.count(name);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool parser::operator[](std::string const& name) const
+   {
+      return got_flag(name);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline bool parser::operator[](std::initializer_list<char const* const> init_list) const
+   {
+      return std::any_of(init_list.begin(), init_list.end(), [&](char const* const name) { return got_flag(name); });
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline std::string const& parser::operator[](size_t ind) const
+   {
+      if (ind < pos_args_.size())
+         return pos_args_[ind];
+      return empty_;
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline string_stream parser::operator()(std::string const& name) const
+   {
+      auto optIt = params_.find(trim_leading_dashes(name));
+      if (params_.end() != optIt)
+         return string_stream(optIt->second);
+      return bad_stream();
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline string_stream parser::operator()(std::initializer_list<char const* const> init_list) const
+   {
+      for (auto& name : init_list)
+      {
+         auto optIt = params_.find(trim_leading_dashes(name));
+         if (params_.end() != optIt)
+            return string_stream(optIt->second);
+      }
+      return bad_stream();
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   template<typename T>
+   string_stream parser::operator()(std::string const& name, T&& def_val) const
+   {
+      auto optIt = params_.find(trim_leading_dashes(name));
+      if (params_.end() != optIt)
+         return string_stream(optIt->second);
+
+      std::ostringstream ostr;
+      ostr << def_val;
+      return string_stream(ostr.str()); // use default
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   // same as above but for a list of names. returns the first value to be found.
+   template<typename T>
+   string_stream parser::operator()(std::initializer_list<char const* const> init_list, T&& def_val) const
+   {
+      for (auto& name : init_list)
+      {
+         auto optIt = params_.find(trim_leading_dashes(name));
+         if (params_.end() != optIt)
+            return string_stream(optIt->second);
+      }      
+      std::ostringstream ostr;
+      ostr << def_val;
+      return string_stream(ostr.str()); // use default
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline string_stream parser::operator()(size_t ind) const
+   {
+      if (pos_args_.size() <= ind)
+         return bad_stream();
+
+      return string_stream(pos_args_[ind]);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   template<typename T>
+   string_stream parser::operator()(size_t ind, T&& def_val) const
+   {
+      if (pos_args_.size() <= ind)
+      {
+         std::ostringstream ostr;
+         ostr << def_val;
+         return string_stream(ostr.str());
+      }
+
+      return string_stream(pos_args_[ind]);
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::add_param(std::string const& name)
+   {
+      registeredParams_.insert(trim_leading_dashes(name));
+   }
+
+   //////////////////////////////////////////////////////////////////////////
+
+   inline void parser::add_params(std::initializer_list<char const* const> init_list)
+   {
+      for (auto& name : init_list)
+         registeredParams_.insert(trim_leading_dashes(name));
+   }
+}
+
+
--- a/src/dht_wrapper.cpp
+++ b/src/dht_wrapper.cpp
@ -0,0 +1,175 @@
+#include "dht_wrapper.h"
+#include <openssl/md5.h>
+
+/*init globals*/
+bool dht_enabled;
+int dht_snaps;
+int dht_strategy;
+int dht_significant_digits;
+std::string dht_file;
+std::vector<int> dht_significant_digits_vector;
+std::vector<string> prop_type_vector;
+bool dht_logarithm;
+uint64_t dht_size_per_process;
+uint64_t dht_hits, dht_miss, dht_collision;
+RInside *R_DHT;
+std::vector<bool> dht_flags;
+DHT *dht_object;
+
+double *fuzzing_buffer;
+
+bool dt_differ;
+/*functions*/
+
+uint64_t get_md5(int key_size, void *key) {
+    MD5_CTX ctx;
+    unsigned char sum[MD5_DIGEST_LENGTH];
+    uint64_t retval, *v1, *v2;
+
+    MD5_Init(&ctx);
+    MD5_Update(&ctx, key, key_size);
+    MD5_Final(sum, &ctx);
+
+    v1 = (uint64_t *)&sum[0];
+    v2 = (uint64_t *)&sum[8];
+    retval = *v1 ^ *v2;
+
+    return retval;
+}
+
+double Round_off(RInside &R, double N, double n) {
+    double result;
+    R["roundsig"] = n;
+    R["roundin"] = N;
+
+    result = R.parseEval("signif(roundin, digits=roundsig)");
+
+    return result;
+}
+
+/*
+*   Stores fuzzed version of key in fuzzing_buffer
+*/
+void fuzz_for_dht(RInside &R, int var_count, void *key, double dt) {
+    unsigned int i = 0;
+    //introduce fuzzing to allow more hits in DHT
+    for (i = 0; i < (unsigned int)var_count; i++) {
+        if (prop_type_vector[i] == "act") {
+            //with log10
+            if (dht_logarithm) {
+                if (((double *)key)[i] < 0)
+                    cerr << "dht_wrapper.cpp::fuzz_for_dht(): Warning! Negative value at key!" << endl;
+                else if (((double *)key)[i] == 0)
+                    fuzzing_buffer[i] = 0;
+                else
+                    //fuzzing_buffer[i] = Round_off(R, std::log10(((double *)key)[i]), dht_significant_digits_vector[i] - 1);
+                    fuzzing_buffer[i] = ROUND(-(std::log10(((double *)key)[i])), dht_significant_digits_vector[i]);
+            } else {
+                //without log10
+                //fuzzing_buffer[i] = Round_off(R, ((double *)key)[i], dht_significant_digits_vector[i]);
+                fuzzing_buffer[i] = ROUND((((double *)key)[i]), dht_significant_digits_vector[i]);
+            }
+        } else if (prop_type_vector[i] == "logact") {
+            //fuzzing_buffer[i] = Round_off(R, ((double *)key)[i], dht_significant_digits_vector[i]);
+            fuzzing_buffer[i] = ROUND((((double *)key)[i]), dht_significant_digits_vector[i]);
+        } else if (prop_type_vector[i] == "ignore") {
+            fuzzing_buffer[i] = 0;
+        } else {
+            cerr << "dht_wrapper.cpp::fuzz_for_dht(): Warning! Probably wrong prop_type!" << endl;
+        }
+    }
+    if (dt_differ)
+        fuzzing_buffer[var_count] = dt;
+}
+
+void check_dht(RInside &R, int length, std::vector<bool> &out_result_index, double *work_package) {
+    void *key;
+    int res;
+    int var_count = prop_type_vector.size();
+    double dt;
+
+    dt = R.parseEval("mysetup$dt");
+
+    for (int i = 0; i < length; i++) {
+        key = (void *)&(work_package[i * var_count]);
+
+        //fuzz data (round, logarithm etc.)
+        fuzz_for_dht(R, var_count, key, dt);
+
+        //overwrite input with data from DHT, IF value is found in DHT
+        res = DHT_read(dht_object, fuzzing_buffer, key);
+
+        if (res == DHT_SUCCESS) {
+            //flag that this line is replaced by DHT-value, do not simulate!!
+            out_result_index[i] = false;
+            dht_hits++;
+        } else if (res == DHT_READ_ERROR) {
+            //this line is untouched, simulation is needed
+            out_result_index[i] = true;
+            dht_miss++;
+        } else {
+            //MPI ERROR ... WHAT TO DO NOW?
+            //RUNNING CIRCLES WHILE SCREAMING
+        }
+    }
+}
+
+void fill_dht(RInside &R, int length, std::vector<bool> &result_index, double *work_package, double *results) {
+    void *key;
+    void *data;
+    int res;
+    int var_count = prop_type_vector.size();
+    double dt;
+
+    dt = R.parseEval("mysetup$dt");
+
+    for (int i = 0; i < length; i++) {
+        key = (void *)&(work_package[i * var_count]);
+        data = (void *)&(results[i * var_count]);
+
+        if (result_index[i]) {
+            //If true -> was simulated, needs to be inserted into dht
+
+            //fuzz data (round, logarithm etc.)
+            fuzz_for_dht(R, var_count, key, dt);
+
+            res = DHT_write(dht_object, fuzzing_buffer, data);
+
+            if (res != DHT_SUCCESS) {
+                if (res == DHT_WRITE_SUCCESS_WITH_COLLISION) {
+                    dht_collision++;
+                } else {
+                    //MPI ERROR ... WHAT TO DO NOW?
+                    //RUNNING CIRCLES WHILE SCREAMING
+                }
+            }
+        }
+    }
+}
+
+void print_statistics() {
+    int res;
+
+    res = DHT_print_statistics(dht_object);
+
+    if (res != DHT_SUCCESS) {
+        //MPI ERROR ... WHAT TO DO NOW?
+        //RUNNING CIRCLES WHILE SCREAMING
+    }
+}
+
+int table_to_file(char *filename) {
+    int res = DHT_to_file(dht_object, filename);
+    return res;
+}
+
+int file_to_table(char *filename) {
+
+    int res = DHT_from_file(dht_object, filename);
+    if (res != DHT_SUCCESS) 
+        return res;
+
+    DHT_print_statistics(dht_object);
+
+    return DHT_SUCCESS;
+}
--- a/src/dht_wrapper.h
+++ b/src/dht_wrapper.h
@ -0,0 +1,50 @@
+#pragma once
+#include <RInside.h> 
+#include <string>
+#include <vector>
+#include <math.h>
+#include "DHT.h"
+
+using namespace std;
+using namespace Rcpp;
+
+/*Functions*/
+uint64_t get_md5(int key_size, void* key);
+void fuzz_for_dht(RInside &R, int var_count, void *key, double dt);
+void check_dht(RInside &R, int length, std::vector<bool> &out_result_index, double *work_package);
+void fill_dht(RInside &R, int length, std::vector<bool> &result_index, double *work_package, double *results);
+void print_statistics();
+int table_to_file(char* filename);
+int file_to_table(char* filename);
+
+/*globals*/
+extern bool dht_enabled;
+extern int dht_snaps;
+extern std::string dht_file;
+extern bool dt_differ;
+
+//Default DHT Size per process in Byte (defaults to 1 GiB)
+#define DHT_SIZE_PER_PROCESS 1073741824
+
+//sets default dht access and distribution strategy
+#define DHT_STRATEGY 0
+// 0 -> DHT is on workers, access from workers only
+// 1 -> DHT is on workers + master, access from master only !NOT IMPLEMENTED YET!
+
+#define ROUND(value,signif) (((int) (pow(10.0, (double) signif) * value)) * pow(10.0, (double) -signif))
+
+extern int dht_strategy;
+extern int dht_significant_digits;
+extern std::vector<int> dht_significant_digits_vector;
+extern std::vector<string> prop_type_vector;
+extern bool dht_logarithm;
+extern uint64_t dht_size_per_process;
+
+//global DHT object, can be NULL if not initialized, check strategy
+extern DHT* dht_object;
+
+//DHT Performance counter
+extern uint64_t dht_hits, dht_miss, dht_collision;
+
+extern double* fuzzing_buffer;
+extern std::vector<bool> dht_flags;
--- a/src/global_buffer.h
+++ b/src/global_buffer.h
@ -0,0 +1,9 @@
+#pragma once
+
+#define BUFFER_OFFSET 5
+
+/*Globals*/
+extern double* mpi_buffer;
+extern double* mpi_buffer_results;
+
+extern uint32_t work_package_size; 
--- a/src/kin.cpp
+++ b/src/kin.cpp
@ -0,0 +1,818 @@
+#include <string>
+#include <vector>
+#include <iostream>
+#include <cstring>
+
+#include <RInside.h>  // for the embedded R via RInside
+ 
+#include <mpi.h>      // mpi header file
+
+#include "argh.h"     // Argument handler https://github.com/adishavit/argh BSD-licenced
+#include "DHT.h"      // MPI-DHT Implementation
+#include "worker.h"
+#include "r_utils.h"
+#include "dht_wrapper.h"
+#include "global_buffer.h"
+
+using namespace std;
+using namespace Rcpp;
+
+double* mpi_buffer;
+double* mpi_buffer_results;
+
+uint32_t work_package_size; 
+#define WORK_PACKAGE_SIZE_DEFAULT 5
+
+bool store_result;
+
+std::set<std::string> paramList() {
+    std::set<std::string> options;
+    //global
+    options.insert("work-package-size");
+    //only DHT
+    options.insert("dht-signif");
+    options.insert("dht-strategy");
+    options.insert("dht-size");
+    options.insert("dht-snaps");
+    options.insert("dht-file");
+
+    return options;
+}
+
+std::set<std::string> flagList() {
+    std::set<std::string> options;
+    //global
+    options.insert("ignore-result");
+    //only DHT
+    options.insert("dht");
+    options.insert("dht-log");
+
+    return options;
+}
+
+std::list<std::string> checkOptions(argh::parser cmdl) {
+    std::list<std::string> retList;
+    std::set<std::string> flist = flagList();
+    std::set<std::string> plist = paramList();
+
+    for (auto& flag: cmdl.flags()) {
+        if (!(flist.find(flag) != flist.end())) retList.push_back(flag);
+    }
+
+    for (auto& param: cmdl.params()) {
+        if (!(plist.find(param.first) != plist.end())) retList.push_back(param.first);
+    }
+
+    return retList;
+}
+
+typedef struct 
+{
+    char has_work;
+    double* send_addr;
+} worker_struct;
+
+
+int main(int argc, char *argv[]) {
+    double sim_start, sim_b_transport, sim_a_transport, sim_b_chemistry, sim_a_chemistry,
+	sim_end;
+
+    double cummul_transport = 0.f;
+    double cummul_chemistry = 0.f;
+    double cummul_workers   = 0.f;
+    double cummul_chemistry_master   = 0.f;
+
+    double cummul_master_seq_pre_loop = 0.f;
+    double cummul_master_seq_loop = 0.f;
+    double master_idle = 0.f;
+
+    double master_send_a, master_send_b;
+    double cummul_master_send = 0.f;
+    double master_recv_a, master_recv_b;
+    double cummul_master_recv = 0.f;
+
+    double sim_a_seq, sim_b_seq, sim_c_seq, sim_d_seq;
+    double idle_a, idle_b;
+
+    double sim_c_chemistry, sim_d_chemistry;
+    double sim_e_chemistry, sim_f_chemistry;
+
+    argh::parser cmdl(argv);
+
+    // cout << "CPP: Start Init (MPI)" << endl;
+
+    MPI_Init( &argc, &argv );
+
+    int world_size;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+    int world_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+    /*Create custom Communicator with all processes except 0 (the master) for DHT storage*/
+    //only needed if strategy == 0, but done anyway
+    MPI_Group group_world;
+    MPI_Group dht_group;
+    MPI_Comm  dht_comm;
+    int* process_ranks;
+    
+    // make a list of processes in the new communicator
+    process_ranks= (int*) malloc(world_size*sizeof(int));
+    for(int I = 1; I < world_size; I++)
+        process_ranks[I-1] = I; 
+        
+    //get the group under MPI_COMM_WORLD
+    MPI_Comm_group(MPI_COMM_WORLD, &group_world); 
+    //create the new group
+    MPI_Group_incl(group_world, world_size-1, process_ranks, &dht_group); 
+    // create the new communicator
+    MPI_Comm_create(MPI_COMM_WORLD, dht_group, &dht_comm);
+    free (process_ranks); //cleanup
+    // cout << "Done";
+
+    if (cmdl[{"help", "h"}]) {
+        if (world_rank == 0) {
+            cout << "Todo" << endl <<
+            "See README.md for further information." << endl;
+        }
+        MPI_Finalize();
+        return EXIT_SUCCESS;
+    }
+
+    /*INIT is now done separately in an R file provided here as argument!*/
+    if (!cmdl(2)) {
+        if (world_rank == 0) {
+            cerr << "ERROR. Kin needs 2 positional arguments: " << endl <<
+            "1) the R script defining your simulation and" << endl <<
+            "2) the directory prefix where to save results and profiling" << endl;
+        }
+        MPI_Finalize();
+        return EXIT_FAILURE;
+    }
+    
+    std::list<std::string> optionsError = checkOptions(cmdl);
+    if (!optionsError.empty()) {
+        if (world_rank == 0) {
+            cerr << "Unrecognized option(s):\n" << endl;
+            for (auto option: optionsError) {
+                cerr << option << endl;
+            }
+            cerr << "\nMake sure to use available options. Exiting!" << endl;
+        }
+        MPI_Finalize();
+        return EXIT_FAILURE;
+    }
+
+    /*Parse DHT arguments*/
+    dht_enabled = cmdl["dht"];
+    // cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n';
+    
+    if (dht_enabled) {
+        cmdl("dht-strategy", 0) >> dht_strategy;
+        // cout << "CPP: DHT strategy is " << dht_strategy << endl;
+        
+        cmdl("dht-signif", 5) >> dht_significant_digits;
+        // cout << "CPP: DHT significant digits = " << dht_significant_digits << endl;
+        
+        dht_logarithm = cmdl["dht-log"];
+        // cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" : "OFF" ) << endl;
+        
+        cmdl("dht-size", DHT_SIZE_PER_PROCESS) >> dht_size_per_process;
+        // cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << endl;
+
+        cmdl("dht-snaps", 0) >> dht_snaps;
+
+        cmdl("dht-file") >> dht_file;
+    }
+
+    /*Parse work package size*/
+    cmdl("work-package-size", WORK_PACKAGE_SIZE_DEFAULT) >> work_package_size;
+
+
+    /*Parse output options*/
+    store_result = !cmdl["ignore-result"];
+
+
+    if (world_rank==0) {
+        cout << "CPP: Complete results storage is " << ( store_result ? "ON" : "OFF" ) << endl;
+        cout << "CPP: Work Package Size: " << work_package_size << endl;
+        cout << "CPP: DHT is " << ( dht_enabled ? "ON" : "OFF" ) << '\n';
+
+        if (dht_enabled) {
+            cout << "CPP: DHT strategy is " << dht_strategy << endl;
+            cout << "CPP: DHT key default digits (ignored if 'signif_vector' is defined) = " << dht_significant_digits << endl;
+            cout << "CPP: DHT logarithm before rounding: " << ( dht_logarithm ? "ON" : "OFF" ) << endl;
+            cout << "CPP: DHT size per process (Byte) = " << dht_size_per_process << endl;
+            cout << "CPP: DHT save snapshots is " << dht_snaps << endl;
+            cout << "CPP: DHT load file is " << dht_file << endl;
+        }
+    }
+
+    cout << "CPP: R Init (RInside) on process " << world_rank << endl;
+    RInside R(argc, argv); 
+
+    // if local_rank == 0 then master else worker
+    R["local_rank"] = world_rank;
+
+    /*Loading Dependencies*/
+    std::string r_load_dependencies = 
+        "suppressMessages(library(Rmufits));"
+        "suppressMessages(library(RedModRphree));"
+        "source('kin_r_library.R');"
+        "source('parallel_r_library.R');";
+    R.parseEvalQ(r_load_dependencies);
+    
+    std::string filesim;
+    cmdl(1) >> filesim;               // <- first positional argument
+    R["filesim"] = wrap(filesim);     // assign a char* (string) to 'filesim'
+    R.parseEvalQ("source(filesim)");  // eval the init string, ignoring any returns
+
+    std::string out_dir;
+    if (world_rank == 0) {            // only rank 0 initializes goes through the whole initialization 
+        cmdl(2) >> out_dir;           // <- second positional argument 
+        R["fileout"] = wrap(out_dir); // assign a char* (string) to 'fileout'
+
+        // Note: R::sim_init() checks if the directory already exists,
+        // if not it makes it
+
+        // pass the boolean "store_result" to the R process
+        R["store_result"] = store_result;
+
+        //get timestep vector from grid_init function ...
+        std::string master_init_code = "mysetup <- master_init(setup=setup)";
+        R.parseEval(master_init_code);
+
+        dt_differ = R.parseEval("mysetup$dt_differ");
+        MPI_Bcast(&dt_differ, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
+    } else {                           // workers will only read the setup DataFrame defined by input file
+        R.parseEval("mysetup <- setup");
+        MPI_Bcast(&dt_differ, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD);
+    }
+
+    if (world_rank==0) {
+        cout << "CPP: R init done on process with rank " << world_rank << endl;
+    }
+
+    //initialize chemistry on all processes
+    std::string init_chemistry_code = "mysetup <- init_chemistry(setup=mysetup)";
+    R.parseEval(init_chemistry_code);
+
+    /* Retrieve state_C from R context for MPI buffer generation */
+    Rcpp::DataFrame state_C = R.parseEval("mysetup$state_C");
+
+    /* Init Parallel helper functions */
+    R["n_procs"] = world_size-1; /* worker count */
+    R["work_package_size"] = work_package_size;
+
+    // Removed additional field for ID in previous versions
+    if (world_rank == 0)
+    {
+        mpi_buffer = (double*) calloc(state_C.nrow() * (state_C.ncol()), sizeof(double));
+    } else
+    {
+        mpi_buffer = (double*) calloc((work_package_size * (state_C.ncol())) + BUFFER_OFFSET, sizeof(double));
+        mpi_buffer_results = (double*) calloc(work_package_size * (state_C.ncol()), sizeof(double));
+    }
+
+    if (world_rank==0) {
+        cout << "CPP: parallel init completed (buffers allocated)!" << endl;
+    }
+
+    // MDL: pass to R the DHT stuff (basically, only for storing of
+    // simulation parameters). These 2 variables are always defined:
+    R["dht_enabled"] = dht_enabled;
+    R["dht_log"] = dht_logarithm;
+
+    if (dht_enabled)
+    {
+        //cout << "\nCreating DHT\n";
+        //determine size of dht entries
+        int dht_data_size = state_C.ncol() * sizeof(double);
+        int dht_key_size  = state_C.ncol() * sizeof(double) + (dt_differ * sizeof(double));
+
+        //determine bucket count for preset memory usage
+        //bucket size is key + value + 1 byte for status
+        int dht_buckets_per_process = dht_size_per_process / (1 + dht_data_size + dht_key_size);
+
+        // MDL : following code moved here from worker.cpp 
+        /*Load significance vector from R setup file (or set default)*/
+        bool signif_vector_exists = R.parseEval("exists('signif_vector')");
+        if (signif_vector_exists)
+        { 
+            dht_significant_digits_vector = as<std::vector<int>>(R["signif_vector"]);
+        } else 
+        {
+            dht_significant_digits_vector.assign(dht_object->key_size / sizeof(double), dht_significant_digits);
+        }
+
+        /*Load property type vector from R setup file (or set default)*/
+        bool prop_type_vector_exists = R.parseEval("exists('prop_type')");
+        if (prop_type_vector_exists)
+        { 
+            prop_type_vector = as<std::vector<string>>(R["prop_type"]);
+        } else 
+        {
+            prop_type_vector.assign(dht_object->key_size / sizeof(double), "act");
+        }
+
+        if(world_rank == 0)
+        {
+            //print only on master, values are equal on all workes
+            cout << "CPP: dht_data_size: " << dht_data_size << "\n";
+            cout << "CPP: dht_key_size: " << dht_key_size << "\n";
+            cout << "CPP: dht_buckets_per_process: " << dht_buckets_per_process << endl;
+
+            // MDL: new output on signif_vector and prop_type
+            if (signif_vector_exists) {
+                cout << "CPP: using problem-specific rounding digits: " << endl;
+                R.parseEval("print(data.frame(prop=prop, type=prop_type, digits=signif_vector))");
+            } else
+            {
+                cout << "CPP: using DHT default rounding digits = " << dht_significant_digits << endl;
+            }
+
+            // MDL: pass to R the DHT stuff. These variables exist
+            // only if dht_enabled is true
+            R["dht_final_signif"] = dht_significant_digits_vector;
+            R["dht_final_proptype"] = prop_type_vector;
+        }
+
+        if (dht_strategy == 0)
+        {
+            if(world_rank != 0) {
+                dht_object = DHT_create(dht_comm, dht_buckets_per_process, dht_data_size, dht_key_size, get_md5);   
+                 
+                //storing for access from worker and callback functions
+                fuzzing_buffer = (double   *) malloc (dht_key_size);    
+            }
+        } else {
+            dht_object = DHT_create(MPI_COMM_WORLD, dht_buckets_per_process, dht_data_size, dht_key_size, get_md5);        
+        }
+
+        if (world_rank==0) {
+            cout << "CPP: DHT successfully created!" << endl;
+        }
+    }
+
+
+    // MDL: store all parameters
+    if (world_rank==0) {
+        cout << "CPP: Calling R Function to store calling parameters" << endl;
+        R.parseEvalQ("StoreSetup(setup=mysetup)");
+
+    }
+    
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    if (world_rank == 0) 
+    {   /* This is executed by the master */
+
+        Rcpp::NumericVector master_send;
+        Rcpp::NumericVector master_recv;
+
+        sim_a_seq = MPI_Wtime();
+
+        worker_struct* workerlist = (worker_struct*) calloc(world_size-1, sizeof(worker_struct));
+        int need_to_receive;
+        MPI_Status probe_status;
+        double* timings;
+        uint64_t* dht_perfs = NULL;
+
+        int local_work_package_size;
+
+        // a temporary send buffer
+        double* send_buffer;
+        send_buffer = (double*) calloc((work_package_size * (state_C.ncol() )) + BUFFER_OFFSET, sizeof(double));
+
+        // helper variables
+        int iteration;
+        double dt, current_sim_time;
+
+
+        int n_wp = 1; // holds the actual number of wp which is
+                      // computed later in R::distribute_work_packages()
+        std::vector<int> wp_sizes_vector; // vector with the sizes of
+                                          // each package
+
+        sim_start = MPI_Wtime();
+
+        //Iteration Count is dynamic, retrieving value from R (is only needed by master for the following loop)
+        uint32_t maxiter = R.parseEval("mysetup$maxiter");
+
+        sim_b_seq = MPI_Wtime();
+
+        cummul_master_seq_pre_loop += sim_b_seq - sim_a_seq;
+
+        /*SIMULATION LOOP*/
+        for(uint32_t iter = 1; iter < maxiter+1; iter++)
+        {
+            sim_a_seq = MPI_Wtime();
+
+            cummul_master_send = 0.f;
+            cummul_master_recv = 0.f;
+
+
+            cout << "CPP: Evaluating next time step" << endl;
+            R.parseEvalQ("mysetup <- master_iteration_setup(mysetup)");
+
+            /*displaying iteration number, with C++ and R iterator*/
+            cout << "CPP: Going through iteration " << iter << endl;
+            cout << "CPP: R's $iter: "<< ((uint32_t) (R.parseEval("mysetup$iter"))) <<". Iteration" << endl; 
+
+            cout << "CPP: Calling Advection" << endl;
+	        
+            sim_b_transport = MPI_Wtime();
+            R.parseEvalQ("mysetup <- master_advection(setup=mysetup)");
+            sim_a_transport = MPI_Wtime();
+            
+            
+            cout << "CPP: Chemistry" << endl;
+            /*Fallback for sequential execution*/
+            sim_b_chemistry = MPI_Wtime();
+
+            if(world_size == 1) 
+            {
+                // MDL : the transformation of values into pH and pe
+                // takes now place in master_advection() so the
+                // following line is unneeded
+                // R.parseEvalQ("mysetup$state_T <- RedModRphree::Act2pH(mysetup$state_T)");
+                R.parseEvalQ("result <- slave_chemistry(setup=mysetup, data=mysetup$state_T)");
+                R.parseEvalQ("mysetup <- master_chemistry(setup=mysetup, data=result)");
+            } else { /*send work to workers*/
+
+                
+                // NEW: only in the first iteration we call
+                // R::distribute_work_packages()!!
+                if (iter==1)
+                {
+                    R.parseEvalQ("wp_ids <- distribute_work_packages(len=nrow(mysetup$state_T), package_size=work_package_size)");
+
+                    // we only sort once the vector
+                    R.parseEvalQ("ordered_ids <- order(wp_ids)");
+                    R.parseEvalQ("wp_sizes_vector <- compute_wp_sizes(wp_ids)");
+                    n_wp = (int) R.parseEval("length(wp_sizes_vector)");
+                    wp_sizes_vector = as<std::vector<int>>(R["wp_sizes_vector"]);
+                    cout << "CPP: Total number of work packages: " << n_wp << endl;
+                    R.parseEval("stat_wp_sizes(wp_sizes_vector)");
+                    
+                }
+
+                /* shuffle and extract data
+                   MDL: we now apply :Act2pH directly in master_advection
+                 */
+                // R.parseEval("tmp <- shuffle_field(RedModRphree::Act2pH(mysetup$state_T), ordered_ids)");
+                R.parseEval("tmp <- shuffle_field(mysetup$state_T, ordered_ids)");
+                Rcpp::DataFrame chemistry_data = R.parseEval("tmp");
+
+                convert_R_Dataframe_2_C_buffer(mpi_buffer, chemistry_data);
+                // cout << "CPP: shuffle_field() done" << endl;
+
+                /* send and receive work; this is done by counting
+                 * the wp */
+                int pkg_to_send = n_wp;
+                int pkg_to_recv = n_wp;
+                size_t colCount = chemistry_data.ncol();
+                int free_workers = world_size-1;
+                double* work_pointer = mpi_buffer;
+                sim_c_chemistry = MPI_Wtime();
+
+                /* visual progress */
+                float progress = 0.0;
+                int barWidth = 70;
+
+                //retrieve data from R runtime
+                iteration = (int) R.parseEval("mysetup$iter");
+                dt = (double) R.parseEval("mysetup$requested_dt");
+                current_sim_time = (double) R.parseEval("mysetup$simulation_time-mysetup$requested_dt");
+
+                int count_pkgs = 0;
+
+                sim_b_seq = MPI_Wtime();
+
+                sim_c_chemistry = MPI_Wtime();
+
+                while (pkg_to_recv > 0) // start dispatching work packages
+                {
+                    /* visual progress */
+                    progress = (float) (count_pkgs+1)/n_wp;
+                        
+                    cout << "[";
+                    int pos = barWidth * progress;
+                    for (int iprog = 0; iprog < barWidth; ++iprog) {
+                        if (iprog < pos)
+                            cout << "=";
+                        else if (iprog == pos)
+                            cout << ">";
+                        else
+                            cout << " ";
+                    }
+                    std::cout << "] " << int(progress * 100.0) << " %\r";
+                    std::cout.flush();
+                    /* end visual progress */
+
+                    
+                    if (pkg_to_send > 0) {
+
+                        master_send_a = MPI_Wtime();
+                        /*search for free workers and send work*/
+                        for (int p = 0; p < world_size-1; p++) {
+                            if (workerlist[p].has_work == 0 && pkg_to_send > 0) /* worker is free */ {
+                                
+                                // to enable different work_package_size, set local copy of work_package_size to 
+                                // either global work_package size or remaining 'to_send' packages
+                              	// to_send >= work_package_size ? local_work_package_size = work_package_size : local_work_package_size = to_send;
+
+                                local_work_package_size = (int) wp_sizes_vector[count_pkgs];
+                                count_pkgs++;
+
+                                // cout << "CPP: sending pkg n. " << count_pkgs << " with size " << local_work_package_size << endl;
+
+                                /*push pointer forward to next work package, after taking the current one*/
+                                workerlist[p].send_addr = work_pointer;
+
+                                int end_of_wp = local_work_package_size * colCount;
+                                work_pointer = &(work_pointer[end_of_wp]);
+
+                                // fill send buffer starting with work_package ...
+                                std::memcpy(send_buffer, workerlist[p].send_addr, (end_of_wp) * sizeof(double));
+                                // followed by: work_package_size
+                                send_buffer[end_of_wp] = (double) local_work_package_size;
+                                // current iteration of simulation
+                                send_buffer[end_of_wp + 1] = (double) iteration;
+                                // size of timestep in seconds
+                                send_buffer[end_of_wp + 2] = dt;
+                                // current time of simulation (age) in seconds
+                                send_buffer[end_of_wp + 3] = current_sim_time;
+                                // placeholder for work_package_count
+                                send_buffer[end_of_wp + 4] = 0.; 
+                                
+                                /* ATTENTION Worker p has rank p+1 */
+                                MPI_Send(send_buffer, end_of_wp + BUFFER_OFFSET, MPI_DOUBLE, p+1, TAG_WORK, MPI_COMM_WORLD);
+
+                                workerlist[p].has_work = 1;
+                                free_workers--;
+                                pkg_to_send -= 1;
+                            }
+                        }
+                        master_send_b = MPI_Wtime();
+                        cummul_master_send += master_send_b - master_send_a;
+                    }
+
+                    
+
+                    /*check if there are results to receive and receive them*/  
+                    need_to_receive = 1;
+                    master_recv_a = MPI_Wtime();
+                    while(need_to_receive && pkg_to_recv > 0)
+                    {
+                        
+                        if (pkg_to_send > 0 && free_workers > 0)
+                            MPI_Iprobe(MPI_ANY_SOURCE, TAG_WORK, MPI_COMM_WORLD, &need_to_receive, &probe_status);
+                        else {
+                            idle_a = MPI_Wtime();
+                            MPI_Probe(MPI_ANY_SOURCE, TAG_WORK, MPI_COMM_WORLD, &probe_status);
+                            idle_b = MPI_Wtime();
+                            master_idle += idle_b - idle_a;
+                        }
+
+                        if(need_to_receive) 
+                            {
+                                int p = probe_status.MPI_SOURCE;
+                                int size;
+                                MPI_Get_count(&probe_status, MPI_DOUBLE, &size);
+                                MPI_Recv(workerlist[p-1].send_addr, size, MPI_DOUBLE, p, TAG_WORK, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                                workerlist[p-1].has_work = 0;
+                                pkg_to_recv -= 1;
+                                free_workers++;
+                            } 
+                    }
+                    master_recv_b = MPI_Wtime();
+                    cummul_master_recv += master_recv_b - master_recv_a;
+                }
+
+                sim_c_seq = MPI_Wtime();
+
+                // don't overwrite last progress
+                cout << endl;
+                
+                sim_d_chemistry = MPI_Wtime();
+                cummul_workers   += sim_d_chemistry - sim_c_chemistry;
+
+                convert_C_buffer_2_R_Dataframe(mpi_buffer, chemistry_data);
+
+                R["chemistry_data"] = chemistry_data;
+
+                /* unshuffle results */
+                R.parseEval("result <- unshuffle_field(chemistry_data, ordered_ids)");
+
+                /* do master stuff */
+                sim_e_chemistry = MPI_Wtime();
+                R.parseEvalQ("mysetup <- master_chemistry(setup=mysetup, data=result)");
+                sim_f_chemistry = MPI_Wtime();
+                cummul_chemistry_master   += sim_f_chemistry - sim_e_chemistry;
+
+            }
+            sim_a_chemistry = MPI_Wtime();
+
+	    // MDL master_iteration_end just writes on disk state_T and
+	    // state_C after every iteration if the cmdline option
+	    // --ignore-results is not given (and thus the R variable
+	    // store_result is TRUE)
+            R.parseEvalQ("mysetup <- master_iteration_end(setup=mysetup)");
+
+    	    cummul_transport += sim_a_transport - sim_b_transport;
+            cummul_chemistry += sim_a_chemistry - sim_b_chemistry;
+
+            cout << endl << "CPP: End of *coupling* iteration "<< iter <<"/" << maxiter << endl << endl;
+
+            if (dht_enabled) {
+                for (int i=1; i < world_size; i++) {
+                    MPI_Send(NULL, 0, MPI_DOUBLE, i, TAG_DHT_STATS, MPI_COMM_WORLD);
+                }
+
+                // MPI_Barrier(MPI_COMM_WORLD);
+
+                if (dht_snaps == 2) {
+                    std::stringstream outfile;
+                    outfile << out_dir << "/iter_" << std::setfill('0') << std::setw(3) << iter << ".dht";
+                    for (int i=1; i < world_size; i++) {
+                        MPI_Send(outfile.str().c_str(), outfile.str().size(), MPI_CHAR, i, TAG_DHT_STORE, MPI_COMM_WORLD);
+                    }
+                    MPI_Barrier(MPI_COMM_WORLD);
+                }
+            }
+
+
+        sim_d_seq = MPI_Wtime();
+
+        cummul_master_seq_loop += ((sim_b_seq - sim_a_seq) - (sim_a_transport - sim_b_transport)) + (sim_d_seq - sim_c_seq); 
+        master_send.push_back(cummul_master_send, "it_" + to_string(iter));
+        master_recv.push_back(cummul_master_recv, "it_" + to_string(iter));           
+
+        } // END SIMULATION LOOP
+
+        sim_end = MPI_Wtime();
+
+        if (dht_enabled && dht_snaps > 0) {
+            cout << "CPP: Master: Instruct workers to write DHT to file ..." << endl;
+            std::string outfile;
+            outfile = out_dir + ".dht";
+            for (int i=1; i < world_size; i++) {
+                MPI_Send(outfile.c_str(), outfile.size(), MPI_CHAR, i, TAG_DHT_STORE, MPI_COMM_WORLD);
+            }
+            MPI_Barrier(MPI_COMM_WORLD);
+            cout << "CPP: Master: ... done" << endl;
+        }
+
+        Rcpp::NumericVector phreeqc_time;
+        Rcpp::NumericVector dht_get_time;
+        Rcpp::NumericVector dht_fill_time;
+        Rcpp::IntegerVector phreeqc_counts;
+        Rcpp::NumericVector idle_worker;
+
+        int phreeqc_tmp;
+
+        timings = (double*) calloc(3, sizeof(double));
+
+        if (dht_enabled) {
+            dht_hits = 0;
+            dht_miss = 0;
+            dht_collision = 0;
+            dht_perfs = (uint64_t*) calloc(3, sizeof(uint64_t));
+        }
+
+        double idle_worker_tmp;
+
+        for (int p = 0; p < world_size-1; p++)
+        {
+            /* ATTENTION Worker p has rank p+1 */
+            /* Send termination message to worker */
+            MPI_Send(NULL, 0, MPI_DOUBLE, p+1, TAG_FINISH, MPI_COMM_WORLD);
+
+            
+            MPI_Recv(timings, 3, MPI_DOUBLE, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            phreeqc_time.push_back(timings[0], "w" + to_string(p+1));
+
+            MPI_Recv(&phreeqc_tmp, 1, MPI_INT, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            phreeqc_counts.push_back(phreeqc_tmp, "w" + to_string(p+1));
+
+            MPI_Recv(&idle_worker_tmp, 1, MPI_DOUBLE, p+1, TAG_TIMING, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            idle_worker.push_back(idle_worker_tmp, "w" + to_string(p+1));
+
+            if (dht_enabled)
+            {
+                dht_get_time.push_back(timings[1], "w" + to_string(p+1));
+                dht_fill_time.push_back(timings[2], "w" + to_string(p+1));
+
+                MPI_Recv(dht_perfs, 3, MPI_UNSIGNED_LONG_LONG, p+1, TAG_DHT_PERF, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                dht_hits += dht_perfs[0];
+                dht_miss += dht_perfs[1];
+                dht_collision += dht_perfs[2];
+            }
+        }
+
+        R.parseEvalQ("profiling <- list()");
+
+        R["simtime"] = sim_end - sim_start;
+        R.parseEvalQ("profiling$simtime <- simtime");
+        R["simtime_transport"] = cummul_transport;
+        R.parseEvalQ("profiling$simtime_transport <- simtime_transport");
+        R["simtime_chemistry"] = cummul_chemistry;
+        R.parseEvalQ("profiling$simtime_chemistry <- simtime_chemistry");
+        R["simtime_workers"] = cummul_workers;
+        R.parseEvalQ("profiling$simtime_workers <- simtime_workers");
+        R["simtime_chemistry_master"] = cummul_chemistry_master;
+        R.parseEvalQ("profiling$simtime_chemistry_master <- simtime_chemistry_master");
+
+        R["seq_master_prep"] = cummul_master_seq_pre_loop;
+        R.parseEvalQ("profiling$seq_master_prep <- seq_master_prep");
+        R["seq_master_loop"] = cummul_master_seq_loop;
+        R.parseEvalQ("profiling$seq_master_loop <- seq_master_loop");
+
+        // R["master_send"] = master_send;
+        // R.parseEvalQ("profiling$master_send <- master_send");
+        // R["master_recv"] = master_recv;
+        // R.parseEvalQ("profiling$master_recv <- master_recv");
+
+        R["idle_master"] = master_idle;
+        R.parseEvalQ("profiling$idle_master <- idle_master");
+        R["idle_worker"] = idle_worker;
+        R.parseEvalQ("profiling$idle_worker <- idle_worker");
+
+        R["phreeqc_time"] = phreeqc_time;
+        R.parseEvalQ("profiling$phreeqc <- phreeqc_time");
+
+        R["phreeqc_count"] = phreeqc_counts;
+        R.parseEvalQ("profiling$phreeqc_count <- phreeqc_count");
+
+        if (dht_enabled)
+        {
+            R["dht_hits"] = dht_hits;
+            R.parseEvalQ("profiling$dht_hits <- dht_hits");
+            R["dht_miss"] = dht_miss;
+            R.parseEvalQ("profiling$dht_miss <- dht_miss");
+            R["dht_collision"] = dht_collision;
+            R.parseEvalQ("profiling$dht_collisions <- dht_collision");
+            R["dht_get_time"] = dht_get_time;
+            R.parseEvalQ("profiling$dht_get_time <- dht_get_time");
+            R["dht_fill_time"] = dht_fill_time;
+            R.parseEvalQ("profiling$dht_fill_time <- dht_fill_time");
+        }
+
+        
+        free(workerlist);
+        free(timings);
+
+        if (dht_enabled)
+            free(dht_perfs);
+
+        cout << "CPP: Done! Results are stored as R objects into <" << out_dir << "/timings.rds>" << endl;
+        /*exporting results and profiling data*/
+	
+        std::string r_vis_code;
+        r_vis_code = "saveRDS(profiling, file=paste0(fileout,'/timings.rds'));";
+        R.parseEval(r_vis_code);
+    } else { /*This is executed by the workers*/
+        if (!dht_file.empty()) {
+            int res = file_to_table((char *) dht_file.c_str());
+            if (res != DHT_SUCCESS) {
+                if (res == DHT_WRONG_FILE) {
+                    if (world_rank == 2) cerr << "CPP: Worker: Wrong File" << endl;
+                } else {
+                    if (world_rank == 2) cerr << "CPP: Worker: Error in loading current state of DHT from file" << endl;
+                }
+                return EXIT_FAILURE;
+            } else {
+                if (world_rank == 2) cout << "CPP: Worker: Successfully loaded state of DHT from file " << dht_file << endl;
+                std::cout.flush();
+            }
+        }
+        worker_function(R);
+        free(mpi_buffer_results);
+    }
+    
+    cout << "CPP: finished, cleanup of process " << world_rank << endl;
+
+    if (dht_enabled)
+    {
+        
+        if (dht_strategy == 0)
+        {
+            if(world_rank != 0) {
+                DHT_free(dht_object, NULL, NULL);
+            }
+        } else {
+            DHT_free(dht_object, NULL, NULL);
+        }
+    }
+
+    free(mpi_buffer);
+    MPI_Finalize();
+    
+    if (world_rank==0) {
+        cout << "CPP: done, bye!" << endl;
+    }
+    
+    exit(0);
+}
+
--- a/src/kin_r_library.R
+++ b/src/kin_r_library.R
@ -0,0 +1,662 @@
+## Simple function to check file extension. It is needed to check if
+## the GridFile is SUM (MUFITS format) or rds/RData
+FileExt <- function (x)
+{
+    pos <- regexpr("\\.([[:alnum:]]+)$", x)
+    ifelse(pos > -1L, substring(x, pos + 1L), "")
+}
+
+### This function is only called by the master process. It sets up
+### the data structures for the coupled simulations and performs the
+### first timestep or "iteration zero", possibly involving the phreeqc
+### calculation of the initial state (if it is not already provided as
+### matrix) and the first Advection iteration
+master_init <- function(setup)
+{
+    msgm("Process with rank 0 reading GRID and MUFITS_sims")
+
+    ## MDL TODO: actually grid and all the MUFITS snapshots should be
+    ## read and stored only by the master process!!
+
+    ## Setup the directory where we will store the results
+    verb <- FALSE
+    if (local_rank==0) {
+        verb <- TRUE ## verbosity loading MUFITS results
+        if (!dir.exists(fileout)) {
+            dir.create(fileout)
+            msgm("created directory ", fileout)
+        } else {
+            msgm("dir ", fileout," already exists, I will overwrite!")
+        }
+        if (!exists("store_result"))
+            msgm("store_result doesn't exist!")
+        else
+            msgm("store_result is ", store_result)
+    } else {
+        
+    }
+
+    ## to enhance flexibility, gridfile can be now given in SUM or,
+    ## already processed, in rds format. We check for extension first. 
+    gridext <- FileExt(setup$gridfile)
+    if (gridext=="SUM") {
+        msgm("Generating grid from MUFITS SUM file...")
+        mufits_grid <- ReadGrid(setup$gridfile, verbose=verb)
+    } else {
+        msgm("Reading grid from rds file...")
+        mufits_grid <- readRDS(setup$gridfile)
+    }
+
+    ## load all the snapshots at once. The setup argument "snapshots"
+    ## now can be either a *directory* where the .SUM files are stored
+    ## or a rds file.
+    if (dir.exists(setup$snapshots)) {
+        msgm("<snapshots> points to a directory; reading from SUM files in there...")
+        mufits_sims <- LoadMufitsSumRes(dir=setup$snapshots, verbose=verb)
+    } else {
+        msgm("<snapshots> points to a file. Reading as rds...")
+        mufits_sims <- readRDS(setup$snapshots)
+    }
+    
+    ## Since this function is evaluated by the R process called from
+    ## the C++ program, we need to make available all these variables
+    ## to the R parent frame!
+    assign("mufits_grid", mufits_grid, pos=parent.frame())
+    assign("mufits_sims", mufits_sims, pos=parent.frame())
+    ## cat(local_rank, "assignement complete\n")
+
+
+    ## we calculate the *coupling* iterations
+    nstep <- length(mufits_sims)
+    msgm("using", nstep,"flow snapshots")
+    ## cat(local_rank, "nstep:", nstep, "\n")
+    ## cat(local_rank, "names:", paste(names(mufits_sims), collate=","), "\n")
+
+    ## we have these snapshots; we output the results of the coupling
+    ## after each timesteps
+    timesteps <- diff(sapply(mufits_sims, function(x) {return(x$info)}))
+    ## cat(local_rank, "timesteps:", paste0(timesteps, collate=" "), "\n")
+
+    dt_differ <- abs(max(timesteps) - min(timesteps)) != 0
+
+    maxiter <- length(timesteps)
+
+
+    ## steady state after last flow snapshot? It is controlled by
+    ## "setup$prolong", containing an integer which represents the
+    ## number of further iterations
+    setup$steady <- FALSE
+    if ("prolong" %in% names(setup)) {
+        
+        last_dt <- timesteps[length(timesteps)]
+        timesteps <- c(timesteps, rep(last_dt, setup$prolong))
+        msgm("simulation prolonged for", setup$prolong, "additional iterations")
+        ## we set this flag to TRUE to check afterwards which snapshot we need to use at each iteration 
+        setup$steady <- TRUE
+        setup$last_snapshot <- maxiter
+        maxiter <- maxiter + setup$prolong
+
+    }
+
+    ## now that we know how many iterations we're gonna have, we can
+    ## setup the optional output
+    if (local_rank==0) {
+        if (is.null(setup$iter_output)) {
+            ## "iter_output" is not specified: store all iterations
+            setup$out_save <- seq(1,maxiter)
+            msgm("setup$iter_output unspecified, storing all iterations")
+        } else if (setup$iter_output=="all") {
+            ## "iter_output" is "all": store all iterations
+            setup$out_save <- seq(1,maxiter)
+            msgm("storing all iterations")
+        } else if (is.numeric(setup$iter_output)) {
+            msgm("storing iterations:", paste(setup$iter_output, collapse=", "))
+            setup$out_save <- as.integer(setup$iter_output)
+        } else if (setup$iter_output=="last") {
+            msgm("storing only the last iteration")
+            setup$out_save <- maxiter
+        } else {## fallback to "all"
+            setup$out_save <- seq(1,maxiter)
+            msgm("invalid setup$iter_output: storing all iterations")
+        }
+    }
+    
+    setup$iter <- 1
+    setup$maxiter <- maxiter
+    setup$timesteps <- timesteps
+    setup$simulation_time <- 0
+    setup$dt_differ <- dt_differ
+     
+    if (nrow(setup$bound)==1) {
+        boundmatAct <- t(RedModRphree::pH2Act(setup$bound))
+        msg("formed correct matrix from setup$bound:")
+        print(boundmatAct)
+    } else {
+        boundmatAct <- RedModRphree::pH2Act(setup$bound)
+    }
+
+    setup$boundmatAct <- boundmatAct
+    return(setup)
+}
+
+## This function is called by all processes.
+## Since the worker processes need state_C in worker.cpp -> worker_function()
+## even the worker need to run this code.
+## TODO: worker.cpp -> worker_function() --> state_C is only needed to obtain headers 
+## and size of dataframe ... if this will be adjusted, this function will be 
+## unnecessary for workers
+init_chemistry <- function(setup) 
+{
+    ## setup the chemistry
+    if (!is.matrix(setup$initsim)) {
+        msgm("initial state defined through PHREEQC simulation, assuming homogeneous medium!")
+        tmpfirstrun <- RunPQC(setup$initsim, second=TRUE)
+        
+        ## if (local_rank==0){
+        ##     print(tmpfirstrun)
+        ## }
+        remcolnames <- colnames(tmpfirstrun)
+        if (nrow(tmpfirstrun) > 1) {
+            ## msgm("Iter 0 selecting second row")
+            firstrun <- matrix(tmpfirstrun[2,], nrow=1, byrow=TRUE)
+            colnames(firstrun) <- remcolnames
+        } else {
+            firstrun  <- tmpfirstrun
+        }
+        state_C <- matrix(rep(firstrun,setup$n), byrow=TRUE, ncol=length(setup$prop))
+        colnames(state_C) <- colnames(firstrun)
+
+        ## if (local_rank==0)
+        ##     saveRDS(state_C, "initstate.rds")
+    } else {
+        msgm("given initial state")
+        state_C <- setup$initsim
+    }
+    ## save state_T and state_C
+    setup$state_C <- state_C
+
+    return(setup)
+}
+
+## relic code, may be useful in future
+finit <- function(setup)
+{
+    ## saved <- setup$saved
+    ## state_C <- setup$state_C
+    ## timesteps <- setup$timesteps
+    ## out_save <- setup$out_save
+    ## to_save <- setup$to_save
+
+    ## if (out_save) {
+    ##     msgm("saving <saved>")
+    ##     attr(saved,"savedtimesteps") <- timesteps[save]
+    ##     attr(saved,"timesteps") <- timesteps
+    ##     return(saved)
+    ## } else {    
+    ##     attr(saved,"timesteps") <- timesteps
+    ##     return(state_C)
+    ## }
+}
+
+## This function, called only by the master, computes the *inner*
+## timesteps for transport given the requested simulation timestep and
+## the current snapshot (through the Courant Fredrich Levy condition)
+## NB: we always iterate: 1)T 2)C
+master_iteration_setup <- function(setup)
+{
+    ## in this version, "grid" and "mufits_sims" are variables already
+    ## living in the environment of the R process with local_rank 0
+
+    ## if (local_rank == 0) {
+    ##     cat("Process ", local_rank, "iteration_start\n")
+    ## } else {
+    ##     cat("Process ", local_rank, "SHOULD NOT call iteration_start!\n")
+    ## }
+
+
+    ## in C++, "iter" starts from 1
+    iter <- setup$iter
+    
+    next_dt <- setup$timesteps[iter]
+
+    ## setting the current flow snapshot - we have to check if
+    ## we need prolongation or not
+    if (setup$steady) {
+        if (iter > setup$last_snapshot)
+            snap <- mufits_sims[[setup$last_snapshot]]
+        else
+            snap <- mufits_sims[[iter]]
+    } else
+        snap <- mufits_sims[[iter]]
+
+    msgm("Current simulation time:", round(setup$simulation_time),"[s] or ", round(setup$simulation_time/3600/24,2), "[day]")
+    msgm("Requested time step for current iteration:", round(next_dt),"[s] or ", round(next_dt/3600/24,2), "[day]")
+    setup$simulation_time <- setup$simulation_time+next_dt
+    msgm("Target simulation time:", round(setup$simulation_time),"[s] or ", round((setup$simulation_time)/3600/24,2), "[day]")
+
+    
+    ## since the phase and density may change in MUFITS
+    ## simulations/snapshots, we MUST tell their name at setup. Here
+    ## we match the right column for both variables
+    nphase   <- match(setup$phase,   colnames(snap$conn))
+    ndensity <- match(setup$density, colnames(snap$cell))
+
+    ## the "new cfl"
+    flux <- snap$conn[, nphase]
+    cellporvol <- mufits_grid$cell$PORO * mufits_grid$cell$CELLVOL
+    
+    ## in MUFITS, positive flux means from CELLID1 to CELLID2
+    pos_flux <- ifelse(flux>0, snap$conn$CONNID1, snap$conn$CONNID2)
+    poro <- mufits_grid$cell$PORO[pos_flux]
+    vol <- mufits_grid$cell$CELLVOL[snap$conn$CONNID1]
+
+    ## extract the right density for the right fluxes
+    density <- snap$cell[pos_flux, ndensity]
+    
+    ## transform flux from Mufits ton/day to m3/s. This is a VOLUMETRIC FLUX
+    fluxv <- flux/3600/24*1000/density
+    
+    ## now we want the darcy velocity
+    excl0 <- which(abs(fluxv)<.Machine$double.eps)
+    ## msgm("excl0"); print(excl0)
+    ## msgm("length(excl0)"); print(length(excl0))
+
+    ## The CFL condition is expressed in terms of total flux and total
+    ## pore volume
+    cfl <- as.numeric(min(abs(vol*poro/fluxv)[-excl0]))
+
+    if (!is.finite(cfl))
+        stop(msgm("CFL is ", cfl,"; quitting"))
+             
+    allowed_dt <- setup$Cf*cfl
+    requested_dt <- next_dt ## target_time - setup$simulation_time
+    msgm("CFL allows dt of <", round(allowed_dt, 2)," [s]; multiplicator is ", setup$Cf,
+         "; requested_dt is: ", round(requested_dt, 2))
+        
+    if (requested_dt > allowed_dt) {
+        inniter <- requested_dt%/%allowed_dt ## integer division
+        inner_timesteps <- c(rep(allowed_dt, inniter), requested_dt%%allowed_dt)
+        ## was: inner_timesteps <- c(rep(allowed_dt, inniter), requested_dt - allowed_dt * inniter)
+    } else {
+        inner_timesteps <- requested_dt
+        inniter <- 1
+    }
+
+    msgm("Performing ", inniter, " inner iterations")
+
+    setup$inner_timesteps <- inner_timesteps
+    setup$requested_dt <- requested_dt
+    setup$allowed_dt <- allowed_dt
+    setup$inniter <- inniter
+    setup$iter <- iter
+
+    ## TODO these 3 can be actually spared
+    setup$fluxv <- fluxv
+    ## setup$no_transport_conn <- excl0
+    setup$cellporvol <- cellporvol
+    
+    return(setup)
+}
+
+
+## This function, called only by master, stores on disk the last
+## calculated time step if store_result is TRUE and increments the
+## iteration counter
+master_iteration_end <- function(setup) {
+    iter    <- setup$iter
+    ## MDL Write on disk state_T and state_C after every iteration
+    ## comprised in setup$out_save
+    if (store_result) {
+        if (iter %in% setup$out_save) {
+            nameout <- paste0(fileout, '/iter_', sprintf('%03d', iter), '.rds')
+            info <- list(tr_req_dt     = as.integer(setup$requested_dt),
+                         tr_allow_dt   = setup$allowed_dt,
+                         tr_inniter    = as.integer(setup$inniter)
+                         )
+            saveRDS(list(T=setup$state_T, C=setup$state_C,
+                         simtime=as.integer(setup$simulation_time),
+                         tr_info=info), file=nameout)
+            msgm("results stored in <", nameout, ">")
+        }
+    }
+    msgm("done iteration", iter, "/", setup$maxiter)
+    setup$iter <- setup$iter + 1
+    return(setup)
+}
+
+## master: compute advection
+master_advection <- function(setup) {
+    msgm("requested dt=", setup$requested_dt)
+    concmat <- RedModRphree::pH2Act(setup$state_C)
+    inner_timesteps <- setup$inner_timesteps
+    Cf <- setup$Cf 
+    iter <- setup$iter
+
+    ## MDL: not used at the moment, so commenting it out
+    ## excl <- setup$no_transport_conn
+    ## msgm("excl"); print(excl)
+    immobile <- setup$immobile
+    boundmat <- setup$boundmatAct
+    
+    ## setting the current flow snapshot - we have to check if
+    ## we are in "prolongation" or not
+    if (setup$steady) {
+        if (iter > setup$last_snapshot)
+            snap <- mufits_sims[[setup$last_snapshot]]
+        else
+            snap <- mufits_sims[[iter]]
+    } else
+        snap <- mufits_sims[[iter]]
+
+    ## conc is a matrix with colnames 
+    if (is.matrix(concmat)) {
+        if (length(immobile)>0)
+            val <- concmat[,-immobile]
+        else
+            val <- concmat
+        
+        ## sort the columns of the matrix matching the names of boundary 
+        sptr <- colnames(val)
+        inflow <- boundmat[, colnames(boundmat)!="cbound", drop=FALSE]
+        spin <- colnames(inflow) 
+        ind <- match(spin,sptr)
+        if (TRUE %in% is.na(ind)) {
+            msgm("Missing boundary conditions for some species")
+            val <- cbind(val,matrix(0,ncol=sum(is.na(ind)),nrow=nrow(val) ))
+            colnames(val) <- c(sptr,spin[is.na(ind)])
+        }
+        
+        sptr <- colnames(val)
+        ind <- match(sptr,spin)
+        ## msgm("ind"); print(ind)
+        
+        cnew <- val
+
+        msgm("Computing transport of ", ncol(val), " species")
+        ## if (local_rank==0) {
+        ##     saveRDS(list(conc=newconc, times=times, activeCells=grid$cell$CELLID[-exclude_cell], fluxv=fluxv),
+        ##             file=paste0("TranspUpwind_", local_rank, ".RData"))
+        ## }
+        ## cnew[ boundmat[, 1] ] <- boundmat[, 2]
+
+        ## MDL 20200227: Here was the bug: "excl" refers to
+        ## CONNECTIONS and not GRID_CELLS!!
+        
+        ## cells where we won't update the concentrations: union of
+        ## boundary and no-flow cells
+        ## if (length(excl) == 0)
+        ##     exclude_cell <- sort(c(boundmat[, 1]))
+        ## else
+        ##     exclude_cell <- sort(c(boundmat[, 1], excl))
+        exclude_cell <- sort(c(boundmat[, 1]))
+
+        ## msgm("mufits_grid$cell$CELLID[-exclude_cell]:")
+        ## print(mufits_grid$cell$CELLID[-exclude_cell])
+
+        for (i in seq(1, ncol(val))) {
+            ## form a 2-column matrix with cell id and boundary
+            ## concentration for those elements
+            newboundmat <- boundmat[,c(1, ind[i]+1)]
+            ## vector with the old concentrations
+            concv <- val[,i]
+            ## apply boundary conditions to the concentration vector
+            ## (they should stay constant but better safe than sorry)
+            concv[newboundmat[, 1]] <- newboundmat[, 2]
+
+            ## call the function
+            cnew[,i] <- CppTransportUpwindIrr(concv = concv,
+                                              times = inner_timesteps, 
+                                              activeCells = mufits_grid$cell$CELLID[-exclude_cell],
+                                              fluxv = setup$fluxv, 
+                                              listconn = mufits_grid$listconn,
+                                              porVol = setup$cellporvol)
+        }
+        
+        colnames(cnew) <- colnames(val)
+        
+        if ( length(immobile) > 0)  { 
+            res <- cbind(cnew, concmat[,immobile])
+        } else {
+            res <- cnew
+        }
+
+        ## check for negative values. This SHOULD NOT OCCUR and may be
+        ## the result of numerical dispersion (or bug in my transport
+        ## code!)
+        if (any(res <0 )) {
+            rem_neg <- which(res<0, arr.ind=TRUE)
+            a <- nrow(rem_neg)
+            res[res < 0 ] <- 0
+            msgm("-> ", a, "concentrations were negative")
+            print(rem_neg)
+        }
+        ## msgm("state_T after iteration", setup$iter, ":")
+        ## print(head(res))
+    } else {
+        msgm("state_C at iteration", setup$iter, " is not a Matrix, doing nothing!")
+    }
+
+    ## retransform concentrations H+ and e- into pH and pe
+    state_T <- RedModRphree::Act2pH(res)
+
+    setup$state_T <- state_T
+    
+    return(setup)
+}
+
+
+## function for the workers to compute chemistry through PHREEQC
+slave_chemistry <- function(setup, data)
+{
+    base     <- setup$base
+    first    <- setup$first
+    prop     <- setup$prop
+    immobile <- setup$immobile
+    kin      <- setup$kin
+    ann      <- setup$ann
+
+    if (local_rank == 0) {
+        iter     <- setup$iter
+        timesteps <- setup$timesteps
+        dt       <- timesteps[iter]
+    }
+
+    state_T <- data ## not the global field, but the work-package
+
+    ## treat special H+/pH, e-/pe cases
+    state_T <- RedModRphree::Act2pH(state_T)
+    
+    ## reduction of the problem
+    if(setup$reduce)
+        reduced <- ReduceStateOmit(state_T, omit=setup$ann)
+    else
+        reduced <- state_T
+
+    ## if (local_rank==1) {
+    ##     msg("worker", local_rank,"; iter=", iter, "dt=", dt)
+    ##     msg("reduce is", setup$reduce)
+    ##     msg("data:")
+    ##     print(reduced)
+    ##     msg("base:")
+    ##     print(base)
+    ##     msg("first:")
+    ##     print(first)
+    ##     msg("ann:")
+    ##     print(ann)
+    ##     msg("prop:")
+    ##     print(prop)
+    ##     msg("immobile:")
+    ##     print(immobile)
+    ##     msg("kin:")
+    ##     print(kin)
+    ## }
+
+    ## form the PHREEQC input script for the current work package
+    inplist <- splitMultiKin(data=reduced, procs=1, base=base, first=first,
+                             ann=ann, prop=prop, minerals=immobile, kin=kin, dt=dt)
+
+    ## if (local_rank==1 & iter==1) 
+    ##         RPhreeWriteInp("FirstInp", inplist)
+
+    tmpC <- RunPQC(inplist, procs=1, second=TRUE)
+
+    ## recompose after the reduction
+    if (setup$reduce)
+        state_C <- RecomposeState(tmpC, reduced)
+    else {
+        state_C <- tmpC
+    }
+
+    ## the next line is needed since we don't need all columns of
+    ## PHREEQC output
+    return(state_C[, prop])
+} 
+
+
+
+## This function, called by master
+master_chemistry <- function(setup, data)
+{
+    state_T <- setup$state_T 
+
+    msgm(" chemistry iteration", setup$iter)
+
+    ## treat special H+/pH, e-/pe cases
+    state_T <- RedModRphree::Act2pH(state_T)
+    
+    ## reduction of the problem
+    if(setup$reduce)
+        reduced <- ReduceStateOmit(state_T, omit=setup$ann)
+    else
+        reduced <- state_T
+    
+    ### inject data from workers
+    res_C <- data
+
+    rownames(res_C) <- NULL
+    
+    ## print(res_C)
+    
+    if (nrow(res_C) > nrow(reduced)) {
+        res_C <- res_C[seq(2,nrow(res_C), by=2),]
+    }
+    
+    ## recompose after the reduction
+    if (setup$reduce)
+        state_C <- RecomposeState(res_C, reduced)
+    else {
+        state_C <- res_C
+    }
+    
+    setup$state_C <- state_C
+    setup$reduced <- reduced
+
+    return(setup)
+} 
+    
+
+## Adapted version for "reduction"
+ReduceStateOmit <- function (data, omit=NULL, sign=6) 
+{
+    require(mgcv)
+
+    rem <- colnames(data)
+    if (is.list(omit)) {
+        indomi <- match(names(omit), colnames(data))
+        datao <- data[, -indomi]
+    } else datao <- data
+
+    datao <- signif(datao, sign)
+    red <- mgcv::uniquecombs(datao)
+    inds <- attr(red, "index")
+    now <- ncol(red)
+
+    
+    ## reattach the omitted column(s)
+    ## FIXME: control if more than one ann is present
+    if (is.list(omit)) {
+        red <- cbind(red, rep( data[ 1, indomi], nrow(red)))
+    
+        colnames(red)[now+1] <- names(omit)
+
+        ret <- red[, colnames(data)]
+    } else {
+        ret <- red
+    }
+    rownames(ret) <- NULL
+    attr(ret, "index") <- inds
+    return(ret)
+}
+
+
+
+## Attach the name of the calling function to the message displayed on
+## R's stdout
+msgm <- function (...)
+{
+    
+    if (local_rank==0) {
+        fname <- as.list(sys.call(-1))[[1]]
+        prefix <- paste0("R: ", fname, " ::")
+        cat(paste(prefix, ..., "\n"))
+    }
+    invisible()
+}
+
+
+## Function called by master R process to store on disk all relevant
+## parameters for the simulation
+StoreSetup <- function(setup) {
+
+    to_store <- vector(mode="list", length=5)
+    names(to_store) <- c("Sim", "Flow", "Transport", "Chemistry", "DHT")
+
+    ## read the setup R file, which is sourced in kin.cpp
+    tmpbuff <- file(filesim, "r")
+    setupfile <- readLines(tmpbuff)
+    close.connection(tmpbuff) 
+
+    to_store$Sim <- setupfile
+    
+    to_store$Flow <- list(
+        snapshots  = setup$snapshots,
+        gridfile   = setup$gridfile,
+        phase      = setup$phase,
+        density    = setup$density,
+        dt_differ  = setup$dt_differ,
+        prolong    = setup$prolong,
+        maxiter    = setup$maxiter,
+        saved_iter = setup$iter_output,
+        out_save   = setup$out_save )
+
+    to_store$Transport <- list(
+        boundary = setup$bound,
+        Cf       = setup$Cf,
+        prop     = setup$prop,
+        immobile = setup$immobile,
+        reduce   = setup$reduce )
+    
+    to_store$Chemistry <- list(
+        nprocs   = n_procs,
+        wp_size  = work_package_size,
+        base     = setup$base,
+        first    = setup$first,
+        init     = setup$initsim,
+        db       = db,
+        kin      = setup$kin,
+        ann      = setup$ann)
+    
+    if (dht_enabled) {
+        to_store$DHT <- list(
+            enabled   = dht_enabled,
+            log       = dht_log,
+            signif    = dht_final_signif,
+            proptype  = dht_final_proptype)
+        
+    } else {
+        to_store$DHT <- FALSE
+    }
+
+    saveRDS(to_store, file=paste0(fileout,'/setup.rds'))
+    msgm("initialization stored in ", paste0(fileout,'/setup.rds'))
+}
--- a/src/kin_r_library_warnings.R
+++ b/src/kin_r_library_warnings.R
@ -0,0 +1,584 @@
+## Simple function to check file extension. It is needed to check if
+## the GridFile is SUM (MUFITS format) or rds/RData
+FileExt <- function (x)
+{
+    pos <- regexpr("\\.([[:alnum:]]+)$", x)
+    ifelse(pos > -1L, substring(x, pos + 1L), "")
+}
+
+### This function is only called by the master process. It sets up
+### the data structures for the coupled simulations and performs the
+### first timestep or "iteration zero", possibly involving the phreeqc
+### calculation of the initial state (if it is not already provided as
+### matrix) and the first Advection iteration
+master_init <- function(setup)
+{
+    msgm("Process with rank 0 reading GRID and MUFITS_sims")
+
+    ## MDL TODO: actually grid and all the MUFITS snapshots should be
+    ## read and stored only by the master process!!
+
+    ## Setup the directory where we will store the results
+    verb <- FALSE
+    if (local_rank==0) {
+        verb <- TRUE ## verbosity loading MUFITS results
+        if (!dir.exists(fileout)) {
+            dir.create(fileout)
+            msgm("created directory ", fileout)
+        } else {
+            msgm("dir ", fileout," already exists, I will overwrite!")
+        }
+        if (!exists("store_result"))
+            msgm("store_result doesn't exist!")
+        else
+            msgm("store_result is ", store_result)
+
+    } else {
+        
+    }
+
+    ## to enhance flexibility, gridfile can be now given in SUM or,
+    ## already processed, in rds format. We check for extension first. 
+    gridext <- FileExt(setup$gridfile)
+    if (gridext=="SUM") {
+        msgm("Generating grid from MUFITS SUM file...")
+        mufits_grid <- ReadGrid(setup$gridfile, verbose=verb)
+    } else {
+        msgm("Reading grid from rds file...")
+        mufits_grid <- readRDS(setup$gridfile)
+    }
+
+    ## load all the snapshots at once. The setup argument "snapshots"
+    ## now can be either a *directory* where the .SUM files are stored
+    ## or a rds file.
+    if (dir.exists(setup$snapshots)) {
+        msgm("<snapshots> points to a directory; reading from SUM files in there...")
+        mufits_sims <- LoadMufitsSumRes(dir=setup$snapshots, verbose=verb)
+    } else {
+        msgm("<snapshots> points to a file. Reading as rds...")
+        mufits_sims <- readRDS(setup$snapshots)
+    }
+    
+    ## Since this function is evaluated by the R process called from
+    ## the C++ program, we need to make available all these variables
+    ## to the R parent frame!
+    assign("mufits_grid", mufits_grid, pos=parent.frame())
+    assign("mufits_sims", mufits_sims, pos=parent.frame())
+    ## cat(local_rank, "assignement complete\n")
+
+
+    ## we calculate the *coupling* iterations
+    nstep <- length(mufits_sims)
+    msgm("using", nstep,"flow snapshots")
+    ## cat(local_rank, "nstep:", nstep, "\n")
+    ## cat(local_rank, "names:", paste(names(mufits_sims), collate=","), "\n")
+
+    ## we have these snapshots; we output the results of the coupling
+    ## after each timesteps
+    timesteps <- diff(sapply(mufits_sims, function(x) {return(x$info)}))
+    ## cat(local_rank, "timesteps:", paste0(timesteps, collate=" "), "\n")
+
+    maxiter <- length(timesteps)
+
+    ## steady case not treated in this version!!!!
+    steady <- FALSE
+
+    setup$iter <- 1
+    setup$maxiter <- maxiter
+    setup$timesteps <- timesteps
+    setup$simulation_time <- 0
+    
+    setup$steady <- steady
+    
+    if (nrow(setup$bound)==1) {
+        boundmatAct <- t(RedModRphree::pH2Act(setup$bound))
+        msg("formed correct matrix from setup$bound:")
+        print(boundmatAct)
+    } else {
+        boundmatAct <- RedModRphree::pH2Act(setup$bound)
+    }
+
+    setup$boundmatAct <- boundmatAct
+    return(setup)
+}
+
+## This function is called by all processes.
+## Since the worker processes need state_C in worker.cpp -> worker_function()
+## even the worker need to run this code.
+## TODO: worker.cpp -> worker_function() --> state_C is only needed to obtain headers 
+## and size of dataframe ... if this will be adjusted, this function will be 
+## unnecessary for workers
+init_chemistry <- function(setup) 
+{
+    ## setup the chemistry
+    if (!is.matrix(setup$initsim)) {
+        msgm("initial state defined through PHREEQC simulation, assuming homogeneous medium!")
+        tmpfirstrun <- RunPQC_Warnings(setup$initsim, second=TRUE)
+        
+        ## if (local_rank==0){
+        ##     print(tmpfirstrun)
+        ## }
+        remcolnames <- colnames(tmpfirstrun)
+        if (nrow(tmpfirstrun) > 1) {
+            ## msgm("Iter 0 selecting second row")
+            firstrun <- matrix(tmpfirstrun[2,], nrow=1, byrow=TRUE)
+            colnames(firstrun) <- remcolnames
+        } else {
+            firstrun  <- tmpfirstrun
+        }
+        state_C <- matrix(rep(firstrun,setup$n), byrow=TRUE, ncol=length(setup$prop))
+        colnames(state_C) <- colnames(firstrun)
+
+        ## if (local_rank==0)
+        ##     saveRDS(state_C, "initstate.rds")
+    } else {
+        msgm("given initial state")
+        state_C <- setup$initsim
+    }
+    ## save state_T and state_C
+    setup$state_C <- state_C
+
+    return(setup)
+}
+
+## relic code, may be useful in future
+finit <- function(setup)
+{
+    ## saved <- setup$saved
+    ## state_C <- setup$state_C
+    ## timesteps <- setup$timesteps
+    ## out_save <- setup$out_save
+    ## to_save <- setup$to_save
+
+    ## if (out_save) {
+    ##     msgm("saving <saved>")
+    ##     attr(saved,"savedtimesteps") <- timesteps[save]
+    ##     attr(saved,"timesteps") <- timesteps
+    ##     return(saved)
+    ## } else {    
+    ##     attr(saved,"timesteps") <- timesteps
+    ##     return(state_C)
+    ## }
+}
+
+## This function, called only by the master, computes the *inner*
+## timesteps for transport given the requested simulation timestep and
+## the current snapshot (through the Courant Fredrich Levy condition)
+## NB: we always iterate: 1)T 2)C
+master_iteration_setup <- function(setup)
+{
+    ## in this version, "grid" and "mufits_sims" are variables already
+    ## living in the environment of the R process with local_rank 0
+
+    ## if (local_rank == 0) {
+    ##     cat("Process ", local_rank, "iteration_start\n")
+    ## } else {
+    ##     cat("Process ", local_rank, "SHOULD NOT call iteration_start!\n")
+    ## }
+
+
+    ## in C++, "iter" starts from 1
+    iter <- setup$iter
+    
+    next_dt <- setup$timesteps[iter]
+
+    ## setting the current snapshot
+    snap <- mufits_sims[[iter]]
+
+    msgm("Current simulation time:", round(setup$simulation_time),"[s] or ", round(setup$simulation_time/3600/24,2), "[day]")
+    msgm("Time step for current iteration:", round(next_dt),"[s] or ", round(next_dt/3600/24,2), "[day]")
+    setup$simulation_time <- setup$simulation_time+next_dt
+    msgm("Target simulation time:", round(setup$simulation_time),"[s] or ", round((setup$simulation_time)/3600/24,2), "[day]")
+
+    
+    ## since the phase and density may change in MUFITS
+    ## simulations/snapshots, we MUST tell their name at setup. Here
+    ## we match the right column for both variables
+    nphase   <- match(setup$phase,   colnames(snap$conn))
+    ndensity <- match(setup$density, colnames(snap$cell))
+
+    ## the "new cfl"
+    flux <- snap$conn[, nphase]
+    cellporvol <- mufits_grid$cell$PORO * mufits_grid$cell$CELLVOL
+    
+    ## in MUFITS, positive flux means from CELLID1 to CELLID2
+    pos_flux <- ifelse(flux>0, snap$conn$CONNID1, snap$conn$CONNID2)
+    poro <- mufits_grid$cell$PORO[pos_flux]
+    vol <- mufits_grid$cell$CELLVOL[snap$conn$CONNID1]
+
+    ## extract the right density for the right fluxes
+    density <- snap$cell[pos_flux, ndensity]
+    
+    ## transform flux from Mufits ton/day to m3/s. This is a VOLUMETRIC FLUX
+    fluxv <- flux/3600/24*1000/density
+    
+    ## now we want the darcy velocity
+    excl0 <- which(abs(fluxv)<.Machine$double.eps)
+    ## msgm("excl0"); print(excl0)
+    ## msgm("length(excl0)"); print(length(excl0))
+    ## The CFL condition is expressed in terms of total flux and total
+    ## pore volume
+    cfl <- as.numeric(min(abs(vol*poro/fluxv)[-excl0]))
+
+    if (!is.finite(cfl))
+        stop(msgm("CFL is ", cfl,"; quitting"))
+             
+    allowed_dt <- setup$Cf*cfl
+    requested_dt <- next_dt ## target_time - setup$simulation_time
+    msgm("CFL allows dt of <", round(cfl, 2)," [s]; multiplicator is ", setup$Cf,
+         "; requested_dt is: ", round(requested_dt, 2))
+        
+    if (requested_dt > allowed_dt) {
+        inniter <- floor(requested_dt/allowed_dt)
+        inner_timesteps <- c(rep(allowed_dt, inniter), requested_dt - allowed_dt * inniter)
+        msgm("Performing ", inniter, " inner iterations")
+    } else {
+        inner_timesteps <- requested_dt
+        inniter <- 1
+    }
+
+    setup$inner_timesteps <- inner_timesteps
+    setup$requested_dt <- requested_dt
+    setup$allowed_dt <- allowed_dt
+    setup$inniter <- inniter
+    setup$iter <- iter
+    setup$fluxv <- fluxv
+    setup$no_transport_cells <- excl0
+    setup$cellporvol <- cellporvol
+    
+    return(setup)
+}
+
+
+## This function, called only by master, stores on disk the last
+## calculated time step if store_result is TRUE and increments the
+## iteration counter
+master_iteration_end <- function(setup) {
+    iter    <- setup$iter
+    ## MDL Write on disk state_T and state_C after every iteration
+    if (store_result) {
+        nameout <- paste0(fileout, '/iter_', sprintf('%03d', iter), '.rds')
+        saveRDS(list(T=setup$state_T, C=setup$state_C), file=nameout)
+        msgm("results stored in <", nameout, ">")
+    }
+    msgm("done iteration", iter, "/", setup$maxiter)
+    setup$iter <- setup$iter + 1
+    return(setup)
+}
+
+## master: compute advection
+master_advection <- function(setup) {
+    msgm("requested dt=", setup$requested_dt)
+    concmat <- RedModRphree::pH2Act(setup$state_C)
+    inner_timesteps <- setup$inner_timesteps
+    Cf <- setup$Cf 
+    iter <- setup$iter
+    excl <- setup$no_transport_cells
+    ## msgm("excl"); print(excl)
+    immobile <- setup$immobile
+    boundmat <- setup$boundmatAct
+    snap <- mufits_sims[[setup$iter]]
+
+    ## conc is a matrix with colnames 
+    if (is.matrix(concmat)) {
+        if (length(immobile)>0)
+            val <- concmat[,-immobile]
+        else
+            val <- concmat
+        
+        ## sort the columns of the matrix matching the names of boundary 
+        sptr <- colnames(val)
+        inflow <- boundmat[, colnames(boundmat)!="cbound", drop=FALSE]
+        spin <- colnames(inflow) 
+        ind <- match(spin,sptr)
+        if (TRUE %in% is.na(ind)) {
+            msgm("Missing boundary conditions for some species")
+            val <- cbind(val,matrix(0,ncol=sum(is.na(ind)),nrow=nrow(val) ))
+            colnames(val) <- c(sptr,spin[is.na(ind)])
+        }
+        
+        sptr <- colnames(val)
+        ind <- match(sptr,spin)
+        ## msgm("ind"); print(ind)
+        
+        cnew <- val
+
+        msgm("Computing transport of ", ncol(val), " species")
+        ## if (local_rank==0) {
+        ##     saveRDS(list(conc=newconc, times=times, activeCells=grid$cell$CELLID[-exclude_cell], fluxv=fluxv),
+        ##             file=paste0("TranspUpwind_", local_rank, ".RData"))
+        ## }
+        ## cnew[ boundmat[, 1] ] <- boundmat[, 2]
+
+        ## cells where we won't update the concentrations: union of
+        ## boundary and no-flow cells
+        if (length(excl) == 0)
+            exclude_cell <- sort(c(boundmat[, 1]))
+        else
+            exclude_cell <- sort(c(boundmat[, 1], excl))
+
+        ## msgm("mufits_grid$cell$CELLID[-excludecell]:")
+        ## print(mufits_grid$cell$CELLID[-exclude_cell])
+
+        for (i in seq(1, ncol(val))) {
+            ## form a 2-column matrix with cell id and boundary
+            ## concentration for those elements
+            newboundmat <- boundmat[,c(1, ind[i]+1)]
+            ## vector with the old concentrations
+            concv <- val[,i]
+            ## apply boundary conditions to the concentration vector
+            ## (they should stay constant but better safe than sorry)
+            concv[newboundmat[, 1]] <- newboundmat[, 2]
+
+            ## call the function
+            cnew[,i] <- CppTransportUpwindIrr(concv = concv,
+                                              times = inner_timesteps, 
+                                              activeCells = mufits_grid$cell$CELLID[-exclude_cell],
+                                              fluxv = setup$fluxv, 
+                                              listconn = mufits_grid$listconn,
+                                              porVol = setup$cellporvol)
+        }
+        
+        colnames(cnew) <- colnames(val)
+        
+        if ( length(immobile) > 0)  { 
+            res <- cbind(cnew, concmat[,immobile])
+        } else {
+            res <- cnew
+        }
+
+        ## check for negative values. This SHOULD NOT OCCUR and may be
+        ## the result of numerical dispersion (or bug in my transport
+        ## code!)
+        if (any(res <0 )) { 
+            a <- length(res[res < 0 ])
+            res[res < 0 ] <- 0
+            msgm("-> ", a, "concentrations were negative")
+        }
+        ## msgm("state_T after iteration", setup$iter, ":")
+        ## print(head(res))
+    } else {
+        msgm("state_C at iteration", setup$iter, " is not a Matrix, doing nothing!")
+    }
+
+    setup$state_T <- res
+    
+    return(setup)
+}
+
+
+## function for the workers to compute chemistry through PHREEQC
+slave_chemistry <- function(setup, data)
+{
+    base     <- setup$base
+    first    <- setup$first
+    prop     <- setup$prop
+    immobile <- setup$immobile
+    kin      <- setup$kin
+    ann      <- setup$ann
+
+    if (local_rank == 0) {
+        iter     <- setup$iter
+        timesteps <- setup$timesteps
+        dt       <- timesteps[iter]
+    }
+
+    state_T <- data ## not the global field, but the work-package
+
+    ## treat special H+/pH, e-/pe cases
+    state_T <- RedModRphree::Act2pH(state_T)
+    
+    ## reduction of the problem
+    if(setup$reduce)
+        reduced <- ReduceStateOmit(state_T, omit=setup$ann)
+    else
+        reduced <- state_T
+
+    ## if (local_rank==1) {
+    ##     msg("worker", local_rank,"; iter=", iter, "dt=", dt)
+    ##     msg("reduce is", setup$reduce)
+    ##     msg("data:")
+    ##     print(reduced)
+    ##     msg("base:")
+    ##     print(base)
+    ##     msg("first:")
+    ##     print(first)
+    ##     msg("ann:")
+    ##     print(ann)
+    ##     msg("prop:")
+    ##     print(prop)
+    ##     msg("immobile:")
+    ##     print(immobile)
+    ##     msg("kin:")
+    ##     print(kin)
+    ## }
+
+    ## form the PHREEQC input script for the current work package
+    inplist <- splitMultiKin(data=reduced, procs=1, base=base, first=first,
+                             ann=ann, prop=prop, minerals=immobile, kin=kin, dt=dt)
+
+    ## if (local_rank==1 & iter==1) 
+    ##         RPhreeWriteInp("FirstInp", inplist)
+
+    tmpC <- RunPQC_Warnings(inplist, procs=1, second=TRUE)
+
+    ## recompose after the reduction
+    if (setup$reduce)
+        state_C <- RecomposeState(tmpC, reduced)
+    else {
+        state_C <- tmpC
+    }
+
+    ## the next line is needed since we don't need all columns of
+    ## PHREEQC output
+    return(state_C[, prop])
+} 
+
+
+
+## This function, called by master
+master_chemistry <- function(setup, data)
+{
+    state_T <- setup$state_T 
+
+    msgm(" chemistry iteration", setup$iter)
+
+    ## treat special H+/pH, e-/pe cases
+    state_T <- RedModRphree::Act2pH(state_T)
+    
+    ## reduction of the problem
+    if(setup$reduce)
+        reduced <- ReduceStateOmit(state_T, omit=setup$ann)
+    else
+        reduced <- state_T
+    
+    ### inject data from workers
+    res_C <- data
+
+    rownames(res_C) <- NULL
+    
+    ## print(res_C)
+    
+    if (nrow(res_C) > nrow(reduced)) {
+        res_C <- res_C[seq(2,nrow(res_C), by=2),]
+    }
+    
+    ## recompose after the reduction
+    if (setup$reduce)
+        state_C <- RecomposeState(res_C, reduced)
+    else {
+        state_C <- res_C
+    }
+    
+    setup$state_C <- state_C
+    setup$reduced <- reduced
+
+    return(setup)
+} 
+    
+
+## Adapted version for "reduction"
+ReduceStateOmit <- function (data, omit=NULL, sign=6) 
+{
+    require(mgcv)
+
+    rem <- colnames(data)
+    if (is.list(omit)) {
+        indomi <- match(names(omit), colnames(data))
+        datao <- data[, -indomi]
+    } else datao <- data
+
+    datao <- signif(datao, sign)
+    red <- mgcv::uniquecombs(datao)
+    inds <- attr(red, "index")
+    now <- ncol(red)
+
+    
+    ## reattach the omitted column(s)
+    ## FIXME: control if more than one ann is present
+    if (is.list(omit)) {
+        red <- cbind(red, rep( data[ 1, indomi], nrow(red)))
+    
+        colnames(red)[now+1] <- names(omit)
+
+        ret <- red[, colnames(data)]
+    } else {
+        ret <- red
+    }
+    rownames(ret) <- NULL
+    attr(ret, "index") <- inds
+    return(ret)
+}
+
+
+
+## Attach the name of the calling function to the message displayed on
+## R's stdout
+msgm <- function (...)
+{
+    
+    if (local_rank==0) {
+        fname <- as.list(sys.call(-1))[[1]]
+        prefix <- paste0("R: ", fname, " ::")
+        cat(paste(prefix, ..., "\n"))
+    }
+    invisible()
+}
+
+
+RunPQC_Warnings <- function (input, procs = 1, second = TRUE) 
+{
+
+    .runPQC <- function(input, onlysecond) {
+        require(phreeqc)
+        phreeqc::phrSetErrorStringsOn(TRUE)
+        error_count <- phreeqc::phrRunString(input)
+        warnings <- phreeqc::phrGetWarningStrings()
+        checked_warn <- CheckWarnings(warnings)
+
+        if (is.null(error_count)) {
+            out <- phreeqc::phrGetSelectedOutput()[[1]]
+            nso <- colnames(out)
+            nso <- sub("..mol.kgw.", "", nso, fixed = TRUE)
+            nso <- sub(".mol.kgw.", "", nso, fixed = TRUE)
+            nso <- sub("^k_", "", nso)
+            nso <- sub(".g.", "(g)", nso, fixed = TRUE)
+            if (second)
+                out <- out[seq(2, nrow(out), by = 2), ]
+            colnames(out) <- nso
+            return(data.matrix(out))
+        } else {
+            msgm("Error in simmulation!")
+            cat(phrGetErrorStrings(), sep="\n")
+        }
+    }
+    
+    if (!is.list(input)) {
+        if (is.character(input)) {
+            res <- .runPQC(input, onlysecond = second)
+        } else {
+            stopmsg("something wrong with the input, dying!")
+        }
+    }
+    else {
+        res <- foreach(i = seq_along(input), .combine = rbind) %dopar%
+            .runPQC(input[[i]], onlysecond = second)
+    }
+    return(res)
+}
+
+
+CheckWarnings <- function(warnings)
+{
+    ## get rid of "Negative moles in solution" warnings
+    recover <- grep("Negative moles", warnings, fixed=TRUE)
+    warnings <- warnings[-recover]
+
+    lw <- length(warnings)
+    if (lw>0) {
+        ## nfile <- paste0("warnings_", local_rank, ".txt")
+        nfile <- "Warnings.txt"
+        cat(warnings, file=nfile, sep="\n", append=TRUE)
+    }
+    return(lw)
+}
--- a/src/mdl_quint_kin.dat
+++ b/src/mdl_quint_kin.dat
--- a/src/parallel_r_library.R
+++ b/src/parallel_r_library.R
@ -0,0 +1,43 @@
+distribute_work_packages <- function(len, package_size) 
+{
+    ## Check if work_package is a divisor of grid length and act
+    ## accordingly
+    if ((len %% package_size) == 0) {
+        n_packages <- len/package_size
+    } else {
+        n_packages <- floor(len/package_size) + 1
+    }
+
+    ids <- rep(1:n_packages, times=package_size, each = 1)[1:len]
+    return(ids)
+}
+
+compute_wp_sizes <- function(ids)
+{
+    as.integer(table(ids))
+}
+
+shuffle_field <- function(data, send_ord) 
+{
+    shf <- data[send_ord,]
+    return(shf)
+}
+
+unshuffle_field <- function(data, send_ord) 
+{
+    data[send_ord,] <- data
+    rownames(data) <- NULL
+    return(data)
+}
+
+stat_wp_sizes <- function(sizes)
+{
+    res <- as.data.frame(table(sizes))
+    if (nrow(res)>1) {
+        msgm("Chosen work_package_size is not a divisor of grid length. ")
+        colnames(res) <- c("Size", "N")
+        print(res)
+    } else {
+        msgm("All work packages of length ", sizes[1])
+    }
+}
--- a/src/r_utils.cpp
+++ b/src/r_utils.cpp
@ -0,0 +1,44 @@
+#include "r_utils.h"
+
+/* This function converts a pure double dataframe into a double array.
+    buffer      <- double array, needs to be allocated before
+    df          <- reference to input dataframe
+*/
+void convert_R_Dataframe_2_C_buffer(double* buffer, Rcpp::DataFrame &df)
+{
+    size_t rowCount = df.nrow();
+    size_t colCount = df.ncol();
+
+    for (size_t i = 0; i < rowCount; i++)
+    {
+        for (size_t j = 0; j < colCount; j++)
+        {
+            /* Access column vector j and extract value of line i */
+            Rcpp::DoubleVector col = df[j];
+            buffer[i * colCount + j] = col[i];
+        }
+    }
+}
+
+/* This function converts a double array into a double dataframe.
+    buffer      <- input double array
+    df          <- reference to output dataframe, needs to be of fitting size, structure will be taken from it
+*/
+void convert_C_buffer_2_R_Dataframe(double* buffer, Rcpp::DataFrame &df)
+{
+    size_t rowCount = df.nrow();
+    size_t colCount = df.ncol();
+
+    for (size_t i = 0; i < rowCount; i++)
+    {
+        for (size_t j = 0; j < colCount; j++)
+        {
+            /* Access column vector j and extract value of line i */
+            Rcpp::DoubleVector col = df[j];
+            col[i] = buffer[i * colCount + j];
+        }
+    }
+}
+
+
+
--- a/src/r_utils.h
+++ b/src/r_utils.h
@ -0,0 +1,6 @@
+#pragma once
+#include <RInside.h> 
+
+/*Functions*/
+void convert_R_Dataframe_2_C_buffer(double* buffer, Rcpp::DataFrame &df);
+void convert_C_buffer_2_R_Dataframe(double* buffer, Rcpp::DataFrame &df);
--- a/src/worker.cpp
+++ b/src/worker.cpp
@ -0,0 +1,277 @@
+#include "worker.h"
+#include "dht_wrapper.h"
+#include "global_buffer.h"
+#include "r_utils.h"
+#include <mpi.h>
+#include <iostream>
+
+void worker_function(RInside& R) 
+{
+    int world_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+    MPI_Status probe_status;
+    int count;
+
+    int local_work_package_size;
+    int iteration;
+    double dt, current_sim_time;
+
+    double idle_a, idle_b;
+    double cummul_idle = 0.f;
+
+    double dht_get_start=0, dht_get_end=0;
+    double dht_fill_start=0, dht_fill_end=0;
+    double phreeqc_time_start=0, phreeqc_time_end=0;
+    int phreeqc_count = 0;
+
+    //timing[0] -> phreeqc
+    //timing[1] -> dht_get
+    //timing[2] -> dht_fill
+    double timing[3];
+    timing[0] = 0.0;
+    timing[1] = 0.0;
+    timing[2] = 0.0;
+
+    //dht_perf[0] -> hits
+    //dht_perf[1] -> miss
+    //dht_perf[2] -> collisions
+    uint64_t dht_perf[3];
+
+    if (dht_enabled) 
+    {
+        dht_flags.resize(work_package_size, true); //set size
+        dht_flags.assign(work_package_size, true); //assign all elements to true (default)
+        dht_hits = 0;
+        dht_miss = 0;
+        dht_collision = 0;
+
+
+        // MDL: This code has now been moved to kin.cpp 
+        // /*Load significance vector from R setup file (or set default)*/
+        // bool signif_vector_exists = R.parseEval("exists('signif_vector')");
+        // if (signif_vector_exists)
+        // { 
+        //     dht_significant_digits_vector = as<std::vector<int>>(R["signif_vector"]);
+        // } else 
+        // {
+        //     dht_significant_digits_vector.assign(dht_object->key_size / sizeof(double), dht_significant_digits);
+        // }
+
+        // /*Load property type vector from R setup file (or set default)*/
+        // bool prop_type_vector_exists = R.parseEval("exists('prop_type')");
+        // if (prop_type_vector_exists)
+        // { 
+        //     prop_type_vector = as<std::vector<string>>(R["prop_type"]);
+        // } else 
+        // {
+        //     prop_type_vector.assign(dht_object->key_size / sizeof(double), "normal");
+        // }
+    }
+
+    //initialization of helper variables
+    iteration = 0;
+    dt = 0;
+    current_sim_time = 0;
+    local_work_package_size = 0;
+
+    /*worker loop*/
+    while(1)
+    {
+        /*Wait for Message*/
+        idle_a = MPI_Wtime();
+        MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &probe_status);
+        idle_b = MPI_Wtime();
+
+        if (probe_status.MPI_TAG == TAG_WORK) 
+        {   /* do work */
+            
+            cummul_idle += idle_b - idle_a;
+
+            /* get number of doubles sent */
+            MPI_Get_count(&probe_status, MPI_DOUBLE, &count);
+
+            /* receive */
+            MPI_Recv(mpi_buffer, count, MPI_DOUBLE, 0, TAG_WORK, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+            //decrement count of work_package by BUFFER_OFFSET
+            count -= BUFFER_OFFSET;
+            //check for changes on all additional variables given by the 'header' of mpi_buffer
+            if (mpi_buffer[count] != local_work_package_size) { //work_package_size
+                local_work_package_size = mpi_buffer[count];
+                R["work_package_size"] = local_work_package_size;
+                R.parseEvalQ("mysetup$work_package_size <- work_package_size");
+            }
+            if (mpi_buffer[count+1] != iteration) { //current iteration of simulation
+                iteration = mpi_buffer[count+1];
+                R["iter"] = iteration;
+                R.parseEvalQ("mysetup$iter <- iter");
+            }
+            if (mpi_buffer[count+2] != dt) { //current timestep size
+                dt = mpi_buffer[count+2];
+                R["dt"] = dt;
+                R.parseEvalQ("mysetup$dt <- dt");
+            }
+            if (mpi_buffer[count+3] != current_sim_time) { //current simulation time ('age' of simulation)
+                current_sim_time = mpi_buffer[count+3];
+                R["simulation_time"] = current_sim_time;
+                R.parseEvalQ("mysetup$simulation_time <- simulation_time");
+            }
+            /* 4th double value is currently a placeholder */
+            // if (mpi_buffer[count+4] != placeholder) {
+            //     placeholder = mpi_buffer[count+4];
+            //     R["mysetup$placeholder"] = placeholder;
+            // }
+
+            /* get df with right structure to fill in work package */
+            R.parseEvalQ("tmp2 <- head(mysetup$state_C, work_package_size)");
+            // R.parseEval("print(rownames(tmp2)[1:5])");
+            // R.parseEval("print(head(tmp2, 2))");
+            // R.parseEvalQ("tmp2$id <- as.double(rownames(tmp2))");
+
+            Rcpp::DataFrame buffer = R.parseEval("tmp2");
+
+            if (dht_enabled)
+            {
+                // DEBUG
+                // cout << "RANK " << world_rank << " start checking DHT\n";
+
+                //resize helper vector dht_flags of work_package_size changes
+                if ((int) dht_flags.size() != local_work_package_size) {
+                    dht_flags.resize(local_work_package_size, true); //set size
+                    dht_flags.assign(local_work_package_size, true); //assign all elements to true (default)
+                } 
+                
+                dht_get_start = MPI_Wtime();
+                check_dht(R, local_work_package_size, dht_flags, mpi_buffer);
+                dht_get_end = MPI_Wtime();
+
+                //DEBUG
+                //cout << "RANK " << world_rank << " checking DHT complete \n";
+
+                R["dht_flags"] = as<LogicalVector>(wrap(dht_flags));
+                //R.parseEvalQ("print(head(dht_flags))");
+            }
+
+            /* work */
+            convert_C_buffer_2_R_Dataframe(mpi_buffer, buffer);
+            R["work_package_full"] = buffer;
+            //R["work_package"] = buffer;
+            
+            //DEBUG
+            //R.parseEvalQ("print(head(work_package_full))");
+            //R.parseEvalQ("print( c(length(dht_flags), nrow(work_package_full)) )");
+
+            if (dht_enabled)
+            {
+                R.parseEvalQ("work_package <- work_package_full[dht_flags,]");
+            } else {
+                R.parseEvalQ("work_package <- work_package_full");
+            }
+
+            //DEBUG
+            // R.parseEvalQ("print(head(work_package),2)");
+            
+            // R.parseEvalQ("rownames(work_package) <- work_package$id");
+            // R.parseEval("print(paste('id %in% colnames(work_package)', 'id' %in% colnames(work_package)");
+            // R.parseEvalQ("id_store <- rownames(work_package)"); //"[, ncol(work_package)]");
+            // R.parseEvalQ("work_package$id <- NULL");
+            R.parseEvalQ("work_package <- as.matrix(work_package)");
+            
+            unsigned int nrows = R.parseEval("nrow(work_package)");
+
+            if (nrows > 0)
+            {
+                /*Single Line error Workaround*/
+                if (nrows <=1)
+                {
+                    //duplicate line to enable correct simmulation
+                    R.parseEvalQ("work_package <- work_package[rep(1:nrow(work_package), times = 2), ]");
+                }
+
+                phreeqc_count++;
+
+                phreeqc_time_start = MPI_Wtime();
+                // MDL
+                // R.parseEvalQ("print('Work_package:\n'); print(head(work_package , 2)); cat('RCpp: worker_function:', local_rank, ' \n')");
+                R.parseEvalQ("result <- as.data.frame(slave_chemistry(setup=mysetup, data = work_package))");
+                phreeqc_time_end = MPI_Wtime();
+                // R.parseEvalQ("result$id <- id_store");
+            } else 
+            {
+                //cout << "Work-Package is empty, skipping phreeqc!" << endl;
+            }
+
+
+            if (dht_enabled)
+            {
+                R.parseEvalQ("result_full <- work_package_full");
+                if (nrows > 0)
+                    R.parseEvalQ("result_full[dht_flags,] <- result");
+            } else {
+                R.parseEvalQ("result_full <- result");
+            }
+
+            Rcpp::DataFrame result = R.parseEval("result_full");
+            convert_R_Dataframe_2_C_buffer(mpi_buffer_results, result);
+
+            /* send results to master */
+            MPI_Request send_req;
+            MPI_Isend(mpi_buffer_results, count, MPI_DOUBLE, 0, TAG_WORK, MPI_COMM_WORLD, &send_req);
+
+            if (dht_enabled)
+            {
+                dht_fill_start = MPI_Wtime();
+                fill_dht(R, local_work_package_size, dht_flags, mpi_buffer, mpi_buffer_results);
+                dht_fill_end = MPI_Wtime();
+
+                timing[1] += dht_get_end - dht_get_start;
+                timing[2] += dht_fill_end - dht_fill_start;
+            }
+
+            timing[0] += phreeqc_time_end - phreeqc_time_start;
+
+            MPI_Wait(&send_req,MPI_STATUS_IGNORE);
+
+        } else if (probe_status.MPI_TAG == TAG_FINISH) 
+        {   /* recv and die */
+            /* before death, submit profiling/timings to master*/
+
+            MPI_Recv(NULL, 0, MPI_DOUBLE, 0, TAG_FINISH, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+            //timings
+            MPI_Send(timing, 3, MPI_DOUBLE, 0, TAG_TIMING, MPI_COMM_WORLD);
+
+            MPI_Send(&phreeqc_count, 1, MPI_INT, 0, TAG_TIMING, MPI_COMM_WORLD);
+            MPI_Send(&cummul_idle, 1, MPI_DOUBLE, 0, TAG_TIMING, MPI_COMM_WORLD);
+
+            if(dht_enabled) 
+            {
+                //dht_perf
+                dht_perf[0] = dht_hits;
+                dht_perf[1] = dht_miss;
+                dht_perf[2] = dht_collision;
+                MPI_Send(dht_perf, 3, MPI_UNSIGNED_LONG_LONG, 0, TAG_DHT_PERF, MPI_COMM_WORLD);
+            }
+
+            break;
+
+        } else if ((probe_status.MPI_TAG == TAG_DHT_STATS)) {
+            MPI_Recv(NULL, 0, MPI_DOUBLE, 0, TAG_DHT_STATS, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            print_statistics();
+            MPI_Barrier(MPI_COMM_WORLD);
+        } else if ((probe_status.MPI_TAG == TAG_DHT_STORE)) {
+            char* outdir;
+            MPI_Get_count(&probe_status, MPI_CHAR, &count);
+            outdir = (char *) calloc(count + 1, sizeof(char));
+            MPI_Recv(outdir, count, MPI_CHAR, 0, TAG_DHT_STORE, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            int res = table_to_file((char *) outdir);
+            if (res != DHT_SUCCESS) {
+                if (world_rank == 2) cerr << "CPP: Worker: Error in writing current state of DHT to file (TAG_DHT_STORE)" << endl;
+            } else {
+                if (world_rank == 2) cout << "CPP: Worker: Successfully written DHT to file " << outdir << endl;
+            }
+            free(outdir);
+            MPI_Barrier(MPI_COMM_WORLD);
+        }
+    }
+}
--- a/src/worker.h
+++ b/src/worker.h
@ -0,0 +1,17 @@
+#pragma once
+#include <RInside.h>
+
+using namespace std;
+using namespace Rcpp;
+
+/*Functions*/
+void worker_function(RInside &R);
+
+
+/*Globals*/
+#define TAG_WORK 42
+#define TAG_FINISH 43
+#define TAG_TIMING 44
+#define TAG_DHT_PERF 45
+#define TAG_DHT_STATS 46
+#define TAG_DHT_STORE 47