Merge branch 'fix-hashing' into 'main'

fix: use Murmur hashing for DHT lookup

See merge request naaice/poet!2
This commit is contained in:
Max Lübke 2023-03-28 14:31:59 +02:00
commit 76dcdf400e
5 changed files with 88 additions and 10 deletions

View File

@ -100,7 +100,7 @@ typedef struct {
/** Size of the MPI communicator respectively all participating processes. */ /** Size of the MPI communicator respectively all participating processes. */
int comm_size; int comm_size;
/** Pointer to a hashfunction. */ /** Pointer to a hashfunction. */
uint64_t (*hash_func)(int, void*); uint64_t (*hash_func)(int, const void*);
/** Pre-allocated memory where a bucket can be received. */ /** Pre-allocated memory where a bucket can be received. */
void* recv_entry; void* recv_entry;
/** Pre-allocated memory where a bucket to send can be stored. */ /** Pre-allocated memory where a bucket to send can be stored. */
@ -143,7 +143,7 @@ typedef struct {
*/ */
extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process, extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process,
unsigned int data_size, unsigned int key_size, unsigned int data_size, unsigned int key_size,
uint64_t (*hash_func)(int, void*)); uint64_t (*hash_func)(int, const void*));
/** /**
* @brief Write data into DHT. * @brief Write data into DHT.

View File

@ -1,3 +1,5 @@
// // Time-stamp: "Last modified 2023-03-27 14:51:05 mluebke"
/* /*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam) ** Potsdam)
@ -26,13 +28,15 @@
namespace poet { namespace poet {
// Seed mixed into the initial state of Murmur2_64A: the sum of the ASCII
// codes of the letters 'P' (80), 'O' (79), 'E' (69) and 'T' (84), i.e. 312.
constexpr uint32_t HASH_SEED = 80 + 79 + 69 + 84;
void initHashCtx(const EVP_MD *md); void initHashCtx(const EVP_MD *md);
void freeHashCtx(); void freeHashCtx();
uint64_t hashDHT(int key_size, void *key); uint64_t hashDHT(int key_size, const void *key);
/**
 * @brief 64-bit MurmurHash2 of a byte buffer, seeded with HASH_SEED.
 *
 * The signature matches the hash function pointer expected by DHT_create().
 *
 * @param len Number of bytes to hash, starting at key.
 * @param key Pointer to the first byte of the data to hash.
 * @return 64-bit hash value.
 */
uint64_t Murmur2_64A(int len, const void *key);
} // namespace poet
}
#endif // HASHFUNCTIONS_H_ #endif // HASHFUNCTIONS_H_

View File

@ -52,7 +52,7 @@ static int read_flag(char flag_byte) {
} }
DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size,
unsigned int key_size, uint64_t (*hash_func)(int, void *)) { unsigned int key_size, uint64_t (*hash_func)(int, const void *)) {
DHT *object; DHT *object;
MPI_Win window; MPI_Win window;
void *mem_alloc; void *mem_alloc;

View File

@ -76,8 +76,8 @@ DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size,
uint32_t key_size = key_count * sizeof(DHT_Keyelement); uint32_t key_size = key_count * sizeof(DHT_Keyelement);
uint32_t data_size = data_count * sizeof(double); uint32_t data_size = data_count * sizeof(double);
uint32_t buckets_per_process = dht_size / (1 + data_size + key_size); uint32_t buckets_per_process = dht_size / (1 + data_size + key_size);
dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size + 1,
&poet::hashDHT); &poet::Murmur2_64A);
// extract needed values from sim_param struct // extract needed values from sim_param struct
// t_simparams tmp = params.getNumParams(); // t_simparams tmp = params.getNumParams();

View File

@ -1,4 +1,10 @@
// Time-stamp: "Last modified 2023-03-27 14:50:53 mluebke"
/* /*
**-----------------------------------------------------------------------------
** MurmurHash2 was written by Austin Appleby, and is placed in the public
** domain. The author hereby disclaims copyright to this source code.
**-----------------------------------------------------------------------------
**
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam) ** Potsdam)
** **
@ -25,6 +31,18 @@
#include <openssl/md5.h> #include <openssl/md5.h>
#include <stdexcept> #include <stdexcept>
// BIG_CONSTANT(x) wraps a 64-bit integer literal. With MSVC the literal is
// used exactly as written; with every other compiler the LLU suffix is pasted
// onto it so the literal is typed as unsigned long long.
#if defined(_MSC_VER)
#define BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
// HACK: I know this is not a good practice, but this will do it for now! // HACK: I know this is not a good practice, but this will do it for now!
EVP_MD_CTX *ctx = NULL; EVP_MD_CTX *ctx = NULL;
@ -40,7 +58,7 @@ void poet::freeHashCtx() {
ctx = NULL; ctx = NULL;
} }
uint64_t poet::hashDHT(int key_size, void *key) { uint64_t poet::hashDHT(int key_size, const void *key) {
unsigned char sum[MD5_DIGEST_LENGTH]; unsigned char sum[MD5_DIGEST_LENGTH];
uint32_t md_len; uint32_t md_len;
uint64_t retval, *v1, *v2; uint64_t retval, *v1, *v2;
@ -60,3 +78,59 @@ uint64_t poet::hashDHT(int key_size, void *key) {
return retval; return retval;
} }
//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby
// The same caveats as 32-bit MurmurHash2 apply here - beware of endian-ness
// issues if used across multiple platforms: 8-byte blocks are read in the
// byte order of the executing machine, so the hash value is not portable
// between platforms of different endianness.
// 64-bit hash for 64-bit platforms

// Reads the eight bytes starting at src into a uint64_t, in native byte
// order. Copying through unsigned char is valid for any address, whereas
// dereferencing a uint64_t pointer obtained from an arbitrary buffer is
// undefined behaviour when the buffer is not 8-byte aligned.
static inline uint64_t murmur_load_u64(const unsigned char *src) {
  uint64_t value;
  unsigned char *dst = reinterpret_cast<unsigned char *>(&value);
  for (int i = 0; i < 8; ++i) {
    dst[i] = src[i];
  }
  return value;
}

/**
 * @brief 64-bit MurmurHash2 of a byte buffer, seeded with HASH_SEED.
 *
 * The buffer may start at any address; no alignment is required.
 *
 * @param len Number of bytes to hash. Must not be negative, otherwise the
 *            block loop below would walk outside of the buffer.
 * @param key Pointer to the first byte of the data to hash.
 * @return 64-bit hash value.
 */
uint64_t poet::Murmur2_64A(int len, const void *key) {
  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
  const int r = 47;

  // len is converted to uint64_t for the multiplication.
  uint64_t h = HASH_SEED ^ (len * m);

  const unsigned char *cursor = static_cast<const unsigned char *>(key);
  const unsigned char *const blocks_end = cursor + (len / 8) * 8;

  // Mix in all complete 8-byte blocks.
  while (cursor != blocks_end) {
    uint64_t k = murmur_load_u64(cursor);
    cursor += 8;

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  // Mix in the remaining 0..7 tail bytes. Every case deliberately falls
  // through to the next one so that all tail bytes below it are folded in.
  switch (len & 7) {
  case 7:
    h ^= static_cast<uint64_t>(cursor[6]) << 48;
    // fall through
  case 6:
    h ^= static_cast<uint64_t>(cursor[5]) << 40;
    // fall through
  case 5:
    h ^= static_cast<uint64_t>(cursor[4]) << 32;
    // fall through
  case 4:
    h ^= static_cast<uint64_t>(cursor[3]) << 24;
    // fall through
  case 3:
    h ^= static_cast<uint64_t>(cursor[2]) << 16;
    // fall through
  case 2:
    h ^= static_cast<uint64_t>(cursor[1]) << 8;
    // fall through
  case 1:
    h ^= static_cast<uint64_t>(cursor[0]);
    h *= m;
  }

  // Final avalanche.
  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}