From f1a10a9b2264ff2a4ab5a511bca972520805c9e5 Mon Sep 17 00:00:00 2001 From: Max Luebke Date: Tue, 28 Mar 2023 14:27:00 +0200 Subject: [PATCH] fix: use Murmur hashing for DHT lookup --- include/poet/DHT.h | 4 +- include/poet/HashFunctions.hpp | 12 +++-- src/ChemistryModule/DHT.c | 2 +- src/ChemistryModule/DHT_Wrapper.cpp | 4 +- src/ChemistryModule/HashFunctions.cpp | 76 ++++++++++++++++++++++++++- 5 files changed, 88 insertions(+), 10 deletions(-) diff --git a/include/poet/DHT.h b/include/poet/DHT.h index 1acba5022..e8f8399f8 100644 --- a/include/poet/DHT.h +++ b/include/poet/DHT.h @@ -100,7 +100,7 @@ typedef struct { /** Size of the MPI communicator respectively all participating processes. */ int comm_size; /** Pointer to a hashfunction. */ - uint64_t (*hash_func)(int, void*); + uint64_t (*hash_func)(int, const void*); /** Pre-allocated memory where a bucket can be received. */ void* recv_entry; /** Pre-allocated memory where a bucket to send can be stored. */ @@ -143,7 +143,7 @@ typedef struct { */ extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process, unsigned int data_size, unsigned int key_size, - uint64_t (*hash_func)(int, void*)); + uint64_t (*hash_func)(int, const void*)); /** * @brief Write data into DHT. diff --git a/include/poet/HashFunctions.hpp b/include/poet/HashFunctions.hpp index 84ae38ed6..1a7b1c4f1 100644 --- a/include/poet/HashFunctions.hpp +++ b/include/poet/HashFunctions.hpp @@ -1,3 +1,5 @@ +// // Time-stamp: "Last modified 2023-03-27 14:51:05 mluebke" + /* ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of ** Potsdam) @@ -26,13 +28,15 @@ namespace poet { +// Sum of POET interpreted as ASCII +constexpr uint32_t HASH_SEED = 80 + 79 + 69 + 84; + void initHashCtx(const EVP_MD *md); void freeHashCtx(); -uint64_t hashDHT(int key_size, void *key); +uint64_t hashDHT(int key_size, const void *key); +uint64_t Murmur2_64A(int len, const void *key); - - -} +} // namespace poet #endif // HASHFUNCTIONS_H_ diff --git a/src/ChemistryModule/DHT.c b/src/ChemistryModule/DHT.c index 98cc0d435..485f50e18 100644 --- a/src/ChemistryModule/DHT.c +++ b/src/ChemistryModule/DHT.c @@ -52,7 +52,7 @@ static int read_flag(char flag_byte) { } DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size, - unsigned int key_size, uint64_t (*hash_func)(int, void *)) { + unsigned int key_size, uint64_t (*hash_func)(int, const void *)) { DHT *object; MPI_Win window; void *mem_alloc; diff --git a/src/ChemistryModule/DHT_Wrapper.cpp b/src/ChemistryModule/DHT_Wrapper.cpp index 9615b78f5..27e975f1a 100644 --- a/src/ChemistryModule/DHT_Wrapper.cpp +++ b/src/ChemistryModule/DHT_Wrapper.cpp @@ -76,8 +76,8 @@ DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size, uint32_t key_size = key_count * sizeof(DHT_Keyelement); uint32_t data_size = data_count * sizeof(double); uint32_t buckets_per_process = dht_size / (1 + data_size + key_size); - dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size, - &poet::hashDHT); + dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size + 1, + &poet::Murmur2_64A); // extract needed values from sim_param struct // t_simparams tmp = params.getNumParams(); diff --git a/src/ChemistryModule/HashFunctions.cpp b/src/ChemistryModule/HashFunctions.cpp index ee6393e14..79a8773cd 100644 --- a/src/ChemistryModule/HashFunctions.cpp +++ b/src/ChemistryModule/HashFunctions.cpp @@ -1,4 +1,10 @@ +// Time-stamp: "Last modified 2023-03-27 14:50:53 mluebke" /* +**----------------------------------------------------------------------------- +** MurmurHash2 was written by Austin Appleby, and is placed in the public +** domain. The author hereby disclaims copyright to this source code. +**----------------------------------------------------------------------------- +** ** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of ** Potsdam) ** @@ -25,6 +31,18 @@ #include #include +#if defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + // HACK: I know this is not a good practice, but this will do it for now! EVP_MD_CTX *ctx = NULL; @@ -40,7 +58,7 @@ void poet::freeHashCtx() { ctx = NULL; } -uint64_t poet::hashDHT(int key_size, void *key) { +uint64_t poet::hashDHT(int key_size, const void *key) { unsigned char sum[MD5_DIGEST_LENGTH]; uint32_t md_len; uint64_t retval, *v1, *v2; @@ -60,3 +78,59 @@ uint64_t poet::hashDHT(int key_size, void *key) { return retval; } + +//----------------------------------------------------------------------------- +// MurmurHash2, 64-bit versions, by Austin Appleby + +// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment +// and endian-ness issues if used across multiple platforms. + +// 64-bit hash for 64-bit platforms +// objsize: 0x170-0x321: 433 + +uint64_t poet::Murmur2_64A(int len, const void *key) { + const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); + const int r = 47; + + uint64_t h = HASH_SEED ^ (len * m); + + const uint64_t *data = (const uint64_t *)key; + const uint64_t *end = data + (len / 8); + + while (data != end) { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char *data2 = (const unsigned char *)data; + + switch (len & 7) { + case 7: + h ^= uint64_t(data2[6]) << 48; + case 6: + h ^= uint64_t(data2[5]) << 40; + case 5: + h ^= uint64_t(data2[4]) << 32; + case 4: + h ^= uint64_t(data2[3]) << 24; + case 3: + h ^= uint64_t(data2[2]) << 16; + case 2: + h ^= uint64_t(data2[1]) << 8; + case 1: + h ^= uint64_t(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +}