Merge branch 'fix-hashing' into 'main'

fix: use Murmur hashing for DHT lookup

See merge request naaice/poet!2
This commit is contained in:
Max Lübke 2023-03-28 14:31:59 +02:00
commit 810f16f64c
5 changed files with 88 additions and 10 deletions

View File

@ -100,7 +100,7 @@ typedef struct {
/** Size of the MPI communicator respectively all participating processes. */
int comm_size;
/** Pointer to a hashfunction. */
uint64_t (*hash_func)(int, void*);
uint64_t (*hash_func)(int, const void*);
/** Pre-allocated memory where a bucket can be received. */
void* recv_entry;
/** Pre-allocated memory where a bucket to send can be stored. */
@ -143,7 +143,7 @@ typedef struct {
*/
extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process,
unsigned int data_size, unsigned int key_size,
uint64_t (*hash_func)(int, void*));
uint64_t (*hash_func)(int, const void*));
/**
* @brief Write data into DHT.

View File

@ -1,3 +1,5 @@
// // Time-stamp: "Last modified 2023-03-27 14:51:05 mluebke"
/*
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
@ -26,13 +28,15 @@
namespace poet {
// Sum of POET interpreted as ASCII
constexpr uint32_t HASH_SEED = 80 + 79 + 69 + 84;
void initHashCtx(const EVP_MD *md);
void freeHashCtx();
uint64_t hashDHT(int key_size, void *key);
uint64_t hashDHT(int key_size, const void *key);
uint64_t Murmur2_64A(int len, const void *key);
}
} // namespace poet
#endif // HASHFUNCTIONS_H_

View File

@ -52,7 +52,7 @@ static int read_flag(char flag_byte) {
}
DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size,
unsigned int key_size, uint64_t (*hash_func)(int, void *)) {
unsigned int key_size, uint64_t (*hash_func)(int, const void *)) {
DHT *object;
MPI_Win window;
void *mem_alloc;

View File

@ -76,8 +76,8 @@ DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size,
uint32_t key_size = key_count * sizeof(DHT_Keyelement);
uint32_t data_size = data_count * sizeof(double);
uint32_t buckets_per_process = dht_size / (1 + data_size + key_size);
dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size,
&poet::hashDHT);
dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size + 1,
&poet::Murmur2_64A);
// extract needed values from sim_param struct
// t_simparams tmp = params.getNumParams();

View File

@ -1,4 +1,10 @@
// Time-stamp: "Last modified 2023-03-27 14:50:53 mluebke"
/*
**-----------------------------------------------------------------------------
** MurmurHash2 was written by Austin Appleby, and is placed in the public
** domain. The author hereby disclaims copyright to this source code.
**-----------------------------------------------------------------------------
**
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
** Potsdam)
**
@ -25,6 +31,18 @@
#include <openssl/md5.h>
#include <cstring>
#include <stdexcept>
// BIG_CONSTANT(x) wraps a large integer literal so that it can be written the
// same way for every compiler; it is used for the 64-bit Murmur constants.
#ifndef _MSC_VER
// Non-MSVC compilers: paste an LLU suffix onto the literal, making it an
// unsigned long long constant.
#define BIG_CONSTANT(x) (x##LLU)
#else // _MSC_VER
// MSVC: the literal is used exactly as written.
#define BIG_CONSTANT(x) (x)
#endif // _MSC_VER
// HACK: I know this is not a good practice, but this will do it for now!
EVP_MD_CTX *ctx = NULL;
@ -40,7 +58,7 @@ void poet::freeHashCtx() {
ctx = NULL;
}
uint64_t poet::hashDHT(int key_size, void *key) {
uint64_t poet::hashDHT(int key_size, const void *key) {
unsigned char sum[MD5_DIGEST_LENGTH];
uint32_t md_len;
uint64_t retval, *v1, *v2;
@ -60,3 +78,59 @@ uint64_t poet::hashDHT(int key_size, void *key) {
return retval;
}
//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit version (MurmurHash64A), by Austin Appleby.
//
// The 8-byte blocks are loaded in the host's native byte order, so the hash
// value of a given buffer differs between little- and big-endian platforms.
// Beware of this if hashes are exchanged across different architectures.

/**
 * @brief Compute the 64-bit MurmurHash2 (variant 64A) of a byte buffer.
 *
 * The hash is seeded with poet::HASH_SEED. The signature matches the
 * `uint64_t (*)(int, const void *)` hash callback taken by DHT_create().
 *
 * @param len Number of bytes to hash. Must not be negative.
 * @param key Pointer to the first byte of the buffer. No alignment is
 *            required: every 8-byte block is copied out with std::memcpy
 *            instead of being read through a `uint64_t` pointer.
 * @return The 64-bit hash of the `len` bytes starting at `key`.
 */
uint64_t poet::Murmur2_64A(int len, const void *key) {
  const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
  const int r = 47;

  uint64_t h = HASH_SEED ^ (static_cast<uint64_t>(len) * m);

  const unsigned char *cursor = static_cast<const unsigned char *>(key);
  const int block_count = len / 8;

  // Body: mix in all complete 8-byte blocks.
  for (int block = 0; block < block_count; ++block) {
    // memcpy yields the same value as a direct uint64_t load, but is well
    // defined for unaligned buffers and does not break strict aliasing.
    uint64_t k;
    std::memcpy(&k, cursor, sizeof(k));
    cursor += sizeof(k);

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  // Tail: fold in the remaining 0..7 bytes. Every case intentionally falls
  // through to the ones below it.
  const unsigned char *tail = cursor;
  switch (len & 7) {
  case 7:
    h ^= static_cast<uint64_t>(tail[6]) << 48;
    // fall through
  case 6:
    h ^= static_cast<uint64_t>(tail[5]) << 40;
    // fall through
  case 5:
    h ^= static_cast<uint64_t>(tail[4]) << 32;
    // fall through
  case 4:
    h ^= static_cast<uint64_t>(tail[3]) << 24;
    // fall through
  case 3:
    h ^= static_cast<uint64_t>(tail[2]) << 16;
    // fall through
  case 2:
    h ^= static_cast<uint64_t>(tail[1]) << 8;
    // fall through
  case 1:
    h ^= static_cast<uint64_t>(tail[0]);
    h *= m;
  }

  // Finalization: last avalanche rounds over the accumulated state.
  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}