mirror of
https://git.gfz-potsdam.de/naaice/poet.git
synced 2025-12-16 04:48:23 +01:00
Merge branch 'fix-hashing' into 'main'
fix: use Murmur hashing for DHT lookup See merge request naaice/poet!2
This commit is contained in:
commit
76dcdf400e
@ -100,7 +100,7 @@ typedef struct {
|
|||||||
/** Size of the MPI communicator respectively all participating processes. */
|
/** Size of the MPI communicator respectively all participating processes. */
|
||||||
int comm_size;
|
int comm_size;
|
||||||
/** Pointer to a hashfunction. */
|
/** Pointer to a hashfunction. */
|
||||||
uint64_t (*hash_func)(int, void*);
|
uint64_t (*hash_func)(int, const void*);
|
||||||
/** Pre-allocated memory where a bucket can be received. */
|
/** Pre-allocated memory where a bucket can be received. */
|
||||||
void* recv_entry;
|
void* recv_entry;
|
||||||
/** Pre-allocated memory where a bucket to send can be stored. */
|
/** Pre-allocated memory where a bucket to send can be stored. */
|
||||||
@ -143,7 +143,7 @@ typedef struct {
|
|||||||
*/
|
*/
|
||||||
extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process,
|
extern DHT* DHT_create(MPI_Comm comm, uint64_t size_per_process,
|
||||||
unsigned int data_size, unsigned int key_size,
|
unsigned int data_size, unsigned int key_size,
|
||||||
uint64_t (*hash_func)(int, void*));
|
uint64_t (*hash_func)(int, const void*));
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Write data into DHT.
|
* @brief Write data into DHT.
|
||||||
|
|||||||
@ -1,3 +1,5 @@
|
|||||||
|
// // Time-stamp: "Last modified 2023-03-27 14:51:05 mluebke"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
|
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
|
||||||
** Potsdam)
|
** Potsdam)
|
||||||
@ -26,13 +28,15 @@
|
|||||||
|
|
||||||
namespace poet {
|
namespace poet {
|
||||||
|
|
||||||
|
// Hash seed: sum of the ASCII codes of the characters 'P', 'O', 'E', 'T'
|
||||||
|
constexpr uint32_t HASH_SEED = 80 + 79 + 69 + 84;
|
||||||
|
|
||||||
void initHashCtx(const EVP_MD *md);
|
void initHashCtx(const EVP_MD *md);
|
||||||
void freeHashCtx();
|
void freeHashCtx();
|
||||||
|
|
||||||
uint64_t hashDHT(int key_size, void *key);
|
uint64_t hashDHT(int key_size, const void *key);
|
||||||
|
uint64_t Murmur2_64A(int len, const void *key);
|
||||||
|
|
||||||
|
} // namespace poet
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // HASHFUNCTIONS_H_
|
#endif // HASHFUNCTIONS_H_
|
||||||
|
|||||||
@ -52,7 +52,7 @@ static int read_flag(char flag_byte) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size,
|
DHT *DHT_create(MPI_Comm comm, uint64_t size, unsigned int data_size,
|
||||||
unsigned int key_size, uint64_t (*hash_func)(int, void *)) {
|
unsigned int key_size, uint64_t (*hash_func)(int, const void *)) {
|
||||||
DHT *object;
|
DHT *object;
|
||||||
MPI_Win window;
|
MPI_Win window;
|
||||||
void *mem_alloc;
|
void *mem_alloc;
|
||||||
|
|||||||
@ -76,8 +76,8 @@ DHT_Wrapper::DHT_Wrapper(MPI_Comm dht_comm, uint32_t dht_size,
|
|||||||
uint32_t key_size = key_count * sizeof(DHT_Keyelement);
|
uint32_t key_size = key_count * sizeof(DHT_Keyelement);
|
||||||
uint32_t data_size = data_count * sizeof(double);
|
uint32_t data_size = data_count * sizeof(double);
|
||||||
uint32_t buckets_per_process = dht_size / (1 + data_size + key_size);
|
uint32_t buckets_per_process = dht_size / (1 + data_size + key_size);
|
||||||
dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size,
|
dht_object = DHT_create(dht_comm, buckets_per_process, data_size, key_size + 1,
|
||||||
&poet::hashDHT);
|
&poet::Murmur2_64A);
|
||||||
|
|
||||||
// extract needed values from sim_param struct
|
// extract needed values from sim_param struct
|
||||||
// t_simparams tmp = params.getNumParams();
|
// t_simparams tmp = params.getNumParams();
|
||||||
|
|||||||
@ -1,4 +1,10 @@
|
|||||||
|
// Time-stamp: "Last modified 2023-03-27 14:50:53 mluebke"
|
||||||
/*
|
/*
|
||||||
|
**-----------------------------------------------------------------------------
|
||||||
|
** MurmurHash2 was written by Austin Appleby, and is placed in the public
|
||||||
|
** domain. The author hereby disclaims copyright to this source code.
|
||||||
|
**-----------------------------------------------------------------------------
|
||||||
|
**
|
||||||
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
|
** Copyright (C) 2018-2021 Alexander Lindemann, Max Luebke (University of
|
||||||
** Potsdam)
|
** Potsdam)
|
||||||
**
|
**
|
||||||
@ -25,6 +31,18 @@
|
|||||||
#include <openssl/md5.h>
|
#include <openssl/md5.h>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
|
||||||
|
#define BIG_CONSTANT(x) (x)
|
||||||
|
|
||||||
|
// Other compilers
|
||||||
|
|
||||||
|
#else // defined(_MSC_VER)
|
||||||
|
|
||||||
|
#define BIG_CONSTANT(x) (x##LLU)
|
||||||
|
|
||||||
|
#endif // !defined(_MSC_VER)
|
||||||
|
|
||||||
// HACK: I know this is not a good practice, but this will do it for now!
|
// HACK: I know this is not a good practice, but this will do it for now!
|
||||||
EVP_MD_CTX *ctx = NULL;
|
EVP_MD_CTX *ctx = NULL;
|
||||||
|
|
||||||
@ -40,7 +58,7 @@ void poet::freeHashCtx() {
|
|||||||
ctx = NULL;
|
ctx = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t poet::hashDHT(int key_size, void *key) {
|
uint64_t poet::hashDHT(int key_size, const void *key) {
|
||||||
unsigned char sum[MD5_DIGEST_LENGTH];
|
unsigned char sum[MD5_DIGEST_LENGTH];
|
||||||
uint32_t md_len;
|
uint32_t md_len;
|
||||||
uint64_t retval, *v1, *v2;
|
uint64_t retval, *v1, *v2;
|
||||||
@ -60,3 +78,59 @@ uint64_t poet::hashDHT(int key_size, void *key) {
|
|||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
// MurmurHash2, 64-bit versions, by Austin Appleby
|
||||||
|
|
||||||
|
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
|
||||||
|
// and endian-ness issues if used across multiple platforms.
|
||||||
|
|
||||||
|
// 64-bit hash for 64-bit platforms
|
||||||
|
// objsize: 0x170-0x321: 433
|
||||||
|
|
||||||
|
uint64_t poet::Murmur2_64A(int len, const void *key) {
|
||||||
|
const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
|
||||||
|
const int r = 47;
|
||||||
|
|
||||||
|
uint64_t h = HASH_SEED ^ (len * m);
|
||||||
|
|
||||||
|
const uint64_t *data = (const uint64_t *)key;
|
||||||
|
const uint64_t *end = data + (len / 8);
|
||||||
|
|
||||||
|
while (data != end) {
|
||||||
|
uint64_t k = *data++;
|
||||||
|
|
||||||
|
k *= m;
|
||||||
|
k ^= k >> r;
|
||||||
|
k *= m;
|
||||||
|
|
||||||
|
h ^= k;
|
||||||
|
h *= m;
|
||||||
|
}
|
||||||
|
|
||||||
|
const unsigned char *data2 = (const unsigned char *)data;
|
||||||
|
|
||||||
|
switch (len & 7) {
|
||||||
|
case 7:
|
||||||
|
h ^= uint64_t(data2[6]) << 48;
|
||||||
|
case 6:
|
||||||
|
h ^= uint64_t(data2[5]) << 40;
|
||||||
|
case 5:
|
||||||
|
h ^= uint64_t(data2[4]) << 32;
|
||||||
|
case 4:
|
||||||
|
h ^= uint64_t(data2[3]) << 24;
|
||||||
|
case 3:
|
||||||
|
h ^= uint64_t(data2[2]) << 16;
|
||||||
|
case 2:
|
||||||
|
h ^= uint64_t(data2[1]) << 8;
|
||||||
|
case 1:
|
||||||
|
h ^= uint64_t(data2[0]);
|
||||||
|
h *= m;
|
||||||
|
};
|
||||||
|
|
||||||
|
h ^= h >> r;
|
||||||
|
h *= m;
|
||||||
|
h ^= h >> r;
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user