402 lines
8.4 KiB
C
402 lines
8.4 KiB
C
// Reference:
|
|
// https://abseil.io/about/design/swisstables#swiss-tables-design-notes
|
|
// https://github.com/gtmshrm/Swissmap
|
|
// https://github.com/Tessil/robin-map
|
|
// https://github.com/google/highwayhash
|
|
// https://peps.python.org/pep-0456
|
|
#include "hash_map.h"
|
|
|
|
#include <string.h>
|
|
|
|
#if defined(_WIN32)
|
|
#include <xmmintrin.h>
|
|
#define ALIGNED_ALLOC(size_in_bytes, aligment) _mm_malloc(size_in_bytes, aligment)
|
|
#else
|
|
#include <stdlib.h>
|
|
#define ALIGNED_ALLOC(size_in_bytes, aligment) aligned_alloc(aligment, size_in_bytes)
|
|
#endif /* _WIN32 */
|
|
|
|
#if defined(__GNUC__) || defined(__clang__)
|
|
#define LIKELY(x) __builtin_expect(!!(x), 1)
|
|
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
|
|
#else
|
|
#define LIKELY(x) (x)
|
|
#define UNLIKELY(x) (x)
|
|
#endif /* __GNUC__ || __clang__ */
|
|
|
|
// Based on XXHASH
|
|
#include "xxhash.h"
|
|
|
|
//static inline T getTombstone() noexcept;
|
|
//static inline T getEmpty() noexcept;
|
|
//static inline uint64_t hash(const T& key) noexcept;
|
|
//static inline bool isEqual(const T& lhs, const T& rhs) noexcept;
|
|
//static inline bool isValid(const T& key) noexcept;
|
|
|
|
struct BucketEntry
|
|
{
|
|
|
|
};
|
|
|
|
#define POW2UINT(v) do { \
|
|
(v)--; \
|
|
(v) |= (v) >> 1; \
|
|
(v) |= (v) >> 2; \
|
|
(v) |= (v) >> 4; \
|
|
(v) |= (v) >> 8; \
|
|
(v) |= (v) >> 16; \
|
|
(v)++; \
|
|
} while (0)
|
|
|
|
#define CACHE_LINE_SIZE ((size_t)(64))
|
|
|
|
#define ALIAN(cursor, aligment) (((size_t)(cursor) + ((size_t)(alignment) - 1)) & ~((size_t)(alignment) - 1))
|
|
#define ALIANOF(s, aligment) (((size_t)(s) + ((size_t)(alignment) - 1)) & ~((size_t)(alignment) - 1))
|
|
#define IS_POINTER_ALIGNED(cursor, alignment) (((uintpr_t)(cursor) & ((alignment) - 1)) == 0)
|
|
|
|
#define MAX(a,b) (a) > (b) ? (a) : (b)
|
|
|
|
static const size_t kPrimes[] = {
|
|
1u,
|
|
5u,
|
|
17u,
|
|
29u,
|
|
37u,
|
|
53u,
|
|
67u,
|
|
79u,
|
|
97u,
|
|
131u,
|
|
193u,
|
|
257u,
|
|
389u,
|
|
521u,
|
|
769u,
|
|
1031u,
|
|
1543u,
|
|
2053u,
|
|
3079u,
|
|
6151u,
|
|
12289u,
|
|
24593u,
|
|
49157u,
|
|
#if SIZE_MAX >= ULONG_MAX
|
|
98317ul,
|
|
196613ul,
|
|
393241ul,
|
|
786433ul,
|
|
1572869ul,
|
|
3145739ul,
|
|
6291469ul,
|
|
12582917ul,
|
|
25165843ul,
|
|
50331653ul,
|
|
100663319ul,
|
|
201326611ul,
|
|
402653189ul,
|
|
805306457ul,
|
|
1610612741ul,
|
|
3221225473ul,
|
|
4294967291ul,
|
|
#endif
|
|
#if SIZE_MAX >= ULLONG_MAX
|
|
6442450939ull,
|
|
12884901893ull,
|
|
25769803751ull,
|
|
51539607551ull,
|
|
103079215111ull,
|
|
206158430209ull,
|
|
412316860441ull,
|
|
824633720831ull,
|
|
1649267441651ull,
|
|
3298534883309ull,
|
|
6597069766657ull,
|
|
#endif
|
|
};
|
|
|
|
const uint32_t kMinNumberOfBuckets = 16;
|
|
const uint32_t kPrimeMinNumberOfBuckets = 5;
|
|
|
|
// Open Addressing Hash Table
|
|
struct HashMap
|
|
{
|
|
HashMapGrowthPolicy policy;
|
|
void *storage;
|
|
uint32_t num_of_buckets;
|
|
uint32_t num_of_elements;
|
|
int is_internal_storage;
|
|
};
|
|
|
|
/*
|
|
* SipHash-2-4
|
|
*/
|
|
#define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
|
|
|
|
#define HALF_ROUND(a,b,c,d,s,t) \
|
|
do { \
|
|
a += b; c += d; \
|
|
b = ROTATE(b, s) ^ a; \
|
|
d = ROTATE(d, t) ^ c; \
|
|
a = ROTATE(a, 32); \
|
|
} while (0)
|
|
|
|
#define DOUBLE_ROUND(v0,v1,v2,v3) \
|
|
do { \
|
|
HALF_ROUND(v0,v1,v2,v3,13,16); \
|
|
HALF_ROUND(v2,v1,v0,v3,17,21); \
|
|
HALF_ROUND(v0,v1,v2,v3,13,16); \
|
|
HALF_ROUND(v2,v1,v0,v3,17,21); \
|
|
} while (0)
|
|
|
|
|
|
uint64_t SipHash24(const void *src, unsigned long src_sz, const char key[16])
|
|
{
|
|
const uint64_t *_key = (uint64_t *)key;
|
|
uint64_t k0 = (uint64_t)(_key[0]);
|
|
uint64_t k1 = (uint64_t)(_key[1]);
|
|
uint64_t b = (uint64_t)src_sz << 56;
|
|
const uint64_t *in = (uint64_t*)src;
|
|
|
|
uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
|
|
uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
|
|
uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
|
|
uint64_t v3 = k1 ^ 0x7465646279746573ULL;
|
|
|
|
while (src_sz >= 8) {
|
|
uint64_t mi = (uint64_t)(*in);
|
|
in += 1;
|
|
src_sz -= 8;
|
|
v3 ^= mi;
|
|
DOUBLE_ROUND(v0,v1,v2,v3);
|
|
v0 ^= mi;
|
|
}
|
|
|
|
uint64_t t = 0;
|
|
uint8_t *pt = (uint8_t *)&t;
|
|
uint8_t *m = (uint8_t *)in;
|
|
|
|
switch (src_sz) {
|
|
case 7: pt[6] = m[6];
|
|
case 6: pt[5] = m[5];
|
|
case 5: pt[4] = m[4];
|
|
case 4: *((uint32_t*)&pt[0]) = *((uint32_t*)&m[0]); break;
|
|
case 3: pt[2] = m[2];
|
|
case 2: pt[1] = m[1];
|
|
case 1: pt[0] = m[0];
|
|
}
|
|
|
|
b |= (uint64_t)(t);
|
|
|
|
v3 ^= b;
|
|
DOUBLE_ROUND(v0,v1,v2,v3);
|
|
v0 ^= b;
|
|
v2 ^= 0xff;
|
|
DOUBLE_ROUND(v0,v1,v2,v3);
|
|
DOUBLE_ROUND(v0,v1,v2,v3);
|
|
|
|
return (v0 ^ v1) ^ (v2 ^ v3);
|
|
}
|
|
|
|
/*
|
|
* DJB2 Hash Function.
|
|
*/
|
|
size_t BytesHasher(unsigned char *str)
|
|
{
|
|
size_t hash = 5381;
|
|
int c;
|
|
|
|
while (c = *str++)
|
|
{
|
|
hash = ((hash << 5) + hash) + c;
|
|
}
|
|
return hash;
|
|
}
|
|
|
|
size_t UInt32Hasher(uint32_t v)
|
|
{
|
|
// fmix32 from MurmurHash by Austin Appleby (public domain)
|
|
v ^= v >> 16;
|
|
v *= 0x85ebca6b;
|
|
v ^= v >> 13;
|
|
v *= 0xc2b2ae35;
|
|
v ^= v >> 16;
|
|
return (size_t) v;
|
|
}
|
|
|
|
size_t UInt64Hasher(uint64_t v)
|
|
{
|
|
// fmix32 from MurmurHash by Austin Appleby (public domain)
|
|
v ^= v >> 33;
|
|
v *= (uint64_t) 0xff51afd7ed558ccdull;
|
|
v ^= v >> 33;
|
|
v *= (uint64_t) 0xc4ceb9fe1a85ec53ull;
|
|
v ^= v >> 33;
|
|
return (size_t) v;
|
|
}
|
|
|
|
size_t XXH128Cmp(const XXH128_hash_t *h1, const XXH128_hash_t *h2)
|
|
{
|
|
if (h1->high64 < h2->high64)
|
|
{
|
|
return 1;
|
|
}
|
|
else if (h1->high64 > h2->high64)
|
|
{
|
|
return 0;
|
|
}
|
|
else if (h1->low64 < h2->low64)
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void HashCombine(size_t seed, size_t value)
|
|
{
|
|
/// From CityHash (https://github.com/google/cityhash)
|
|
const size_t mult = 0x9ddfea08eb382d69ull;
|
|
size_t a = (value ^ seed) * mult;
|
|
a ^= (a >> 47);
|
|
size_t b = (seed ^ a) * mult;
|
|
b ^= (b >> 47);
|
|
seed = b * mult;
|
|
}
|
|
|
|
size_t Hash64WithSeed(const void *ptr, size_t size, size_t seed)
|
|
{
|
|
return (size_t)XXH3_64bits_withSeed(ptr, size, (unsigned long long)seed);
|
|
}
|
|
|
|
size_t Hash64(const void *ptr, size_t size)
|
|
{
|
|
return (size_t)XXH3_64bits(ptr, size);
|
|
}
|
|
|
|
size_t HashStrWithSeed(const char *str, size_t seed)
|
|
{
|
|
return Hash64WithSeed(str, strlen(str), seed);
|
|
}
|
|
|
|
size_t HashStr(const char *str)
|
|
{
|
|
return Hash64(str, strlen(str));
|
|
}
|
|
|
|
// XXH128_hash_t HashKernel(const char *str)
|
|
// {
|
|
// const char *offset = strstr(str, "body:");
|
|
// if (UNLIKELY(!offset))
|
|
// {
|
|
// offset = strchr(str, '{');
|
|
// if (UNLIKELY(!offset))
|
|
// assert(!"hash_kernel(): invalid input!");
|
|
// }
|
|
//
|
|
// return XXH128(offset, strlen(offset), 0);
|
|
// }
|
|
|
|
HashMap *HashMap_Create(uint32_t num)
|
|
{
|
|
HashMap *o = (HashMap *)malloc(sizeof(HashMap));
|
|
|
|
o->storage = NULL;
|
|
|
|
num = (num < kMinNumberOfBuckets) ? kMinNumberOfBuckets : num;
|
|
|
|
size_t num_bytes = sizeof(TItem) * num;
|
|
// note: 64 to match CPU cache line size
|
|
size_t alignment = MAX(ALIANOF(TItem, CACHE_LINE_SIZE), CACHE_LINE_SIZE);
|
|
num_bytes = ALIGN(num_bytes, alignment);
|
|
assert((num_bytes % alignment) == 0);
|
|
|
|
void* raw = ALIGNED_ALLOC(num_bytes, alignment);
|
|
assert(raw);
|
|
|
|
o->storage = raw;
|
|
EXLBR_ASSERT(IS_POINTER_ALIGNED(o->storage, ALIGNOF(TItem)));
|
|
|
|
o->num_of_buckets = num;
|
|
o->num_of_elements = 0;
|
|
|
|
TItem* item = o->storage;
|
|
const TItem* end_item = item + num;
|
|
|
|
for (; item != end_item; item++)
|
|
{
|
|
(item, TKeyInfo::getEmpty());
|
|
}
|
|
|
|
return o;
|
|
}
|
|
|
|
void *HashMap_Find(HashMap *o, void *key)
|
|
{
|
|
const uint32_t num = o->num_of_buckets;
|
|
TItem* const first = o->storage;
|
|
TItem* const end = first + num;
|
|
const uint64_t hash_value = TKeyInfo::hash(key);
|
|
uint32_t bucketIndex = uint32_t(hashValue) & (numBuckets - 1);
|
|
TItem* startItem = firstItem + bucketIndex;
|
|
TItem* EXLBR_RESTRICT currentItem = startItem;
|
|
do
|
|
{
|
|
if (EXLBR_LIKELY(currentItem->isEqual(key)))
|
|
{
|
|
return currentItem;
|
|
}
|
|
|
|
if (currentItem->isEmpty())
|
|
{
|
|
return endItem;
|
|
}
|
|
|
|
currentItem++;
|
|
currentItem = (currentItem == endItem) ? firstItem : currentItem;
|
|
} while (currentItem != startItem);
|
|
return endItem;
|
|
}
|
|
|
|
uint32_t HashMap_Size(HashMap *o)
|
|
{
|
|
return o->num_of_elements;
|
|
}
|
|
|
|
uint32_t HashMap_Capacity(HashMap *o)
|
|
{
|
|
return o->num_of_buckets;
|
|
}
|
|
|
|
int HashMap_Empty(HashMap *o)
|
|
{
|
|
return o->num_of_elements == 0 ? 1 : 0;
|
|
}
|
|
|
|
int HashMap_Reserve(HashMap *o, size_t num)
|
|
{
|
|
if (num == 0 || num < o->num_of_buckets)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
POW2INT(num);
|
|
|
|
size_t num_buckets = num_of_buckets;
|
|
TItem* storage = o->storage;
|
|
TItem* item = storage;
|
|
const TItem* enditem = item + num_buckets;
|
|
bool isInlineStorage = isUsingInlineStorage();
|
|
|
|
num = HashMap_Create(num);
|
|
|
|
HashMap_Reinsert(o, num, item, enditem);
|
|
|
|
if (!o->is_internal_storage)
|
|
{
|
|
EXLBR_FREE(storage);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|