toys/simple-http-server-c/source/hash_map.c

402 lines
8.4 KiB
C

// Reference:
// https://abseil.io/about/design/swisstables#swiss-tables-design-notes
// https://github.com/gtmshrm/Swissmap
// https://github.com/Tessil/robin-map
// https://github.com/google/highwayhash
// https://peps.python.org/pep-0456
#include "hash_map.h"
#include <string.h>
#if defined(_WIN32)
#include <xmmintrin.h>
#define ALIGNED_ALLOC(size_in_bytes, aligment) _mm_malloc(size_in_bytes, aligment)
#else
#include <stdlib.h>
#define ALIGNED_ALLOC(size_in_bytes, aligment) aligned_alloc(aligment, size_in_bytes)
#endif /* _WIN32 */
#if defined(__GNUC__) || defined(__clang__)
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif /* __GNUC__ || __clang__ */
// Based on XXHASH
#include "xxhash.h"
//static inline T getTombstone() noexcept;
//static inline T getEmpty() noexcept;
//static inline uint64_t hash(const T& key) noexcept;
//static inline bool isEqual(const T& lhs, const T& rhs) noexcept;
//static inline bool isValid(const T& key) noexcept;
struct BucketEntry
{
};
#define POW2UINT(v) do { \
(v)--; \
(v) |= (v) >> 1; \
(v) |= (v) >> 2; \
(v) |= (v) >> 4; \
(v) |= (v) >> 8; \
(v) |= (v) >> 16; \
(v)++; \
} while (0)
#define CACHE_LINE_SIZE ((size_t)(64))
#define ALIAN(cursor, aligment) (((size_t)(cursor) + ((size_t)(alignment) - 1)) & ~((size_t)(alignment) - 1))
#define ALIANOF(s, aligment) (((size_t)(s) + ((size_t)(alignment) - 1)) & ~((size_t)(alignment) - 1))
#define IS_POINTER_ALIGNED(cursor, alignment) (((uintpr_t)(cursor) & ((alignment) - 1)) == 0)
#define MAX(a,b) (a) > (b) ? (a) : (b)
static const size_t kPrimes[] = {
1u,
5u,
17u,
29u,
37u,
53u,
67u,
79u,
97u,
131u,
193u,
257u,
389u,
521u,
769u,
1031u,
1543u,
2053u,
3079u,
6151u,
12289u,
24593u,
49157u,
#if SIZE_MAX >= ULONG_MAX
98317ul,
196613ul,
393241ul,
786433ul,
1572869ul,
3145739ul,
6291469ul,
12582917ul,
25165843ul,
50331653ul,
100663319ul,
201326611ul,
402653189ul,
805306457ul,
1610612741ul,
3221225473ul,
4294967291ul,
#endif
#if SIZE_MAX >= ULLONG_MAX
6442450939ull,
12884901893ull,
25769803751ull,
51539607551ull,
103079215111ull,
206158430209ull,
412316860441ull,
824633720831ull,
1649267441651ull,
3298534883309ull,
6597069766657ull,
#endif
};
const uint32_t kMinNumberOfBuckets = 16;
const uint32_t kPrimeMinNumberOfBuckets = 5;
// Open Addressing Hash Table
struct HashMap
{
HashMapGrowthPolicy policy;
void *storage;
uint32_t num_of_buckets;
uint32_t num_of_elements;
int is_internal_storage;
};
/*
* SipHash-2-4
*/
#define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
#define HALF_ROUND(a,b,c,d,s,t) \
do { \
a += b; c += d; \
b = ROTATE(b, s) ^ a; \
d = ROTATE(d, t) ^ c; \
a = ROTATE(a, 32); \
} while (0)
#define DOUBLE_ROUND(v0,v1,v2,v3) \
do { \
HALF_ROUND(v0,v1,v2,v3,13,16); \
HALF_ROUND(v2,v1,v0,v3,17,21); \
HALF_ROUND(v0,v1,v2,v3,13,16); \
HALF_ROUND(v2,v1,v0,v3,17,21); \
} while (0)
uint64_t SipHash24(const void *src, unsigned long src_sz, const char key[16])
{
const uint64_t *_key = (uint64_t *)key;
uint64_t k0 = (uint64_t)(_key[0]);
uint64_t k1 = (uint64_t)(_key[1]);
uint64_t b = (uint64_t)src_sz << 56;
const uint64_t *in = (uint64_t*)src;
uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
uint64_t v3 = k1 ^ 0x7465646279746573ULL;
while (src_sz >= 8) {
uint64_t mi = (uint64_t)(*in);
in += 1;
src_sz -= 8;
v3 ^= mi;
DOUBLE_ROUND(v0,v1,v2,v3);
v0 ^= mi;
}
uint64_t t = 0;
uint8_t *pt = (uint8_t *)&t;
uint8_t *m = (uint8_t *)in;
switch (src_sz) {
case 7: pt[6] = m[6];
case 6: pt[5] = m[5];
case 5: pt[4] = m[4];
case 4: *((uint32_t*)&pt[0]) = *((uint32_t*)&m[0]); break;
case 3: pt[2] = m[2];
case 2: pt[1] = m[1];
case 1: pt[0] = m[0];
}
b |= (uint64_t)(t);
v3 ^= b;
DOUBLE_ROUND(v0,v1,v2,v3);
v0 ^= b;
v2 ^= 0xff;
DOUBLE_ROUND(v0,v1,v2,v3);
DOUBLE_ROUND(v0,v1,v2,v3);
return (v0 ^ v1) ^ (v2 ^ v3);
}
/*
* DJB2 Hash Function.
*/
size_t BytesHasher(unsigned char *str)
{
size_t hash = 5381;
int c;
while (c = *str++)
{
hash = ((hash << 5) + hash) + c;
}
return hash;
}
size_t UInt32Hasher(uint32_t v)
{
// fmix32 from MurmurHash by Austin Appleby (public domain)
v ^= v >> 16;
v *= 0x85ebca6b;
v ^= v >> 13;
v *= 0xc2b2ae35;
v ^= v >> 16;
return (size_t) v;
}
size_t UInt64Hasher(uint64_t v)
{
// fmix32 from MurmurHash by Austin Appleby (public domain)
v ^= v >> 33;
v *= (uint64_t) 0xff51afd7ed558ccdull;
v ^= v >> 33;
v *= (uint64_t) 0xc4ceb9fe1a85ec53ull;
v ^= v >> 33;
return (size_t) v;
}
size_t XXH128Cmp(const XXH128_hash_t *h1, const XXH128_hash_t *h2)
{
if (h1->high64 < h2->high64)
{
return 1;
}
else if (h1->high64 > h2->high64)
{
return 0;
}
else if (h1->low64 < h2->low64)
{
return 1;
}
return 0;
}
void HashCombine(size_t seed, size_t value)
{
/// From CityHash (https://github.com/google/cityhash)
const size_t mult = 0x9ddfea08eb382d69ull;
size_t a = (value ^ seed) * mult;
a ^= (a >> 47);
size_t b = (seed ^ a) * mult;
b ^= (b >> 47);
seed = b * mult;
}
size_t Hash64WithSeed(const void *ptr, size_t size, size_t seed)
{
return (size_t)XXH3_64bits_withSeed(ptr, size, (unsigned long long)seed);
}
size_t Hash64(const void *ptr, size_t size)
{
return (size_t)XXH3_64bits(ptr, size);
}
size_t HashStrWithSeed(const char *str, size_t seed)
{
return Hash64WithSeed(str, strlen(str), seed);
}
size_t HashStr(const char *str)
{
return Hash64(str, strlen(str));
}
// XXH128_hash_t HashKernel(const char *str)
// {
// const char *offset = strstr(str, "body:");
// if (UNLIKELY(!offset))
// {
// offset = strchr(str, '{');
// if (UNLIKELY(!offset))
// assert(!"hash_kernel(): invalid input!");
// }
//
// return XXH128(offset, strlen(offset), 0);
// }
HashMap *HashMap_Create(uint32_t num)
{
HashMap *o = (HashMap *)malloc(sizeof(HashMap));
o->storage = NULL;
num = (num < kMinNumberOfBuckets) ? kMinNumberOfBuckets : num;
size_t num_bytes = sizeof(TItem) * num;
// note: 64 to match CPU cache line size
size_t alignment = MAX(ALIANOF(TItem, CACHE_LINE_SIZE), CACHE_LINE_SIZE);
num_bytes = ALIGN(num_bytes, alignment);
assert((num_bytes % alignment) == 0);
void* raw = ALIGNED_ALLOC(num_bytes, alignment);
assert(raw);
o->storage = raw;
EXLBR_ASSERT(IS_POINTER_ALIGNED(o->storage, ALIGNOF(TItem)));
o->num_of_buckets = num;
o->num_of_elements = 0;
TItem* item = o->storage;
const TItem* end_item = item + num;
for (; item != end_item; item++)
{
(item, TKeyInfo::getEmpty());
}
return o;
}
void *HashMap_Find(HashMap *o, void *key)
{
const uint32_t num = o->num_of_buckets;
TItem* const first = o->storage;
TItem* const end = first + num;
const uint64_t hash_value = TKeyInfo::hash(key);
uint32_t bucketIndex = uint32_t(hashValue) & (numBuckets - 1);
TItem* startItem = firstItem + bucketIndex;
TItem* EXLBR_RESTRICT currentItem = startItem;
do
{
if (EXLBR_LIKELY(currentItem->isEqual(key)))
{
return currentItem;
}
if (currentItem->isEmpty())
{
return endItem;
}
currentItem++;
currentItem = (currentItem == endItem) ? firstItem : currentItem;
} while (currentItem != startItem);
return endItem;
}
uint32_t HashMap_Size(HashMap *o)
{
return o->num_of_elements;
}
uint32_t HashMap_Capacity(HashMap *o)
{
return o->num_of_buckets;
}
int HashMap_Empty(HashMap *o)
{
return o->num_of_elements == 0 ? 1 : 0;
}
int HashMap_Reserve(HashMap *o, size_t num)
{
if (num == 0 || num < o->num_of_buckets)
{
return 0;
}
POW2INT(num);
size_t num_buckets = num_of_buckets;
TItem* storage = o->storage;
TItem* item = storage;
const TItem* enditem = item + num_buckets;
bool isInlineStorage = isUsingInlineStorage();
num = HashMap_Create(num);
HashMap_Reinsert(o, num, item, enditem);
if (!o->is_internal_storage)
{
EXLBR_FREE(storage);
}
return 1;
}