map: use strcmp and update wyhash

pull/4289/head
ka-weihe 2020-04-08 00:02:15 +02:00 committed by GitHub
parent 59ac0bd46b
commit 7406abe3b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 77 additions and 44 deletions

View File

@ -7,6 +7,9 @@ import (
strings strings
hash.wyhash hash.wyhash
) )
fn C.strcmp(byteptr, byteptr) int
/* /*
This is a very fast hashmap implementation. It has several properties that in This is a very fast hashmap implementation. It has several properties that in
combination make it very fast. Here is a short explanation of each property. combination make it very fast. Here is a short explanation of each property.
@ -241,7 +244,7 @@ fn (m mut map) set(key string, value voidptr) {
// While we might have a match // While we might have a match
for meta == m.metas[index] { for meta == m.metas[index] {
kv_index := m.metas[index + 1] kv_index := m.metas[index + 1]
if key == m.key_values.data[kv_index].key { if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
C.memcpy(m.key_values.data[kv_index].value, value, m.value_bytes) C.memcpy(m.key_values.data[kv_index].value, value, m.value_bytes)
return return
} }
@ -317,7 +320,7 @@ fn (m map) get3(key string, zero voidptr) voidptr {
index,meta = m.meta_less(index, meta) index,meta = m.meta_less(index, meta)
for meta == m.metas[index] { for meta == m.metas[index] {
kv_index := m.metas[index + 1] kv_index := m.metas[index + 1]
if key == m.key_values.data[kv_index].key { if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
out := malloc(m.value_bytes) out := malloc(m.value_bytes)
C.memcpy(out, m.key_values.data[kv_index].value, m.value_bytes) C.memcpy(out, m.key_values.data[kv_index].value, m.value_bytes)
return out return out
@ -336,7 +339,7 @@ fn (m map) exists(key string) bool {
index,meta = m.meta_less(index, meta) index,meta = m.meta_less(index, meta)
for meta == m.metas[index] { for meta == m.metas[index] {
kv_index := m.metas[index + 1] kv_index := m.metas[index + 1]
if key == m.key_values.data[kv_index].key { if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
return true return true
} }
index += 2 index += 2
@ -351,7 +354,7 @@ pub fn (m mut map) delete(key string) {
// Perform backwards shifting // Perform backwards shifting
for meta == m.metas[index] { for meta == m.metas[index] {
kv_index := m.metas[index + 1] kv_index := m.metas[index + 1]
if key == m.key_values.data[kv_index].key { if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
for (m.metas[index + 2]>>hashbits) > 1 { for (m.metas[index + 2]>>hashbits) > 1 {
m.metas[index] = m.metas[index + 2] - probe_inc m.metas[index] = m.metas[index + 2] - probe_inc
m.metas[index + 1] = m.metas[index + 3] m.metas[index + 1] = m.metas[index + 3]

View File

@ -212,12 +212,22 @@ void _vcleanup();
#define _ARR_LEN(a) ( (sizeof(a)) / (sizeof(a[0])) ) #define _ARR_LEN(a) ( (sizeof(a)) / (sizeof(a[0])) )
// ============== wyhash ============== // ============== wyhash ==============
//Author: Wang Yi //Author: Wang Yi
#ifndef wyhash_version_gamma #ifndef wyhash_version_gamma
#define wyhash_version_gamma #define wyhash_version_gamma
#define WYHASH_CONDOM 0
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
const uint64_t _wyp0=0xa0761d6478bd642full, _wyp1=0xe7037ed1a0b428dbull; const uint64_t _wyp0=0xa0761d6478bd642full, _wyp1=0xe7037ed1a0b428dbull;
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#define _likely_(x) __builtin_expect(x, 1)
#else
#define _likely_(x) (x)
#endif
#ifndef WYHASH_LITTLE_ENDIAN #ifndef WYHASH_LITTLE_ENDIAN
#if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WYHASH_LITTLE_ENDIAN 1 #define WYHASH_LITTLE_ENDIAN 1
@ -226,58 +236,78 @@ const uint64_t _wyp0=0xa0761d6478bd642full, _wyp1=0xe7037ed1a0b428dbull;
#endif #endif
#endif #endif
#if (WYHASH_LITTLE_ENDIAN) #if (WYHASH_LITTLE_ENDIAN)
static inline uint64_t _wyread64(const uint8_t *p){ uint64_t v; memcpy(&v, p, 8); return v;} static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}
#else #else
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
static inline uint64_t _wyread64(const uint8_t *p){ uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);} static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
static inline uint64_t _wyread64(const uint8_t *p){ uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);} static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
#endif
#endif
/*
 * Build a 24-bit value from up to three bytes of a short input:
 * the first byte, the byte at index k/2, and the byte at index k-1.
 * k must be at least 1; with k == 0 the index k-1 wraps around and the
 * read is out of bounds.
 */
static inline uint64_t _wyr3(const uint8_t *p, unsigned k) {
	const uint64_t first = p[0];
	const uint64_t middle = p[k >> 1];
	const uint64_t last = p[k - 1];
	return (first << 16) | (middle << 8) | last;
}
/*
 * Rotate the 64-bit value v right by k bit positions.
 *
 * The count is reduced modulo 64 before shifting. Without the masking,
 * k == 0 would evaluate v << 64 and k >= 64 would evaluate v >> k; both
 * shift by at least the operand width, which is undefined behaviour in C.
 * Results for k in 1..63 are unchanged.
 */
static inline uint64_t _wyrotr(uint64_t v, unsigned k) {
	k &= 63u;
	return (v >> k) | (v << ((64u - k) & 63u));
}
static inline void _wymix128(uint64_t A, uint64_t B, uint64_t *C, uint64_t *D){
A^=*C; B^=*D;
#ifdef UNOFFICIAL_WYHASH_32BIT
uint64_t hh=(A>>32)*(B>>32), hl=(A>>32)*(unsigned)B, lh=(unsigned)A*(B>>32), ll=(uint64_t)(unsigned)A*(unsigned)B;
*C=_wyrotr(hl,32)^hh; *D=_wyrotr(lh,32)^ll;
#else #else
static inline uint64_t _wyread64(const uint8_t *p){ uint64_t v; memcpy(&v, p, 8); return v;} #ifdef __SIZEOF_INT128__
__uint128_t r=A; r*=B; *C=(uint64_t)r; *D=(uint64_t)(r>>64);
#elif defined(_MSC_VER) && defined(_M_X64)
A=_umul128(A,B,&B); *C=A; *D=B;
#else
uint64_t ha=A>>32, hb=B>>32, la=(uint32_t)A, lb=(uint32_t)B, hi, lo;
uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
*C=lo; *D=hi;
#endif #endif
#endif #endif
}
/* Swap the upper and lower 32-bit halves of x (a 64-bit rotate by 32). */
static inline uint64_t _wyrot32(uint64_t x) {
	const uint64_t upper_moved_down = x >> 32;
	const uint64_t lower_moved_up = x << 32;
	return upper_moved_down | lower_moved_up;
}
/*
 * Mix x by multiplying it with its own half-swapped form (_wyrot32(x)).
 * The product is taken in uint64_t, so it wraps modulo 2^64.
 */
static inline uint64_t _wymix64(uint64_t x) {
	const uint64_t swapped = _wyrot32(x);
	return x * swapped;
}
static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed){ static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed){
const uint8_t *p=(const uint8_t *)key; const uint8_t *p=(const uint8_t *)key;
uint64_t see1=seed, len0=len; uint64_t i=len, see1=seed;
for(;len>16;len-=16,p+=16){ start:
seed=_wymix64(_wyread64(p)^seed^_wyp0); if(_likely_(i<=16)){
see1=_wymix64(_wyread64(p+8)^see1^_wyp1); #ifndef WYHASH_CONDOM
uint64_t shift=(i<8)*((8-i)<<3);
//WARNING: intentionally reads outside the buffer, trading safety for speed.
_wymix128((_wyr8(p)<<shift)^_wyp0,(_wyr8(p+i-8)>>shift)^_wyp1, &seed, &see1);
#else
if(_likely_(i<=8)){
if(_likely_(i>=4)) _wymix128(_wyr4(p)^_wyp0,_wyr4(p+i-4)^_wyp1, &seed, &see1);
else if (_likely_(i)) _wymix128(_wyr3(p,i)^_wyp0,_wyp1, &seed, &see1);
else _wymix128(_wyp0,_wyp1, &seed, &see1);
}
else _wymix128(_wyr8(p)^_wyp0,_wyr8(p+i-8)^_wyp1, &seed, &see1);
#endif
_wymix128(len,_wyp0, &seed, &see1);
return seed^see1;
} }
//intended unsafe read, trade for great speed. _wymix128(_wyr8(p)^_wyp0,_wyr8(p+8)^_wyp1, &seed, &see1);
uint64_t d0=_wyread64(p), d1=_wyread64(p+len-8); i-=16; p+=16; goto start;
len=(len<8)*((8-len)<<3);
d0<<=len; d1>>=len;
seed=_wymix64(d0^seed^_wyp0);
see1=_wymix64(d1^see1^_wyp1);
return _wyrot32(_wymix64(len0^seed^see1))
^_wymix64(_wyp1^_wyrot32(seed)^see1);
} }
static inline uint64_t wyhash64(uint64_t A, uint64_t B){
static inline unsigned wyhash2(unsigned A, unsigned B){ _wymix128(_wyp0,_wyp1,&A,&B);
uint64_t c=(((uint64_t)A)<<32)|B; _wymix128(0,0,&A,&B);
c=_wymix64(_wymix64(c^_wyp0)); return A^B;
return (c>>32)^(unsigned)c;
} }
static inline uint64_t wyrand(uint64_t *seed){
static inline unsigned wyrand(uint64_t *seed){ *seed+=_wyp0;
*seed+=_wyp0; uint64_t a=0, b=0;
uint64_t x=_wymix64(*seed^_wyp1); _wymix128(*seed,*seed^_wyp1,&a,&b);
return (x>>32)^(unsigned)x; return a^b;
} }
static inline double wy2u01(uint64_t r) {
static inline float wy2u01(unsigned r){ const double _wynorm=1.0/(1ull<<52);
const float _wynorm=1.0f/(1ull<<23); return (r>>12)*_wynorm;
return (r>>9)*_wynorm;
} }
static inline double wy2gau(uint64_t r) {
static inline float wy2gau(unsigned r){ const double _wynorm=1.0/(1ull<<20);
const float _wynorm=1.0f/(1ull<<9); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;
return ((r&0x3ff)+((r>>10)&0x3ff)+((r>>20)&0x3ff))*_wynorm-3.0f;
} }
#endif #endif