From 8a97fb890fe6d0f7c5ec1cf2a5ca48495a9fe7b0 Mon Sep 17 00:00:00 2001 From: ka-weihe Date: Wed, 24 Mar 2021 16:57:38 +0100 Subject: [PATCH] wyhash: update to final_version_3 (part 1) (#9451) --- vlib/v/gen/c/cheaders.v | 260 ++++++++++++++++++++++++++-------------- 1 file changed, 167 insertions(+), 93 deletions(-) diff --git a/vlib/v/gen/c/cheaders.v b/vlib/v/gen/c/cheaders.v index a8661675b0..ab0c70c9bf 100644 --- a/vlib/v/gen/c/cheaders.v +++ b/vlib/v/gen/c/cheaders.v @@ -345,113 +345,187 @@ void _vcleanup(); #define _ARR_LEN(a) ( (sizeof(a)) / (sizeof(a[0])) ) // ============== wyhash ============== -//Author: Wang Yi -#ifndef wyhash_version_gamma - #define wyhash_version_gamma - #define WYHASH_CONDOM 0 - #include - #include - #if defined(_MSC_VER) && defined(_M_X64) - #include - #pragma intrinsic(_umul128) - #endif +#ifndef wyhash_final_version_3 +#define wyhash_final_version_3 - //const uint64_t _wyp0=0xa0761d6478bd642full, _wyp1=0xe7037ed1a0b428dbull; - #define _wyp0 ((uint64_t)0xa0761d6478bd642full) - #define _wyp1 ((uint64_t)0xe7037ed1a0b428dbull) +#ifndef WYHASH_CONDOM +//protections that produce different results: +//1: normal valid behavior +//2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication" +#define WYHASH_CONDOM 1 +#endif - #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) || defined(__TINYC__) - #define _likely_(x) __builtin_expect(x, 1) - #define _unlikely_(x) __builtin_expect((x), 0) +#ifndef WYHASH_32BIT_MUM +//0: normal version, slow on 32 bit systems +//1: faster on 32 bit systems but produces different results, incompatible with wy2u0k function +#define WYHASH_32BIT_MUM 0 +#endif + +//includes +#include +#include +#if defined(_MSC_VER) && defined(_M_X64) + #include + #pragma intrinsic(_umul128) +#endif + +//likely and unlikely macros +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + #define _likely_(x) __builtin_expect(x,1) + #define _unlikely_(x) __builtin_expect(x,0) +#else + #define _likely_(x) (x) + #define _unlikely_(x) (x) +#endif + +//128bit multiply function +static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); } +static inline void _wymum(uint64_t *A, uint64_t *B){ +#if(WYHASH_32BIT_MUM) + uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(uint32_t)*B, lh=(uint32_t)*A*(*B>>32), ll=(uint64_t)(uint32_t)*A*(uint32_t)*B; + #if(WYHASH_CONDOM>1) + *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll; #else - #define _likely_(x) (x) - #define _unlikely_(x) (x) + *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll; #endif +#elif defined(__SIZEOF_INT128__) + __uint128_t r=*A; r*=*B; + #if(WYHASH_CONDOM>1) + *A^=(uint64_t)r; *B^=(uint64_t)(r>>64); + #else + *A=(uint64_t)r; *B=(uint64_t)(r>>64); + #endif +#elif defined(_MSC_VER) && defined(_M_X64) + #if(WYHASH_CONDOM>1) + uint64_t a, b; + a=_umul128(*A,*B,&b); + *A^=a; *B^=b; + #else + *A=_umul128(*A,*B,B); + #endif +#else + uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo; + uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c; + #if(WYHASH_CONDOM>1) + *A^=lo; *B^=hi; + #else + *A=lo; *B=hi; + #endif +#endif +} - #if defined(TARGET_ORDER_IS_LITTLE) +//multiply and xor mix function, aka MUM +static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; } + +//endian macros +#ifndef WYHASH_LITTLE_ENDIAN + #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) #define WYHASH_LITTLE_ENDIAN 1 - #elif defined(TARGET_ORDER_IS_BIG) + #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #define WYHASH_LITTLE_ENDIAN 0 - #endif - - #if (WYHASH_LITTLE_ENDIAN) - static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;} - static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;} #else - #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) - static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);} - static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);} - #elif defined(_MSC_VER) - static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);} - static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);} - #elif defined(__TINYC__) - static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return bswap_64(v);} - static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return bswap_32(v);} - #endif + #warning could not determine endianness! Falling back to little endian. + #define WYHASH_LITTLE_ENDIAN 1 #endif +#endif - static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0]) << 16) | (((uint64_t)p[k >> 1]) << 8) | p[k - 1];} - static inline uint64_t _wyrotr(uint64_t v, unsigned k) { return (v >> k) | (v << (64 - k));} - static inline void _wymix128(uint64_t A, uint64_t B, uint64_t *C, uint64_t *D){ - A^=*C; B^=*D; - #ifdef UNOFFICIAL_WYHASH_32BIT - uint64_t hh=(A>>32)*(B>>32), hl=(A>>32)*(unsigned)B, lh=(unsigned)A*(B>>32), ll=(uint64_t)(unsigned)A*(unsigned)B; - *C=_wyrotr(hl,32)^hh; *D=_wyrotr(lh,32)^ll; - #else - #ifdef __SIZEOF_INT128__ - __uint128_t r=A; r*=B; *C=(uint64_t)r; *D=(uint64_t)(r>>64); - #elif defined(_MSC_VER) && defined(_M_X64) - A=_umul128(A,B,&B); *C=A; *D=B; - #else - uint64_t ha=A>>32, hb=B>>32, la=(uint32_t)A, lb=(uint32_t)B, hi, lo; - uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c; - *C=lo; *D=hi; - #endif - #endif +//read functions +#if (WYHASH_LITTLE_ENDIAN) +static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;} +static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v;} +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) +static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);} +static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return __builtin_bswap32(v);} +#elif defined(_MSC_VER) +static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);} +static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return _byteswap_ulong(v);} +#else +static inline uint64_t _wyr8(const uint8_t *p) { + uint64_t v; memcpy(&v, p, 8); + return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000)); +} +static inline uint64_t _wyr4(const uint8_t *p) { + uint32_t v; memcpy(&v, p, 4); + return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000)); +} +#endif +static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];} + +//wyhash main function +static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){ + const uint8_t *p=(const uint8_t *)key; seed^=*secret; uint64_t a, b; + if(_likely_(len<=16)){ + if(_likely_(len>=4)){ a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); } + else if(_likely_(len>0)){ a=_wyr3(p,len); b=0;} + else a=b=0; } - static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed){ - const uint8_t *p=(const uint8_t *)key; - uint64_t i=len, see1=seed; - start: - if (_likely_(i<=16)) { - #ifndef WYHASH_CONDOM - uint64_t shift = (i<8)*((8-i)<<3); - //WARNING: intended reading outside buffer, trading for speed. - _wymix128((_wyr8(p)<>shift)^_wyp1, &seed, &see1); - #else - if (_likely_(i<=8)) { - if (_likely_(i>=4)) _wymix128(_wyr4(p)^_wyp0,_wyr4(p+i-4)^_wyp1, &seed, &see1); - else if (_likely_(i)) _wymix128(_wyr3(p,i)^_wyp0,_wyp1, &seed, &see1); - else _wymix128(_wyp0,_wyp1, &seed, &see1); - } - else _wymix128(_wyr8(p)^_wyp0,_wyr8(p+i-8)^_wyp1, &seed, &see1); - #endif - _wymix128(len,_wyp0, &seed, &see1); - return seed^see1; + else{ + size_t i=len; + if(_unlikely_(i>48)){ + uint64_t see1=seed, see2=seed; + do{ + seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); + see1=_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^see1); + see2=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see2); + p+=48; i-=48; + }while(_likely_(i>48)); + seed^=see1^see2; } - _wymix128(_wyr8(p)^_wyp0,_wyr8(p+8)^_wyp1, &seed, &see1); - i-=16; p+=16; goto start; + while(_unlikely_(i>16)){ seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); i-=16; p+=16; } + a=_wyr8(p+i-16); b=_wyr8(p+i-8); } - static inline uint64_t wyhash64(uint64_t A, uint64_t B){ - _wymix128(_wyp0,_wyp1,&A,&B); - _wymix128(0,0,&A,&B); - return A^B; - } - static inline uint64_t wyrand(uint64_t *seed){ - *seed+=_wyp0; - uint64_t a=0, b=0; - _wymix128(*seed,*seed^_wyp1,&a,&b); - return a^b; - } - static inline double wy2u01(uint64_t r) { - const double _wynorm=1.0/(1ull<<52); - return (r>>12)*_wynorm; - } - static inline double wy2gau(uint64_t r) { - const double _wynorm=1.0/(1ull<<20); - return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0; + return _wymix(secret[1]^len,_wymix(a^secret[1],b^seed)); +} +//the default secret parameters +static const uint64_t _wyp[4] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull}; + +//a useful 64bit-64bit mix function to produce deterministic pseudo random numbers that can pass BigCrush and PractRand +static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);} + +//The wyrand PRNG that pass BigCrush and PractRand +static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);} + +//convert any 64 bit pseudo random numbers to uniform distribution [0,1). It can be combined with wyrand, wyhash64 or wyhash. +static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;} + +//convert any 64 bit pseudo random numbers to APPROXIMATE Gaussian distribution. It can be combined with wyrand, wyhash64 or wyhash. +static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;} + +#if(!WYHASH_32BIT_MUM) +//fast range integer random number generation on [0,k) credit to Daniel Lemire. May not work when WYHASH_32BIT_MUM=1. It can be combined with wyrand, wyhash64 or wyhash. +static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; } +#endif + +//make your own secret +static inline void make_secret(uint64_t seed, uint64_t *secret){ + uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 }; + for(size_t i=0;i<4;i++){ + uint8_t ok; + do{ + ok=1; secret[i]=0; + for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<> 1) & 0x5555555555555555; + x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333); + x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f; + x = (x * 0x0101010101010101) >> 56; + if(x!=32){ ok=0; break; } +#endif + } + if(!ok)continue; + for(uint64_t j=3;j<0x100000000ull;j+=2) if(secret[i]%j==0){ ok=0; break; } + }while(!ok); } +} #endif void v_free(voidptr ptr);