map: make compilable with V2
parent
bd0548548a
commit
66639924a4
|
@ -10,26 +10,26 @@ import (
|
|||
)
|
||||
|
||||
/*
|
||||
This is a very fast hashmap implementation. It has several properties that in
|
||||
combination make it very fast. Here is a short explanation of each property.
|
||||
This is a very fast hashmap implementation. It has several properties that in
|
||||
combination make it very fast. Here is a short explanation of each property.
|
||||
After reading this you should have a basic understanding of how it works:
|
||||
|
||||
1. |Hash-function (Wyhash)|. Wyhash is the fastest hash-function passing SMHash-
|
||||
er, so it was an easy choice.
|
||||
|
||||
2. |Open addressing (Robin Hood Hashing)|. With this method a hash collision is
|
||||
2. |Open addressing (Robin Hood Hashing)|. With this method a hash collision is
|
||||
resolved by probing. As opposed to linear probing, Robin Hood hashing has a sim-
|
||||
ple but clever twist: As new keys are inserted, old keys are shifted around in a
|
||||
way such that all keys stay reasonably close to the slot they originally hash to.
|
||||
|
||||
3. |Memory layout|. Key-value pairs are stored in a `DenseArray`, with an avera-
|
||||
ge of roughly 6.25% unused memory, as opposed to most other dynamic array imple-
|
||||
mentations with a growth factor of 1.5 or 2. The key-values keep their index in
|
||||
the array - they are not probed. Instead, this implementation uses another array
|
||||
"metas" storing "metas" (meta-data). Each Key-value has a corresponding meta. A
|
||||
mentations with a growth factor of 1.5 or 2. The key-values keep their index in
|
||||
the array - they are not probed. Instead, this implementation uses another array
|
||||
"metas" storing "metas" (meta-data). Each Key-value has a corresponding meta. A
|
||||
meta stores a reference to its key-value, and its index in "metas" is determined
|
||||
by the hash of the key and probing. A meta also stores bits from the hash (for
|
||||
faster rehashing etc.) and how far away it is from the index it was originally
|
||||
by the hash of the key and probing. A meta also stores bits from the hash (for
|
||||
faster rehashing etc.) and how far away it is from the index it was originally
|
||||
hashed to (probe_count). probe_count is 0 if empty, 1 if not probed, 2 if probed
|
||||
by 1.
|
||||
|
||||
|
@ -37,22 +37,22 @@ meta (64 bit) = kv_index (32 bit) | probe_count (8 bits) | hashbits (24 bits)
|
|||
metas = [meta, 0, meta, 0, meta, meta, meta, 0, ...]
|
||||
key_values = [kv, kv, kv, kv, kv, ...]
|
||||
|
||||
4. |Power of two size array|. The size of metas is a power of two. This makes it
|
||||
possible to find a bucket from a hash code using hash & (SIZE - 1) instead
|
||||
4. |Power of two size array|. The size of metas is a power of two. This makes it
|
||||
possible to find a bucket from a hash code using hash & (SIZE - 1) instead
|
||||
of abs(hash) % SIZE. Modulo is extremely expensive so using '&' is a big perfor-
|
||||
mance improvement. The general concern with this is that you only use the lower
|
||||
bits of the hash, which can cause many collisions. This is solved by using a very go-
|
||||
od hash-function.
|
||||
od hash-function.
|
||||
|
||||
5. |Extra metas|. The hashmap keeps track of the highest probe_count. The trick
|
||||
5. |Extra metas|. The hashmap keeps track of the highest probe_count. The trick
|
||||
is to allocate extra metas > max(probe_count), so you never have to do any boun-
|
||||
ds-checking because the extra metas ensure that an element will never go beyond
|
||||
the last index.
|
||||
ds-checking because the extra metas ensure that an element will never go beyond
|
||||
the last index.
|
||||
|
||||
6. |Cached rehashing|. When the load_factor of the map exceeds the max_load_fac-
|
||||
tor the size of metas is doubled and all the elements need to be "rehashed" to
|
||||
find the index in the new array. Instead of rehashing completely, it simply uses
|
||||
the hashbits stored in the meta.
|
||||
find the index in the new array. Instead of rehashing completely, it simply uses
|
||||
the hashbits stored in the meta.
|
||||
*/
|
||||
|
||||
const (
|
||||
|
@ -66,7 +66,7 @@ const (
|
|||
init_capicity = 1 << init_log_capicity
|
||||
// Initial max load-factor
|
||||
init_max_load_factor = 0.8
|
||||
// Minimum Load-factor.
|
||||
// Minimum Load-factor.
|
||||
// Number is picked to make delete O(1) amortized
|
||||
min_load_factor = 0.3
|
||||
// Initial range cap
|
||||
|
@ -77,7 +77,7 @@ const (
|
|||
// Bitmask to select all the hashbits
|
||||
hash_mask = u32(0x00FFFFFF)
|
||||
// Used for incrementing the probe-count
|
||||
probe_inc = u32(0x01000000)
|
||||
probe_inc = u32(0x01000000)
|
||||
// Bitmask for maximum probe count
|
||||
max_probe = u32(0xFF000000)
|
||||
)
|
||||
|
@ -209,7 +209,7 @@ fn meta_less(metas &u32, i u64, m u32) (u64, u32){
|
|||
[inline]
|
||||
fn (m mut map) meta_greater(ms &u32, i u64, me u32, kvi u32) &u32 {
|
||||
mut metas := ms
|
||||
mut meta := me
|
||||
mut meta := me
|
||||
mut index := i
|
||||
mut kv_index := kvi
|
||||
for metas[index] != 0 {
|
||||
|
@ -230,13 +230,13 @@ fn (m mut map) meta_greater(ms &u32, i u64, me u32, kvi u32) &u32 {
|
|||
if (probe_count << 1) == m.extra_metas {
|
||||
m.extra_metas += extra_metas_inc
|
||||
mem_size := (m.cap + 2 + m.extra_metas)
|
||||
metas = &u32(realloc(metas, sizeof(u32) * mem_size))
|
||||
memset(metas + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc)
|
||||
metas = &u32(C.realloc(metas, sizeof(u32) * mem_size))
|
||||
C.memset(metas + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc)
|
||||
// Should almost never happen
|
||||
if probe_count == 252 {
|
||||
panic("Probe overflow")
|
||||
}
|
||||
}
|
||||
}
|
||||
return metas
|
||||
}
|
||||
|
||||
|
@ -259,7 +259,7 @@ fn (m mut map) set(key string, value voidptr) {
|
|||
}
|
||||
// Match not possible anymore
|
||||
kv := KeyValue{
|
||||
key: key
|
||||
key: key
|
||||
value: malloc(m.value_bytes)
|
||||
}
|
||||
C.memcpy(kv.value, value, m.value_bytes)
|
||||
|
@ -445,4 +445,4 @@ pub fn (m map_string) str() string {
|
|||
}
|
||||
sb.writeln('}')
|
||||
return sb.str()
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue