map: make compilable with V2

pull/4072/head
Alexander Medvednikov 2020-03-19 07:05:20 +01:00
parent bd0548548a
commit 66639924a4
1 changed files with 24 additions and 24 deletions

View File

@ -10,26 +10,26 @@ import (
)
/*
This is a very fast hashmap implementation. It has several properties that in
combination make it very fast. Here is a short explanation of each property.
This is a very fast hashmap implementation. It has several properties that in
combination make it very fast. Here is a short explanation of each property.
After reading this you should have a basic understanding of how it works:
1. |Hash-function (Wyhash)|. Wyhash is the fastest hash-function passing SMHash-
er, so it was an easy choice.
2. |Open addressing (Robin Hood Hashing)|. With this method a hash collision is
2. |Open addressing (Robin Hood Hashing)|. With this method a hash collision is
resolved by probing. As opposed to linear probing, Robin Hood hashing has a sim-
ple but clever twist: As new keys are inserted, old keys are shifted around in a
way such that all keys stay reasonably close to the slot they originally hash to.
3. |Memory layout|. Key-value pairs are stored in a `DenseArray`, with an avera-
ge of roughly 6.25% unused memory, as opposed to most other dynamic array imple-
mentations with a growth factor of 1.5 or 2. The key-values keep their index in
the array - they are not probed. Instead, this implementation uses another array
"metas" storing "metas" (meta-data). Each Key-value has a corresponding meta. A
mentations with a growth factor of 1.5 or 2. The key-values keep their index in
the array - they are not probed. Instead, this implementation uses another array
"metas" storing "metas" (meta-data). Each Key-value has a corresponding meta. A
meta stores a reference to its key-value, and its index in "metas" is determined
by the hash of the key and probing. A meta also stores bits from the hash (for
faster rehashing etc.) and how far away it is from the index it was originally
by the hash of the key and probing. A meta also stores bits from the hash (for
faster rehashing etc.) and how far away it is from the index it was originally
hashed to (probe_count). probe_count is 0 if empty, 1 if not probed, 2 if probed
by 1.
@ -37,22 +37,22 @@ meta (64 bit) = kv_index (32 bit) | probe_count (8 bits) | hashbits (24 bits)
metas = [meta, 0, meta, 0, meta, meta, meta, 0, ...]
key_values = [kv, kv, kv, kv, kv, ...]
4. |Power of two size array|. The size of metas is a power of two. This makes it
possible to find a bucket from a hash code using hash & (SIZE - 1) instead
4. |Power of two size array|. The size of metas is a power of two. This makes it
possible to find a bucket from a hash code using hash & (SIZE - 1) instead
of abs(hash) % SIZE. Modulo is extremely expensive so using '&' is a big perfor-
mance improvement. The general concern with this is that you only use the lower
bits of the hash and can cause many collisions. This is solved by using a very
good hash-function.
good hash-function.
5. |Extra metas|. The hashmap keeps track of the highest probe_count. The trick
5. |Extra metas|. The hashmap keeps track of the highest probe_count. The trick
is to allocate extra metas > max(probe_count), so you never have to do any boun-
ds-checking because the extra metas ensure that an element will never go beyond
the last index.
ds-checking because the extra metas ensure that an element will never go beyond
the last index.
6. |Cached rehashing|. When the load_factor of the map exceeds the max_load_fac-
tor the size of metas is doubled and all the elements need to be "rehashed" to
find the index in the new array. Instead of rehashing completely, it simply uses
the hashbits stored in the meta.
find the index in the new array. Instead of rehashing completely, it simply uses
the hashbits stored in the meta.
*/
const (
@ -66,7 +66,7 @@ const (
init_capicity = 1 << init_log_capicity
// Initial max load-factor
init_max_load_factor = 0.8
// Minimum Load-factor.
// Minimum Load-factor.
// Number is picked to make delete O(1) amortized
min_load_factor = 0.3
// Initial range cap
@ -77,7 +77,7 @@ const (
// Bitmask to select all the hashbits
hash_mask = u32(0x00FFFFFF)
// Used for incrementing the probe-count
probe_inc = u32(0x01000000)
probe_inc = u32(0x01000000)
// Bitmask for maximum probe count
max_probe = u32(0xFF000000)
)
@ -209,7 +209,7 @@ fn meta_less(metas &u32, i u64, m u32) (u64, u32){
[inline]
fn (m mut map) meta_greater(ms &u32, i u64, me u32, kvi u32) &u32 {
mut metas := ms
mut meta := me
mut meta := me
mut index := i
mut kv_index := kvi
for metas[index] != 0 {
@ -230,13 +230,13 @@ fn (m mut map) meta_greater(ms &u32, i u64, me u32, kvi u32) &u32 {
if (probe_count << 1) == m.extra_metas {
m.extra_metas += extra_metas_inc
mem_size := (m.cap + 2 + m.extra_metas)
metas = &u32(realloc(metas, sizeof(u32) * mem_size))
memset(metas + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc)
metas = &u32(C.realloc(metas, sizeof(u32) * mem_size))
C.memset(metas + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc)
// Should almost never happen
if probe_count == 252 {
panic("Probe overflow")
}
}
}
return metas
}
@ -259,7 +259,7 @@ fn (m mut map) set(key string, value voidptr) {
}
// Match not possible anymore
kv := KeyValue{
key: key
key: key
value: malloc(m.value_bytes)
}
C.memcpy(kv.value, value, m.value_bytes)
@ -445,4 +445,4 @@ pub fn (m map_string) str() string {
}
sb.writeln('}')
return sb.str()
}
}