map: fast_string_eq and improved comments
* improved comments and fast_string_eq * make it pass CI * enumerate traits * Add parameter back * remove space * remove parameter * Allow bootstrap compilation in one step with old vc (add new_map/2 shim).pull/4369/head^2
parent
79dad0bca9
commit
e247690fe1
|
@ -8,55 +8,59 @@ import (
|
||||||
hash.wyhash
|
hash.wyhash
|
||||||
)
|
)
|
||||||
|
|
||||||
fn C.strcmp(byteptr, byteptr) int
|
fn C.memcmp(byteptr, byteptr, int) int
|
||||||
|
|
||||||
/*
|
/*
|
||||||
This is a very fast hashmap implementation. It has several properties that in
|
This is a highly optimized hashmap implementation. It has several traits that
|
||||||
combination makes it very fast. Here is a short explanation of each property.
|
in combination make it very fast and memory efficient. Here is a short expl-
|
||||||
After reading this you should have a basic understanding of how it works:
|
anation of each trait. After reading this you should have a basic understand-
|
||||||
|
ing of how it functions:
|
||||||
|
|
||||||
1. |Hash-function (Wyhash)|. Wyhash is the fastest hash-function passing SMHash-
|
1. Hash-function: Wyhash. Wyhash is the fastest hash-function for short keys
|
||||||
er, so it was an easy choice.
|
passing SMHasher, so it was an obvious choice.
|
||||||
|
|
||||||
2. |Open addressing (Robin Hood Hashing)|. With this method, a hash collision is
|
2. Open addressing: Robin Hood Hashing. With this method, a hash-collision is
|
||||||
resolved by probing. As opposed to linear probing, Robin Hood hashing has a sim-
|
resolved by probing. As opposed to linear probing, Robin Hood hashing has a
|
||||||
ple but clever twist: As new keys are inserted, old keys are shifted around in a
|
simple but clever twist: As new keys are inserted, old keys are shifted arou-
|
||||||
way such that all keys stay reasonably close to the slot they originally hash to.
|
nd in a way such that all keys stay reasonably close to the slot they origin-
|
||||||
|
ally hash to. A new key may displace a key already inserted if its probe cou-
|
||||||
|
nt is larger than that of the key at the current position.
|
||||||
|
|
||||||
3. |Memory layout|. Key-value pairs are stored in a `DenseArray`, with an avera-
|
3. Memory layout: key-value pairs are stored in a `DenseArray`. This is a dy-
|
||||||
ge of roughly 6.25% unused memory, as opposed to most other dynamic array imple-
|
namic array with a very low volume of unused memory, at the cost of more rea-
|
||||||
mentations with a growth factor of 1.5 or 2. The key-values keep their index in
|
llocations when inserting elements. It also preserves the order of the key-v-
|
||||||
the array - they are not probed. Instead, this implementation uses another array
|
alues. This array is named `key_values`. Instead of probing a new key-value,
|
||||||
"metas" storing "meta"s (meta-data). Each Key-value has a corresponding meta. A
|
this map probes two 32-bit numbers collectively. The first number has its 8
|
||||||
meta stores a reference to its key-value, and its index in "metas" is determined
|
most significant bits reserved for the probe-count and the remaining 24 bits
|
||||||
by the hash of the key and probing. A meta also stores bits from the hash (for
|
are cached bits from the hash which are utilized for faster re-hashing. This
|
||||||
faster rehashing etc.) and how far away it is from the index it was originally
|
number is often referred to as `meta`. The other 32-bit number is the index
|
||||||
hashed to (probe_count). probe_count is 0 if empty, 1 if not probed, 2 if probed
|
at which the key-value was pushed to in `key_values`. Both of these numbers
|
||||||
by 1, etc..
|
are stored in a sparse array `metas`. The `meta`s and `kv_index`s are stored
|
||||||
|
at even and odd indices, respectively:
|
||||||
|
|
||||||
meta (64 bit) = kv_index (32 bit) | probe_count (8 bits) | hashbits (24 bits)
|
metas = [meta, kv_index, 0, 0, meta, kv_index, 0, 0, meta, kv_index, ...]
|
||||||
metas = [meta, 0, meta, 0, meta, meta, meta, 0, ...]
|
key_values = [kv, kv, kv, ...]
|
||||||
key_values = [kv, kv, kv, kv, kv, ...]
|
|
||||||
|
|
||||||
4. |Power of two size array|. The size of metas is a power of two. This makes it
|
4. The size of metas is a power of two. This enables the use of bitwise AND
|
||||||
possible to find a bucket from a hash code by using "hash & (SIZE -1)" instead
|
to convert the 64-bit hash to a bucket/index that doesn't overflow metas. If
|
||||||
of "abs(hash) % SIZE". Modulo is extremely expensive so using '&' is a big perf-
|
the size is a power of two you can use "hash & (SIZE - 1)" instead of "hash %
|
||||||
ormance improvement. The general concern with this is that you only use the low-
|
SIZE". Modulo is extremely expensive so using '&' is a big performance impro-
|
||||||
er bits of the hash and that can cause more collisions. This is solved by using
|
vement. The general concern with this approach is that you only make use of
|
||||||
good hash-function.
|
the lower bits of the hash which can cause more collisions. This is solved by
|
||||||
|
using a well-dispersed hash-function.
|
||||||
|
|
||||||
5. |Extra metas|. The hashmap keeps track of the highest probe_count. The trick
|
5. The hashmap keeps track of the highest probe_count. The trick is to alloc-
|
||||||
is to allocate extra_metas > max(probe_count), so you never have to do any boun-
|
ate `extra_metas` > max(probe_count), so you never have to do any bounds-che-
|
||||||
ds-checking because the extra metas ensures that an element will never go beyond
|
cking since the extra meta memory ensures that a meta will never go beyond
|
||||||
the last index.
|
the last index.
|
||||||
|
|
||||||
6. |Cached rehashing|. When the load_factor of the map exceeds the max_load_fac-
|
6. Cached rehashing. When the `load_factor` of the map exceeds the `max_load_
|
||||||
tor the size of metas is doubled and all the elements need to be "rehashed" to
|
factor` the size of metas is doubled and all the key-values are "rehashed" to
|
||||||
find the index in the new array. Instead of rehashing completely, it simply uses
|
find the index for their metas in the new array. Instead of rehashing compl-
|
||||||
the hashbits stored in the meta.
|
etely, it simply uses the cached-hashbits stored in the meta, resulting in
|
||||||
|
much faster rehashing.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// Number of bits from the hash stored for each entry
|
// Number of bits from the hash stored for each entry
|
||||||
hashbits = 24
|
hashbits = 24
|
||||||
|
@ -79,6 +83,17 @@ const (
|
||||||
probe_inc = u32(0x01000000)
|
probe_inc = u32(0x01000000)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// This function is intended to be fast when
|
||||||
|
// the strings are very likely to be equal
|
||||||
|
// TODO: add branch prediction hints
|
||||||
|
[inline]
|
||||||
|
fn fast_string_eq(a, b string) bool {
|
||||||
|
if a.len != b.len {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return C.memcmp(a.str, b.str, b.len) == 0
|
||||||
|
}
|
||||||
|
|
||||||
struct KeyValue {
|
struct KeyValue {
|
||||||
key string
|
key string
|
||||||
mut:
|
mut:
|
||||||
|
@ -107,7 +122,7 @@ fn new_dense_array() DenseArray {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Push element to array and return index
|
// Push element to array and return index
|
||||||
// The growth-factor is roughly 12.5 `(x + (x >> 3))`
|
// The growth-factor is roughly 1.125 `(x + (x >> 3))`
|
||||||
[inline]
|
[inline]
|
||||||
fn (d mut DenseArray) push(kv KeyValue) u32 {
|
fn (d mut DenseArray) push(kv KeyValue) u32 {
|
||||||
if d.cap == d.size {
|
if d.cap == d.size {
|
||||||
|
@ -142,7 +157,7 @@ pub struct map {
|
||||||
// Byte size of value
|
// Byte size of value
|
||||||
value_bytes int
|
value_bytes int
|
||||||
mut:
|
mut:
|
||||||
// highest even index in the hashtable
|
// highest even index in the hashtable
|
||||||
cap u32
|
cap u32
|
||||||
// Number of cached hashbits left for rehashing
|
// Number of cached hashbits left for rehashing
|
||||||
cached_hashbits byte
|
cached_hashbits byte
|
||||||
|
@ -151,18 +166,22 @@ mut:
|
||||||
// Array storing key-values (ordered)
|
// Array storing key-values (ordered)
|
||||||
key_values DenseArray
|
key_values DenseArray
|
||||||
// Pointer to meta-data:
|
// Pointer to meta-data:
|
||||||
// Odd indices stores index in `key_values`.
|
// Odd indices store kv_index.
|
||||||
// Even indices stores probe_count and hashbits.
|
// Even indices store probe_count and hashbits.
|
||||||
metas &u32
|
metas &u32
|
||||||
// Extra metas that allow for no ranging when incrementing
|
// Extra metas that allow for no ranging when incrementing
|
||||||
// index in the hashmap
|
// index in the hashmap
|
||||||
extra_metas u32
|
extra_metas u32
|
||||||
pub mut:
|
pub mut:
|
||||||
// Number of key-values currently in the hashmap
|
// Number of key-values currently in the hashmap
|
||||||
size int
|
size int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: remove this after vc is regenerated.
|
||||||
fn new_map(n, value_bytes int) map {
|
fn new_map(n, value_bytes int) map {
|
||||||
|
return new_map_1(value_bytes)
|
||||||
|
}
|
||||||
|
fn new_map_1(value_bytes int) map {
|
||||||
return map{
|
return map{
|
||||||
value_bytes: value_bytes
|
value_bytes: value_bytes
|
||||||
cap: init_cap
|
cap: init_cap
|
||||||
|
@ -176,7 +195,7 @@ fn new_map(n, value_bytes int) map {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_map_init(n, value_bytes int, keys &string, values voidptr) map {
|
fn new_map_init(n, value_bytes int, keys &string, values voidptr) map {
|
||||||
mut out := new_map(n, value_bytes)
|
mut out := new_map_1(value_bytes)
|
||||||
for i in 0 .. n {
|
for i in 0 .. n {
|
||||||
out.set(keys[i], byteptr(values) + i * value_bytes)
|
out.set(keys[i], byteptr(values) + i * value_bytes)
|
||||||
}
|
}
|
||||||
|
@ -244,7 +263,7 @@ fn (m mut map) set(key string, value voidptr) {
|
||||||
// While we might have a match
|
// While we might have a match
|
||||||
for meta == m.metas[index] {
|
for meta == m.metas[index] {
|
||||||
kv_index := m.metas[index + 1]
|
kv_index := m.metas[index + 1]
|
||||||
if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
|
if fast_string_eq(key, m.key_values.data[kv_index].key) {
|
||||||
C.memcpy(m.key_values.data[kv_index].value, value, m.value_bytes)
|
C.memcpy(m.key_values.data[kv_index].value, value, m.value_bytes)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -320,7 +339,7 @@ fn (m map) get3(key string, zero voidptr) voidptr {
|
||||||
index,meta = m.meta_less(index, meta)
|
index,meta = m.meta_less(index, meta)
|
||||||
for meta == m.metas[index] {
|
for meta == m.metas[index] {
|
||||||
kv_index := m.metas[index + 1]
|
kv_index := m.metas[index + 1]
|
||||||
if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
|
if fast_string_eq(key, m.key_values.data[kv_index].key) {
|
||||||
out := malloc(m.value_bytes)
|
out := malloc(m.value_bytes)
|
||||||
C.memcpy(out, m.key_values.data[kv_index].value, m.value_bytes)
|
C.memcpy(out, m.key_values.data[kv_index].value, m.value_bytes)
|
||||||
return out
|
return out
|
||||||
|
@ -332,14 +351,11 @@ fn (m map) get3(key string, zero voidptr) voidptr {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (m map) exists(key string) bool {
|
fn (m map) exists(key string) bool {
|
||||||
if m.value_bytes == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
mut index,mut meta := m.key_to_index(key)
|
mut index,mut meta := m.key_to_index(key)
|
||||||
index,meta = m.meta_less(index, meta)
|
index,meta = m.meta_less(index, meta)
|
||||||
for meta == m.metas[index] {
|
for meta == m.metas[index] {
|
||||||
kv_index := m.metas[index + 1]
|
kv_index := m.metas[index + 1]
|
||||||
if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
|
if fast_string_eq(key, m.key_values.data[kv_index].key) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
index += 2
|
index += 2
|
||||||
|
@ -354,7 +370,7 @@ pub fn (m mut map) delete(key string) {
|
||||||
// Perform backwards shifting
|
// Perform backwards shifting
|
||||||
for meta == m.metas[index] {
|
for meta == m.metas[index] {
|
||||||
kv_index := m.metas[index + 1]
|
kv_index := m.metas[index + 1]
|
||||||
if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
|
if fast_string_eq(key, m.key_values.data[kv_index].key) {
|
||||||
for (m.metas[index + 2]>>hashbits) > 1 {
|
for (m.metas[index + 2]>>hashbits) > 1 {
|
||||||
m.metas[index] = m.metas[index + 2] - probe_inc
|
m.metas[index] = m.metas[index + 2] - probe_inc
|
||||||
m.metas[index + 1] = m.metas[index + 3]
|
m.metas[index + 1] = m.metas[index + 3]
|
||||||
|
@ -380,11 +396,9 @@ pub fn (m mut map) delete(key string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: add optimization in case of no deletes
|
||||||
pub fn (m &map) keys() []string {
|
pub fn (m &map) keys() []string {
|
||||||
mut keys := [''].repeat(m.size)
|
mut keys := [''].repeat(m.size)
|
||||||
if m.value_bytes == 0 {
|
|
||||||
return keys
|
|
||||||
}
|
|
||||||
mut j := 0
|
mut j := 0
|
||||||
for i := u32(0); i < m.key_values.size; i++ {
|
for i := u32(0); i < m.key_values.size; i++ {
|
||||||
if m.key_values.data[i].key.str == 0 {
|
if m.key_values.data[i].key.str == 0 {
|
||||||
|
@ -408,10 +422,6 @@ pub fn (m map) free() {
|
||||||
free(m.key_values.data)
|
free(m.key_values.data)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn (m map) print() {
|
|
||||||
println('TODO')
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn (m map_string) str() string {
|
pub fn (m map_string) str() string {
|
||||||
if m.size == 0 {
|
if m.size == 0 {
|
||||||
return '{}'
|
return '{}'
|
||||||
|
|
|
@ -1127,7 +1127,7 @@ fn (g mut Gen) expr(node ast.Expr) {
|
||||||
}
|
}
|
||||||
g.write('})')
|
g.write('})')
|
||||||
} else {
|
} else {
|
||||||
g.write('new_map(1, sizeof($value_typ_str))')
|
g.write('new_map_1(sizeof($value_typ_str))')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ast.None {
|
ast.None {
|
||||||
|
@ -2902,7 +2902,7 @@ fn (g Gen) type_default(typ table.Type) string {
|
||||||
}
|
}
|
||||||
if sym.kind == .map {
|
if sym.kind == .map {
|
||||||
value_type_str := g.typ(sym.map_info().value_type)
|
value_type_str := g.typ(sym.map_info().value_type)
|
||||||
return 'new_map(1, sizeof($value_type_str))'
|
return 'new_map_1(sizeof($value_type_str))'
|
||||||
}
|
}
|
||||||
// Always set pointers to 0
|
// Always set pointers to 0
|
||||||
if table.type_is_ptr(typ) {
|
if table.type_is_ptr(typ) {
|
||||||
|
|
|
@ -60,7 +60,7 @@ int main(int argc, char** argv) {
|
||||||
});
|
});
|
||||||
Foo af_idx_el = (*(Foo*)array_get(arr_foo, 0));
|
Foo af_idx_el = (*(Foo*)array_get(arr_foo, 0));
|
||||||
string foo_a = af_idx_el.a;
|
string foo_a = af_idx_el.a;
|
||||||
map_string_string m1 = new_map(1, sizeof(string));
|
map_string_string m1 = new_map_1(sizeof(string));
|
||||||
map_string_int m2 = new_map_init(2, sizeof(int), (string[2]){tos3("v"), tos3("lang"), }, (int[2]){1, 2, });
|
map_string_int m2 = new_map_init(2, sizeof(int), (string[2]){tos3("v"), tos3("lang"), }, (int[2]){1, 2, });
|
||||||
string ma1 = tos3("hello");
|
string ma1 = tos3("hello");
|
||||||
string ma2 = tos3("vlang");
|
string ma2 = tos3("vlang");
|
||||||
|
|
Loading…
Reference in New Issue