From e247690fe1e074e54f0354a73f9679a34ffd842e Mon Sep 17 00:00:00 2001
From: ka-weihe
Date: Sun, 12 Apr 2020 14:10:06 +0200
Subject: [PATCH] map: fast_string_eq and improved comments

* improved comments and fast_string_eq
* make it pass CI
* enumerate traits
* Add parameter back
* remove space
* remove parameter
* Allow bootstrap compilation in one step with old vc (add new_map/2 shim).
---
 vlib/builtin/map.v   | 124 +++++++++++++++++++++++--------------------
 vlib/v/gen/cgen.v    |   4 +-
 vlib/v/gen/tests/4.c |   2 +-
 3 files changed, 70 insertions(+), 60 deletions(-)

diff --git a/vlib/builtin/map.v b/vlib/builtin/map.v
index a897e1f8bb..670f295178 100644
--- a/vlib/builtin/map.v
+++ b/vlib/builtin/map.v
@@ -8,55 +8,59 @@ import (
 	hash.wyhash
 )
 
-fn C.strcmp(byteptr, byteptr) int
+fn C.memcmp(byteptr, byteptr, int) int
 
 /*
-This is a very fast hashmap implementation. It has several properties that in
-combination makes it very fast. Here is a short explanation of each property.
-After reading this you should have a basic understanding of how it works:
+This is a highly optimized hashmap implementation. It has several traits
+that in combination make it very fast and memory efficient. Here is a short
+explanation of each trait. After reading this you should have a basic
+understanding of how it functions:
-1. |Hash-function (Wyhash)|. Wyhash is the fastest hash-function passing SMHash-
-er, so it was an easy choice.
+1. Hash-function: Wyhash. Wyhash is the fastest hash-function for short keys
+passing SMHasher, so it was an obvious choice.
-2. |Open addressing (Robin Hood Hashing)|. With this method, a hash collision is
-resolved by probing. As opposed to linear probing, Robin Hood hashing has a sim-
-ple but clever twist: As new keys are inserted, old keys are shifted around in a
-way such that all keys stay reasonably close to the slot they originally hash to.
+2. Open addressing: Robin Hood Hashing. With this method, a hash collision
+is resolved by probing. As opposed to linear probing, Robin Hood hashing
+has a simple but clever twist: as new keys are inserted, old keys are
+shifted around in a way such that all keys stay reasonably close to the
+slot they originally hash to. A new key may displace a key already inserted
+if its probe count is larger than that of the key at the current position.
-3. |Memory layout|. Key-value pairs are stored in a `DenseArray`, with an avera-
-ge of roughly 6.25% unused memory, as opposed to most other dynamic array imple-
-mentations with a growth factor of 1.5 or 2. The key-values keep their index in
-the array - they are not probed. Instead, this implementation uses another array
-"metas" storing "meta"s (meta-data). Each Key-value has a corresponding meta. A
-meta stores a reference to its key-value, and its index in "metas" is determined
-by the hash of the key and probing. A meta also stores bits from the hash (for
-faster rehashing etc.) and how far away it is from the index it was originally
-hashed to (probe_count). probe_count is 0 if empty, 1 if not probed, 2 if probed
-by 1, etc..
+3. Memory layout: key-value pairs are stored in a `DenseArray`. This is a
+dynamic array with a very low volume of unused memory, at the cost of more
+reallocations when inserting elements. It also preserves the order of the
+key-values. This array is named `key_values`. Instead of probing a new
+key-value, this map probes two 32-bit numbers collectively.
+The first number has its 8 most significant bits reserved for the probe-count
+and the remaining 24 bits are cached bits from the hash, which are used for
+faster rehashing. This number is often referred to as `meta`. The other
+32-bit number is the index at which the key-value was pushed in `key_values`.
+Both of these numbers are stored in a sparse array `metas`. The `meta`s and
+`kv_index`s are stored at even and odd indices, respectively:
-meta (64 bit) = kv_index (32 bit) | probe_count (8 bits) | hashbits (24 bits)
-metas = [meta, 0, meta, 0, meta, meta, meta, 0, ...]
-key_values = [kv, kv, kv, kv, kv, ...]
+metas = [meta, kv_index, 0, 0, meta, kv_index, 0, 0, meta, kv_index, ...]
+key_values = [kv, kv, kv, ...]
-4. |Power of two size array|. The size of metas is a power of two. This makes it
-possible to find a bucket from a hash code by using "hash & (SIZE -1)" instead
-of "abs(hash) % SIZE". Modulo is extremely expensive so using '&' is a big perf-
-ormance improvement. The general concern with this is that you only use the low-
-er bits of the hash and that can cause more collisions. This is solved by using
-good hash-function.
+4. The size of metas is a power of two. This enables the use of bitwise AND
+to convert the 64-bit hash to a bucket/index that doesn't overflow metas. If
+the size is a power of two you can use "hash & (SIZE - 1)" instead of
+"hash % SIZE". Modulo is extremely expensive, so using '&' is a big
+performance improvement. The general concern with this approach is that you
+only make use of the lower bits of the hash, which can cause more collisions.
+This is solved by using a well-dispersed hash-function.
-5. |Extra metas|. The hashmap keeps track of the highest probe_count. The trick
-is to allocate extra_metas > max(probe_count), so you never have to do any boun-
-ds-checking because the extra metas ensures that an element will never go beyond
+5. The hashmap keeps track of the highest probe_count. The trick is to
+allocate `extra_metas` > max(probe_count), so you never have to do any bounds-
+checking, since the extra meta memory ensures that a meta will never go beyond
 the last index.
-6. |Cached rehashing|. When the load_factor of the map exceeds the max_load_fac-
-tor the size of metas is doubled and all the elements need to be "rehashed" to
-find the index in the new array. Instead of rehashing completely, it simply uses
-the hashbits stored in the meta.
+6. Cached rehashing. When the `load_factor` of the map exceeds the
+`max_load_factor`, the size of metas is doubled and all the key-values are
+"rehashed" to find the index for their metas in the new array. Instead of
+rehashing completely, it simply uses the cached hashbits stored in the meta,
+resulting in much faster rehashing.
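+
+To make traits 3 and 4 concrete, a lookup derives its starting index and
+meta roughly like the sketch below (an editor's illustration, not code from
+this file; it assumes the `wyhash_c` helper of the imported hash.wyhash
+module and elides the cached-hashbits bookkeeping):
+
+	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
+	index := hash & m.cap // cheap "hash % size"; cap is even, so index lands on a meta
+	meta := u32(hash >> (64 - hashbits)) | probe_inc // probe_count 1 in the top 8 bits
+	// m.metas[index] holds meta, m.metas[index + 1] holds kv_index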
 */
-
 const (
 	// Number of bits from the hash stored for each entry
 	hashbits = 24
@@ -79,6 +83,17 @@ const (
 	probe_inc = u32(0x01000000)
 )
 
+// This function is intended to be fast when
+// the strings are very likely to be equal
+// TODO: add branch prediction hints
+[inline]
+fn fast_string_eq(a, b string) bool {
+	if a.len != b.len {
+		return false
+	}
+	return C.memcmp(a.str, b.str, b.len) == 0
+}
+
 struct KeyValue {
 	key string
 mut:
@@ -107,7 +122,7 @@ fn new_dense_array() DenseArray {
 }
 
 // Push element to array and return index
-// The growth-factor is roughly 12.5 `(x + (x >> 3))`
+// The growth-factor is roughly 1.125 `(x + (x >> 3))`
 [inline]
 fn (d mut DenseArray) push(kv KeyValue) u32 {
 	if d.cap == d.size {
@@ -142,7 +157,7 @@ pub struct map {
 	// Byte size of value
 	value_bytes int
 mut:
-// highest even index in the hashtable
+	// highest even index in the hashtable
 	cap u32
 	// Number of cached hashbits left for rehasing
 	cached_hashbits byte
@@ -151,18 +166,22 @@ mut:
 	// Array storing key-values (ordered)
 	key_values DenseArray
 	// Pointer to meta-data:
-	// Odd indices stores index in `key_values`.
-	// Even indices stores probe_count and hashbits.
+	// Odd indices store kv_index.
+	// Even indices store probe_count and hashbits.
 	metas &u32
 	// Extra metas that allows for no ranging when incrementing
 	// index in the hashmap
 	extra_metas u32
 pub mut:
-// Number of key-values currently in the hashmap
+	// Number of key-values currently in the hashmap
 	size int
 }
 
+// TODO: remove this after vc is regenerated.
 fn new_map(n, value_bytes int) map {
+	return new_map_1(value_bytes)
+}
+fn new_map_1(value_bytes int) map {
 	return map{
 		value_bytes: value_bytes
 		cap: init_cap
@@ -176,7 +195,7 @@ fn new_map(n, value_bytes int) map {
 }
 
 fn new_map_init(n, value_bytes int, keys &string, values voidptr) map {
-	mut out := new_map(n, value_bytes)
+	mut out := new_map_1(value_bytes)
 	for i in 0 .. n {
 		out.set(keys[i], byteptr(values) + i * value_bytes)
 	}
@@ -244,7 +263,7 @@ fn (m mut map) set(key string, value voidptr) {
 	// While we might have a match
 	for meta == m.metas[index] {
 		kv_index := m.metas[index + 1]
-		if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
+		if fast_string_eq(key, m.key_values.data[kv_index].key) {
 			C.memcpy(m.key_values.data[kv_index].value, value, m.value_bytes)
 			return
 		}
@@ -320,7 +339,7 @@ fn (m map) get3(key string, zero voidptr) voidptr {
 	index,meta = m.meta_less(index, meta)
 	for meta == m.metas[index] {
 		kv_index := m.metas[index + 1]
-		if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
+		if fast_string_eq(key, m.key_values.data[kv_index].key) {
 			out := malloc(m.value_bytes)
 			C.memcpy(out, m.key_values.data[kv_index].value, m.value_bytes)
 			return out
@@ -332,14 +351,11 @@ fn (m map) get3(key string, zero voidptr) voidptr {
 }
 
 fn (m map) exists(key string) bool {
-	if m.value_bytes == 0 {
-		return false
-	}
 	mut index,mut meta := m.key_to_index(key)
 	index,meta = m.meta_less(index, meta)
 	for meta == m.metas[index] {
 		kv_index := m.metas[index + 1]
-		if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
+		if fast_string_eq(key, m.key_values.data[kv_index].key) {
 			return true
 		}
 		index += 2
@@ -354,7 +370,7 @@ pub fn (m mut map) delete(key string) {
 	// Perform backwards shifting
 	for meta == m.metas[index] {
 		kv_index := m.metas[index + 1]
-		if C.strcmp(key.str, m.key_values.data[kv_index].key.str) == 0 {
+		if fast_string_eq(key, m.key_values.data[kv_index].key) {
 			for (m.metas[index + 2]>>hashbits) > 1 {
 				m.metas[index] = m.metas[index + 2] - probe_inc
 				m.metas[index + 1] = m.metas[index + 3]
@@ -380,11 +396,9 @@ pub fn (m mut map) delete(key string) {
 	}
 }
 
+// TODO: add optimization in case of no deletes
 pub fn (m &map) keys() []string {
 	mut keys := [''].repeat(m.size)
-	if m.value_bytes == 0 {
-		return keys
-	}
 	mut j := 0
 	for i := u32(0); i < m.key_values.size; i++ {
 		if m.key_values.data[i].key.str == 0 {
@@ -408,10 +422,6 @@ pub fn (m map) free() {
 	free(m.key_values.data)
 }
 
-pub fn (m map) print() {
-	println('TODO')
-}
-
 pub fn (m map_string) str() string {
 	if m.size == 0 {
 		return '{}'
diff --git a/vlib/v/gen/cgen.v b/vlib/v/gen/cgen.v
index 9ab5c40ba0..0696f612bb 100644
--- a/vlib/v/gen/cgen.v
+++ b/vlib/v/gen/cgen.v
@@ -1127,7 +1127,7 @@ fn (g mut Gen) expr(node ast.Expr) {
 			}
 			g.write('})')
 		}
 		else {
-			g.write('new_map(1, sizeof($value_typ_str))')
+			g.write('new_map_1(sizeof($value_typ_str))')
 		}
 	}
 	ast.None {
@@ -2902,7 +2902,7 @@ fn (g Gen) type_default(typ table.Type) string {
 	}
 	if sym.kind == .map {
 		value_type_str := g.typ(sym.map_info().value_type)
-		return 'new_map(1, sizeof($value_type_str))'
+		return 'new_map_1(sizeof($value_type_str))'
 	}
 	// Always set pointers to 0
 	if table.type_is_ptr(typ) {
diff --git a/vlib/v/gen/tests/4.c b/vlib/v/gen/tests/4.c
index 7b38e36b3e..6f86fe4b77 100644
--- a/vlib/v/gen/tests/4.c
+++ b/vlib/v/gen/tests/4.c
@@ -60,7 +60,7 @@ int main(int argc, char** argv) {
 	});
 	Foo af_idx_el = (*(Foo*)array_get(arr_foo, 0));
 	string foo_a = af_idx_el.a;
-	map_string_string m1 = new_map(1, sizeof(string));
+	map_string_string m1 = new_map_1(sizeof(string));
 	map_string_int m2 = new_map_init(2, sizeof(int), (string[2]){tos3("v"), tos3("lang"), }, (int[2]){1, 2, });
 	string ma1 = tos3("hello");
 	string ma2 = tos3("vlang");
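
Editor's note: the snippet below is not part of the patch; it is a minimal,
stand-alone V sketch that reproduces the new `fast_string_eq` helper so its
behaviour can be exercised in isolation. The length check rejects most
mismatches without reading a single byte, and C.memcmp compares exactly
`b.len` bytes, so V strings containing embedded NUL bytes (which would end a
C.strcmp scan early) also compare correctly.

fn C.memcmp(byteptr, byteptr, int) int

// Fast path: strings of different lengths can never be equal.
// Slow path: compare exactly b.len bytes, NUL bytes included.
[inline]
fn fast_string_eq(a, b string) bool {
	if a.len != b.len {
		return false
	}
	return C.memcmp(a.str, b.str, b.len) == 0
}

fn main() {
	a := 'hello'
	b := 'hello'
	c := 'hell'
	println(fast_string_eq(a, b)) // true: lengths match and bytes match
	println(fast_string_eq(a, c)) // false: the length check short-circuits
}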