From 23dd6e22941d8890f28a90ed22d5240e925b282e Mon Sep 17 00:00:00 2001 From: ka-weihe Date: Fri, 31 Jan 2020 02:29:35 +0100 Subject: [PATCH] hashmap: optimization of rehashing --- vlib/builtin/hashmap/hashmap.v | 69 ++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/vlib/builtin/hashmap/hashmap.v b/vlib/builtin/hashmap/hashmap.v index e37c813d48..178c292c2c 100644 --- a/vlib/builtin/hashmap/hashmap.v +++ b/vlib/builtin/hashmap/hashmap.v @@ -6,17 +6,24 @@ module hashmap import hash.wyhash const ( - initial_size = 2 << 4 + log_size = 5 + n_hashbits = 24 + window_size = 16 + initial_size = 1 << log_size initial_cap = initial_size - 1 - probe_offset = u16(256) - load_factor = 0.8 + default_load_factor = 0.8 + hashbit_mask = u32(0xFFFFFF) + probe_offset = u32(0x1000000) + max_probe = u32(0xFF000000) ) pub struct Hashmap { mut: - info &u16 + cap u32 + shift byte + window byte + info &u32 key_values &KeyValue - cap int pub mut: load_factor f32 size int @@ -30,10 +37,12 @@ mut: pub fn new_hashmap() Hashmap { return Hashmap{ - info: &u16(calloc(sizeof(u16) * initial_size)) - key_values: &KeyValue(calloc(sizeof(KeyValue) * initial_size)) cap: initial_cap - load_factor: 0.8 + shift: log_size + window: window_size + info: &u32(calloc(sizeof(u32) * initial_size)) + key_values: &KeyValue(calloc(sizeof(KeyValue) * initial_size)) + load_factor: default_load_factor size: 0 } } @@ -44,7 +53,7 @@ pub fn (h mut Hashmap) set(key string, value int) { h.rehash() } hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut info := u16((hash >> 56) | probe_offset) + mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) mut index := hash & h.cap // While probe count is less for info < h.info[index] { @@ -71,7 +80,7 @@ pub fn (h mut Hashmap) set(key string, value int) { h.info[index] = info info = tmp_info // Swap KeyValue - tmp_kv := h.key_values[index] + tmp_kv := h.key_values[index] h.key_values[index] = current_kv current_kv = tmp_kv } @@ -79,7 +88,7 @@ pub fn (h mut Hashmap) set(key string, value int) { info += probe_offset } // Should almost never happen - if (info & 0xFF00) == 0xFF00 { + if (info & max_probe) == max_probe { h.rehash() h.set(current_kv.key, current_kv.value) return @@ -91,14 +100,29 @@ pub fn (h mut Hashmap) set(key string, value int) { fn (h mut Hashmap) rehash() { old_cap := h.cap + h.window-- + // check if any hashbits are left + if h.window == 0 { + h.shift += window_size + } + // double the size of the hashmap h.cap = ((h.cap + 1) << 1) - 1 mut new_key_values := &KeyValue(calloc(sizeof(KeyValue) * (h.cap + 1))) - mut new_info := &u16(calloc(sizeof(u16) * (h.cap + 1))) + mut new_info := &u32(calloc(sizeof(u32) * (h.cap + 1))) for i in 0 .. (old_cap + 1) { if h.info[i] != 0 { mut kv := h.key_values[i] - hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0) - mut info := u16((hash >> 56) | probe_offset) + mut hash := u64(0) + mut info := u32(0) + if h.window == 0 { + hash = wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0) + info = u32(((hash >> h.shift) & hashbit_mask) | probe_offset) + } + else { + original := u64(i - ((h.info[i] >> n_hashbits) - 1)) & (h.cap >> 1) + hash = original | (h.info[i] << h.shift) + info = (h.info[i] & hashbit_mask) | probe_offset + } mut index := hash & h.cap // While probe count is less for info < new_info[index] { @@ -114,7 +138,7 @@ fn (h mut Hashmap) rehash() { new_info[index] = info info = tmp_info // Swap KeyValue - tmp_kv := new_key_values[index] + tmp_kv := new_key_values[index] new_key_values[index] = kv kv = tmp_kv } @@ -122,7 +146,7 @@ fn (h mut Hashmap) rehash() { info += probe_offset } // Should almost never happen - if (info & 0xFF00) == 0xFF00 { + if (info & max_probe) == max_probe { h.rehash() h.set(kv.key, kv.value) return @@ -131,6 +155,11 @@ fn (h mut Hashmap) rehash() { new_key_values[index] = kv } } + if h.window == 0 { + h.window = window_size + } + free(h.key_values) + free(h.info) h.key_values = new_key_values h.info = new_info } @@ -138,7 +167,7 @@ fn (h mut Hashmap) rehash() { pub fn (h mut Hashmap) delete(key string) { hash := wyhash.wyhash_c(key.str, u64(key.len), 0) mut index := hash & h.cap - mut info := u16((hash >> 56) | probe_offset) + mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) for info < h.info[index] { index = (index + 1) & h.cap info += probe_offset @@ -149,7 +178,7 @@ pub fn (h mut Hashmap) delete(key string) { mut old_index := index index = (index + 1) & h.cap mut current_info := h.info[index] - for (current_info >> 8) > 1 { + for (current_info >> n_hashbits) > 1 { h.info[old_index] = current_info - probe_offset h.key_values[old_index] = h.key_values[index] old_index = index @@ -168,7 +197,7 @@ pub fn (h mut Hashmap) delete(key string) { pub fn (h Hashmap) get(key string) int { hash := wyhash.wyhash_c(key.str, u64(key.len), 0) mut index := hash & h.cap - mut info := u16((hash >> 56) | probe_offset) + mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) for info < h.info[index] { index = (index + 1) & h.cap info += probe_offset @@ -186,7 +215,7 @@ pub fn (h Hashmap) get(key string) int { pub fn (h Hashmap) exists(key string) bool { hash := wyhash.wyhash_c(key.str, u64(key.len), 0) mut index := hash & h.cap - mut info := u16((hash >> 56) | probe_offset) + mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) for info < h.info[index] { index = (index + 1) & h.cap info += probe_offset