diff --git a/tools/bench/hashmap.v b/tools/bench/hashmap.v new file mode 100644 index 0000000000..ba9070aa6e --- /dev/null +++ b/tools/bench/hashmap.v @@ -0,0 +1,112 @@ +import rand +import time +import builtin.hashmap + +fn hashmap_set_bench(arr []string, repeat int) { + start_time := time.ticks() + for _ in 0..repeat { + mut b := hashmap.new_hashmap() + for x in arr { + b.set(x, 1) + } + } + end_time := time.ticks() - start_time + println("* hashmap_set: ${end_time} ms") +} + +fn map_set_bench(arr []string, repeat int) { + start_time := time.ticks() + for _ in 0..repeat { + mut b := map[string]int + for x in arr { + b[x] = 1 + } + } + end_time := time.ticks() - start_time + println("* map_set: ${end_time} ms") +} + +fn hashmap_get_bench(arr []string, repeat int) { + mut b := hashmap.new_hashmap() + for x in arr { + b.set(x, 1) + } + start_time := time.ticks() + for _ in 0..repeat { + for x in arr { + b.get(x) + } + } + end_time := time.ticks() - start_time + println("* hashmap_get: ${end_time} ms") +} + +fn map_get_bench(arr []string, repeat int) { + mut b := map[string]int + for x in arr { + b[x] = 1 + } + start_time := time.ticks() + for _ in 0..repeat { + for x in arr { + b[x] + } + } + end_time := time.ticks() - start_time + println("* map_get: ${end_time} ms") +} + +fn benchmark_many_keys() { + key_len := 30 + repeat := 1 + for i := 2048; i <= 10000000; i = i * 2 { + mut arr := []string + for _ in 0..i { + mut buf := []byte + for j in 0..key_len { + buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) + } + s := string(buf) + arr << s + } + println("$arr.len keys of length $key_len") + // Uncomment the benchmarks you would like to run + // Run only one or two at a time while memory leaks are still a thing + hashmap_get_bench(arr, repeat) + map_get_bench(arr, repeat) + // hashmap_set_bench(arr, repeat) + // map_set_bench(arr, repeat) + println('') + } +} + +fn benchmark_few_keys() { // Benchmarks key counts 16, 32, ..., 2048 (doubling) with keys of length 30; each benchmark is repeated 10000 times + key_len := 30 + repeat := 10000 + println("Benchmarks are repeated $repeat times") + 
for i := 16; i <= 2048; i = i * 2 { + mut arr := []string + for _ in 0..i { + mut buf := []byte + for j in 0..key_len { + buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) + } + s := string(buf) + arr << s + } + println("$arr.len keys of length $key_len") + // Uncomment the benchmarks you would like to run + // Run only one or two at a time while memory leaks are still a thing + hashmap_get_bench(arr, repeat) + map_get_bench(arr, repeat) + // hashmap_set_bench(arr, repeat) + // map_set_bench(arr, repeat) + println('') + } +} + +fn main() { // Runs only the few-keys benchmark by default + // Uncomment the call below to also benchmark with many keys + // benchmark_many_keys() + benchmark_few_keys() +} \ No newline at end of file diff --git a/vlib/builtin/hashmap/hashmap.v b/vlib/builtin/hashmap/hashmap.v index 9874b8d025..e37c813d48 100644 --- a/vlib/builtin/hashmap/hashmap.v +++ b/vlib/builtin/hashmap/hashmap.v @@ -3,6 +3,8 @@ // that can be found in the LICENSE file. module hashmap +import hash.wyhash + const ( initial_size = 2 << 4 initial_cap = initial_size - 1 @@ -10,23 +12,6 @@ const ( load_factor = 0.8 ) -// hash-function should not be in this file -const ( - fnv64_prime = 1099511628211 - fnv64_offset_basis = 14695981039346656037 - fnv32_offset_basis = u32(2166136261) - fnv32_prime = u32(16777619) -) - -[inline] -fn fnv1a64(data string) u64 { - mut hash := fnv64_offset_basis - for i := 0; i < data.len; i++ { - hash = (hash ^ u64(data[i])) * fnv64_prime - } - return hash -} - pub struct Hashmap { mut: info &u16 @@ -54,14 +39,11 @@ pub fn new_hashmap() Hashmap { } pub fn (h mut Hashmap) set(key string, value int) { - // The load factor is 0.5. - // It will be adjustable in the future and with - // a higher default settings to lower memory usage. + // Rehash once size / cap exceeds load_factor (load_factor can be adjusted). 
if (f32(h.size) / f32(h.cap)) > h.load_factor { h.rehash() } - // Hash-function will be swapped for wyhash - hash := fnv1a64(key) + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) mut info := u16((hash >> 56) | probe_offset) mut index := hash & h.cap // While probe count is less @@ -115,7 +97,7 @@ fn (h mut Hashmap) rehash() { for i in 0 .. (old_cap + 1) { if h.info[i] != 0 { mut kv := h.key_values[i] - hash := fnv1a64(kv.key) + hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0) mut info := u16((hash >> 56) | probe_offset) mut index := hash & h.cap // While probe count is less @@ -154,7 +136,7 @@ fn (h mut Hashmap) rehash() { } pub fn (h mut Hashmap) delete(key string) { - hash := fnv1a64(key) + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) mut index := hash & h.cap mut info := u16((hash >> 56) | probe_offset) for info < h.info[index] { @@ -184,7 +166,7 @@ pub fn (h mut Hashmap) delete(key string) { } pub fn (h Hashmap) get(key string) int { - hash := fnv1a64(key) + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) mut index := hash & h.cap mut info := u16((hash >> 56) | probe_offset) for info < h.info[index] { @@ -202,7 +184,7 @@ pub fn (h Hashmap) get(key string) int { } pub fn (h Hashmap) exists(key string) bool { - hash := fnv1a64(key) + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) mut index := hash & h.cap mut info := u16((hash >> 56) | probe_offset) for info < h.info[index] {