hashmap: change hash-function to wyhash and add benchmarks

pull/3594/head
ka-weihe 2020-01-29 04:06:05 +01:00 committed by GitHub
parent 5c29e7b257
commit a14a5fbf95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 120 additions and 26 deletions

View File

@ -0,0 +1,112 @@
import rand
import time
import builtin.hashmap
// hashmap_set_bench measures inserting every key of `arr` into a newly
// created hashmap, doing that `repeat` times, and prints the difference
// between the two time.ticks() readings labelled as milliseconds.
fn hashmap_set_bench(arr []string, repeat int) {
	begin := time.ticks()
	for _ in 0..repeat {
		// A fresh hashmap is created for every round.
		mut table := hashmap.new_hashmap()
		for key in arr {
			table.set(key, 1)
		}
	}
	elapsed := time.ticks() - begin
	println("* hashmap_set: ${elapsed} ms")
}
// map_set_bench is the built-in map counterpart of the hashmap set
// benchmark: it stores every key of `arr` into a newly declared
// map[string]int, `repeat` times over, and prints the difference between
// the two time.ticks() readings labelled as milliseconds.
fn map_set_bench(arr []string, repeat int) {
	begin := time.ticks()
	for _ in 0..repeat {
		// A fresh map is declared for every round.
		mut table := map[string]int
		for key in arr {
			table[key] = 1
		}
	}
	elapsed := time.ticks() - begin
	println("* map_set: ${elapsed} ms")
}
// hashmap_get_bench fills one hashmap with every key of `arr` (untimed),
// then measures looking each key up again, `repeat` times over, and prints
// the difference between the two time.ticks() readings labelled as
// milliseconds.
fn hashmap_get_bench(arr []string, repeat int) {
	// Setup phase: not part of the measurement.
	mut table := hashmap.new_hashmap()
	for key in arr {
		table.set(key, 1)
	}
	// Timed phase: lookups only; the returned values are discarded.
	begin := time.ticks()
	for _ in 0..repeat {
		for key in arr {
			table.get(key)
		}
	}
	elapsed := time.ticks() - begin
	println("* hashmap_get: ${elapsed} ms")
}
// map_get_bench fills one built-in map[string]int with every key of `arr`
// (untimed), then measures indexing the map with each key again, `repeat`
// times over, and prints the difference between the two time.ticks()
// readings labelled as milliseconds.
fn map_get_bench(arr []string, repeat int) {
	// Setup phase: not part of the measurement.
	mut table := map[string]int
	for key in arr {
		table[key] = 1
	}
	// Timed phase: lookups only; the indexed values are discarded.
	begin := time.ticks()
	for _ in 0..repeat {
		for key in arr {
			table[key]
		}
	}
	elapsed := time.ticks() - begin
	println("* map_get: ${elapsed} ms")
}
// benchmark_many_keys runs the enabled benchmarks on large key sets.
// The number of keys starts at 2048 and doubles each round for as long as
// it stays at or below 10000000; every key is `key_len` random lowercase
// letters and each benchmark is executed `repeat` time(s) per round.
fn benchmark_many_keys() {
	key_len := 30
	repeat := 1
	// Number of letters in `a`..`z` inclusive. The original passed
	// `z` - `a` (25) to rand.next, whose upper bound is exclusive, so `z`
	// could never be generated; the + 1 makes the whole alphabet reachable.
	alphabet_len := int(`z`) - int(`a`) + 1
	for i := 2048; i <= 10000000; i = i * 2 {
		// Build `i` random keys for this round.
		mut arr := []string
		for _ in 0..i {
			mut buf := []byte
			for j in 0..key_len {
				buf << byte(rand.next(alphabet_len) + `a`)
			}
			s := string(buf)
			arr << s
		}
		println("$arr.len keys of length $key_len")
		// Uncomment the benchmarks you would like to run.
		// Run only one or two at a time while memory leaks are still an issue.
		hashmap_get_bench(arr, repeat)
		map_get_bench(arr, repeat)
		// hashmap_set_bench(arr, repeat)
		// map_set_bench(arr, repeat)
		println('')
	}
}
// benchmark_few_keys runs the enabled benchmarks on small key sets.
// The number of keys starts at 16 and doubles each round up to and
// including 2048; every key is `key_len` random lowercase letters and each
// benchmark is repeated `repeat` times per round so that the small
// workloads take a measurable amount of time.
fn benchmark_few_keys() {
	key_len := 30
	repeat := 10000
	// Number of letters in `a`..`z` inclusive. The original passed
	// `z` - `a` (25) to rand.next, whose upper bound is exclusive, so `z`
	// could never be generated; the + 1 makes the whole alphabet reachable.
	alphabet_len := int(`z`) - int(`a`) + 1
	println("Benchmarks are repeated $repeat times")
	for i := 16; i <= 2048; i = i * 2 {
		// Build `i` random keys for this round.
		mut arr := []string
		for _ in 0..i {
			mut buf := []byte
			for j in 0..key_len {
				buf << byte(rand.next(alphabet_len) + `a`)
			}
			s := string(buf)
			arr << s
		}
		println("$arr.len keys of length $key_len")
		// Uncomment the benchmarks you would like to run.
		// Run only one or two at a time while memory leaks are still an issue.
		hashmap_get_bench(arr, repeat)
		map_get_bench(arr, repeat)
		// hashmap_set_bench(arr, repeat)
		// map_set_bench(arr, repeat)
		println('')
	}
}
// main is the benchmark entry point; by default only the small-key-set
// benchmarks are run.
fn main() {
	// Enable the next line to also benchmark on large key sets.
	// benchmark_many_keys()
	benchmark_few_keys()
}

View File

@ -3,6 +3,8 @@
// that can be found in the LICENSE file. // that can be found in the LICENSE file.
module hashmap module hashmap
import hash.wyhash
const ( const (
initial_size = 2 << 4 initial_size = 2 << 4
initial_cap = initial_size - 1 initial_cap = initial_size - 1
@ -10,23 +12,6 @@ const (
load_factor = 0.8 load_factor = 0.8
) )
// hash-function should not be in this file
const (
fnv64_prime = 1099511628211
fnv64_offset_basis = 14695981039346656037
fnv32_offset_basis = u32(2166136261)
fnv32_prime = u32(16777619)
)
[inline]
fn fnv1a64(data string) u64 {
mut hash := fnv64_offset_basis
for i := 0; i < data.len; i++ {
hash = (hash ^ u64(data[i])) * fnv64_prime
}
return hash
}
pub struct Hashmap { pub struct Hashmap {
mut: mut:
info &u16 info &u16
@ -54,14 +39,11 @@ pub fn new_hashmap() Hashmap {
} }
pub fn (h mut Hashmap) set(key string, value int) { pub fn (h mut Hashmap) set(key string, value int) {
// The load factor is 0.5. // load_factor can be adjusted.
// It will be adjustable in the future and with
// a higher default settings to lower memory usage.
if (f32(h.size) / f32(h.cap)) > h.load_factor { if (f32(h.size) / f32(h.cap)) > h.load_factor {
h.rehash() h.rehash()
} }
// Hash-function will be swapped for wyhash hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
hash := fnv1a64(key)
mut info := u16((hash >> 56) | probe_offset) mut info := u16((hash >> 56) | probe_offset)
mut index := hash & h.cap mut index := hash & h.cap
// While probe count is less // While probe count is less
@ -115,7 +97,7 @@ fn (h mut Hashmap) rehash() {
for i in 0 .. (old_cap + 1) { for i in 0 .. (old_cap + 1) {
if h.info[i] != 0 { if h.info[i] != 0 {
mut kv := h.key_values[i] mut kv := h.key_values[i]
hash := fnv1a64(kv.key) hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0)
mut info := u16((hash >> 56) | probe_offset) mut info := u16((hash >> 56) | probe_offset)
mut index := hash & h.cap mut index := hash & h.cap
// While probe count is less // While probe count is less
@ -154,7 +136,7 @@ fn (h mut Hashmap) rehash() {
} }
pub fn (h mut Hashmap) delete(key string) { pub fn (h mut Hashmap) delete(key string) {
hash := fnv1a64(key) hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
mut index := hash & h.cap mut index := hash & h.cap
mut info := u16((hash >> 56) | probe_offset) mut info := u16((hash >> 56) | probe_offset)
for info < h.info[index] { for info < h.info[index] {
@ -184,7 +166,7 @@ pub fn (h mut Hashmap) delete(key string) {
} }
pub fn (h Hashmap) get(key string) int { pub fn (h Hashmap) get(key string) int {
hash := fnv1a64(key) hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
mut index := hash & h.cap mut index := hash & h.cap
mut info := u16((hash >> 56) | probe_offset) mut info := u16((hash >> 56) | probe_offset)
for info < h.info[index] { for info < h.info[index] {
@ -202,7 +184,7 @@ pub fn (h Hashmap) get(key string) int {
} }
pub fn (h Hashmap) exists(key string) bool { pub fn (h Hashmap) exists(key string) bool {
hash := fnv1a64(key) hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
mut index := hash & h.cap mut index := hash & h.cap
mut info := u16((hash >> 56) | probe_offset) mut info := u16((hash >> 56) | probe_offset)
for info < h.info[index] { for info < h.info[index] {