hashmap: change hash-function to wyhash and add benchmarks
parent
5c29e7b257
commit
a14a5fbf95
|
@ -0,0 +1,112 @@
|
|||
import rand
|
||||
import time
|
||||
import builtin.hashmap
|
||||
|
||||
// hashmap_set_bench measures insertion into the custom Hashmap.
// For each of the `repeat` rounds a fresh Hashmap is created and every key
// in `arr` is stored with the value 1. The difference between the
// time.ticks() readings taken before and after all rounds is printed.
fn hashmap_set_bench(arr []string, repeat int) {
	t0 := time.ticks()
	for _ in 0..repeat {
		mut table := hashmap.new_hashmap()
		for key in arr {
			table.set(key, 1)
		}
	}
	end_time := time.ticks() - t0
	println("* hashmap_set: ${end_time} ms")
}
|
||||
|
||||
// map_set_bench measures insertion into the builtin map[string]int.
// For each of the `repeat` rounds a fresh map is created and every key in
// `arr` is stored with the value 1. The difference between the time.ticks()
// readings taken before and after all rounds is printed.
fn map_set_bench(arr []string, repeat int) {
	t0 := time.ticks()
	for _ in 0..repeat {
		mut table := map[string]int
		for key in arr {
			table[key] = 1
		}
	}
	end_time := time.ticks() - t0
	println("* map_set: ${end_time} ms")
}
|
||||
|
||||
// hashmap_get_bench measures lookups in the custom Hashmap.
// The table is filled with every key in `arr` before the clock starts, so
// only the `repeat` rounds of get() calls are timed. The looked-up values
// are discarded; the time.ticks() difference is printed.
fn hashmap_get_bench(arr []string, repeat int) {
	// Setup (not timed): populate the table once.
	mut table := hashmap.new_hashmap()
	for key in arr {
		table.set(key, 1)
	}
	t0 := time.ticks()
	for _ in 0..repeat {
		for key in arr {
			table.get(key)
		}
	}
	end_time := time.ticks() - t0
	println("* hashmap_get: ${end_time} ms")
}
|
||||
|
||||
// map_get_bench measures lookups in the builtin map[string]int.
// The map is filled with every key in `arr` before the clock starts, so
// only the `repeat` rounds of index reads are timed. The looked-up values
// are discarded; the time.ticks() difference is printed.
fn map_get_bench(arr []string, repeat int) {
	// Setup (not timed): populate the map once.
	mut table := map[string]int
	for key in arr {
		table[key] = 1
	}
	t0 := time.ticks()
	for _ in 0..repeat {
		for key in arr {
			table[key]
		}
	}
	end_time := time.ticks() - t0
	println("* map_get: ${end_time} ms")
}
|
||||
|
||||
// benchmark_many_keys runs the enabled benchmarks on large key sets.
// The number of keys starts at 2048 and doubles for as long as it stays at
// or below 10000000. Each key is a string of `key_len` random lowercase
// letters, and every benchmark is run `repeat` (1) time per key set.
fn benchmark_many_keys() {
	key_len := 30
	repeat := 1
	// rand.next(max) yields values below max, so the span has to count `z`
	// itself; without the + 1 the letter `z` could never be generated.
	alphabet_len := int(`z`) - int(`a`) + 1
	for i := 2048; i <= 10000000; i = i * 2 {
		mut arr := []string
		for _ in 0..i {
			mut buf := []byte
			for _ in 0..key_len {
				buf << byte(rand.next(alphabet_len) + `a`)
			}
			s := string(buf)
			arr << s
		}
		println("$arr.len keys of length $key_len")
		// Uncomment the benchmarks you want to run.
		// Run only one or two at a time while memory leaks are still an issue.
		hashmap_get_bench(arr, repeat)
		map_get_bench(arr, repeat)
		// hashmap_set_bench(arr, repeat)
		// map_set_bench(arr, repeat)
		println('')
	}
}
|
||||
|
||||
// benchmark_few_keys runs the enabled benchmarks on small key sets.
// The number of keys starts at 16 and doubles up to and including 2048.
// Each key is a string of `key_len` random lowercase letters, and every
// benchmark is run `repeat` (10000) times per key set.
fn benchmark_few_keys() {
	key_len := 30
	repeat := 10000
	println("Benchmarks are repeated $repeat times")
	// rand.next(max) yields values below max, so the span has to count `z`
	// itself; without the + 1 the letter `z` could never be generated.
	alphabet_len := int(`z`) - int(`a`) + 1
	for i := 16; i <= 2048; i = i * 2 {
		mut arr := []string
		for _ in 0..i {
			mut buf := []byte
			for _ in 0..key_len {
				buf << byte(rand.next(alphabet_len) + `a`)
			}
			s := string(buf)
			arr << s
		}
		println("$arr.len keys of length $key_len")
		// Uncomment the benchmarks you want to run.
		// Run only one or two at a time while memory leaks are still an issue.
		hashmap_get_bench(arr, repeat)
		map_get_bench(arr, repeat)
		// hashmap_set_bench(arr, repeat)
		// map_set_bench(arr, repeat)
		println('')
	}
}
|
||||
|
||||
// main is the benchmark entry point; it runs the small-key-set suite.
fn main() {
	// The large-key-set suite is disabled by default.
	// Uncomment the next line to run it as well:
	// benchmark_many_keys()
	benchmark_few_keys()
}
|
|
@ -3,6 +3,8 @@
|
|||
// that can be found in the LICENSE file.
|
||||
module hashmap
|
||||
|
||||
import hash.wyhash
|
||||
|
||||
const (
|
||||
initial_size = 2 << 4
|
||||
initial_cap = initial_size - 1
|
||||
|
@ -10,23 +12,6 @@ const (
|
|||
load_factor = 0.8
|
||||
)
|
||||
|
||||
// hash-function should not be in this file
// The constants below are the standard FNV (Fowler-Noll-Vo) parameters.
|
||||
const (
|
||||
// 64-bit FNV prime (0x100000001b3); the per-byte multiplier in fnv1a64.
fnv64_prime = 1099511628211
|
||||
// 64-bit FNV offset basis (0xcbf29ce484222325); the starting value in fnv1a64.
fnv64_offset_basis = 14695981039346656037
|
||||
// 32-bit FNV offset basis (0x811c9dc5); not referenced by the code visible here.
fnv32_offset_basis = u32(2166136261)
|
||||
// 32-bit FNV prime (0x01000193); not referenced by the code visible here.
fnv32_prime = u32(16777619)
|
||||
)
|
||||
|
||||
// fnv1a64 hashes `data` into a u64.
// Starting from fnv64_offset_basis, each byte of `data` is XOR-ed into the
// running value, which is then multiplied by fnv64_prime. An empty string
// returns fnv64_offset_basis unchanged.
[inline]
fn fnv1a64(data string) u64 {
	mut acc := fnv64_offset_basis
	for idx in 0..data.len {
		acc = (acc ^ u64(data[idx])) * fnv64_prime
	}
	return acc
}
|
||||
|
||||
pub struct Hashmap {
|
||||
mut:
|
||||
info &u16
|
||||
|
@ -54,14 +39,11 @@ pub fn new_hashmap() Hashmap {
|
|||
}
|
||||
|
||||
pub fn (h mut Hashmap) set(key string, value int) {
|
||||
// The load factor is 0.5.
|
||||
// It will be adjustable in the future and with
|
||||
// a higher default settings to lower memory usage.
|
||||
// load_factor can be adjusted.
|
||||
if (f32(h.size) / f32(h.cap)) > h.load_factor {
|
||||
h.rehash()
|
||||
}
|
||||
// Hash-function will be swapped for wyhash
|
||||
hash := fnv1a64(key)
|
||||
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
||||
mut info := u16((hash >> 56) | probe_offset)
|
||||
mut index := hash & h.cap
|
||||
// While probe count is less
|
||||
|
@ -115,7 +97,7 @@ fn (h mut Hashmap) rehash() {
|
|||
for i in 0 .. (old_cap + 1) {
|
||||
if h.info[i] != 0 {
|
||||
mut kv := h.key_values[i]
|
||||
hash := fnv1a64(kv.key)
|
||||
hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0)
|
||||
mut info := u16((hash >> 56) | probe_offset)
|
||||
mut index := hash & h.cap
|
||||
// While probe count is less
|
||||
|
@ -154,7 +136,7 @@ fn (h mut Hashmap) rehash() {
|
|||
}
|
||||
|
||||
pub fn (h mut Hashmap) delete(key string) {
|
||||
hash := fnv1a64(key)
|
||||
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
||||
mut index := hash & h.cap
|
||||
mut info := u16((hash >> 56) | probe_offset)
|
||||
for info < h.info[index] {
|
||||
|
@ -184,7 +166,7 @@ pub fn (h mut Hashmap) delete(key string) {
|
|||
}
|
||||
|
||||
pub fn (h Hashmap) get(key string) int {
|
||||
hash := fnv1a64(key)
|
||||
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
||||
mut index := hash & h.cap
|
||||
mut info := u16((hash >> 56) | probe_offset)
|
||||
for info < h.info[index] {
|
||||
|
@ -202,7 +184,7 @@ pub fn (h Hashmap) get(key string) int {
|
|||
}
|
||||
|
||||
pub fn (h Hashmap) exists(key string) bool {
|
||||
hash := fnv1a64(key)
|
||||
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
||||
mut index := hash & h.cap
|
||||
mut info := u16((hash >> 56) | probe_offset)
|
||||
for info < h.info[index] {
|
||||
|
|
Loading…
Reference in New Issue