hashmap: change hash-function to wyhash and add benchmarks
parent
5c29e7b257
commit
a14a5fbf95
|
@ -0,0 +1,112 @@
|
||||||
|
import rand
|
||||||
|
import time
|
||||||
|
import builtin.hashmap
|
||||||
|
|
||||||
|
// hashmap_set_bench measures insertion into the experimental hashmap.
// For each of the `repeat` rounds it creates a new hashmap and sets every
// key of `arr` to 1, then prints the time.ticks() delta for all rounds.
fn hashmap_set_bench(arr []string, repeat int) {
	begin := time.ticks()
	for round := 0; round < repeat; round++ {
		// A new map per round, so every round starts from an empty table.
		mut hm := hashmap.new_hashmap()
		for key in arr {
			hm.set(key, 1)
		}
	}
	end_time := time.ticks() - begin
	println("* hashmap_set: ${end_time} ms")
}
|
||||||
|
|
||||||
|
// map_set_bench measures insertion into the builtin map[string]int.
// For each of the `repeat` rounds it creates a new map and assigns 1 to
// every key of `arr`, then prints the time.ticks() delta for all rounds.
fn map_set_bench(arr []string, repeat int) {
	begin := time.ticks()
	for round := 0; round < repeat; round++ {
		// A new map per round, so every round starts from an empty map.
		mut m := map[string]int
		for key in arr {
			m[key] = 1
		}
	}
	end_time := time.ticks() - begin
	println("* map_set: ${end_time} ms")
}
|
||||||
|
|
||||||
|
// hashmap_get_bench measures lookups in the experimental hashmap.
// The map is filled with every key of `arr` before the clock starts, so
// only the `repeat` passes of get() calls are included in the printed time.
fn hashmap_get_bench(arr []string, repeat int) {
	// Untimed setup: insert all keys once.
	mut hm := hashmap.new_hashmap()
	for key in arr {
		hm.set(key, 1)
	}
	begin := time.ticks()
	for round := 0; round < repeat; round++ {
		for key in arr {
			// The returned value is intentionally discarded.
			hm.get(key)
		}
	}
	end_time := time.ticks() - begin
	println("* hashmap_get: ${end_time} ms")
}
|
||||||
|
|
||||||
|
// map_get_bench measures lookups in the builtin map[string]int.
// The map is filled with every key of `arr` before the clock starts, so
// only the `repeat` passes of index reads are included in the printed time.
fn map_get_bench(arr []string, repeat int) {
	// Untimed setup: insert all keys once.
	mut m := map[string]int
	for key in arr {
		m[key] = 1
	}
	begin := time.ticks()
	for round := 0; round < repeat; round++ {
		for key in arr {
			// The looked-up value is intentionally discarded.
			m[key]
		}
	}
	end_time := time.ticks() - begin
	println("* map_get: ${end_time} ms")
}
|
||||||
|
|
||||||
|
// benchmark_many_keys runs the enabled benchmarks a single time each over
// key sets whose size starts at 2048 and doubles while it stays
// <= 10000000. Every key is a random string of `key_len` lowercase letters.
fn benchmark_many_keys() {
	key_len := 30
	repeat := 1
	// Size of the alphabet `a`..`z`. The bound passed to rand.next() is
	// exclusive, so without the +1 the letter `z` could never be generated.
	letters := int(`z`) - int(`a`) + 1
	for i := 2048; i <= 10000000; i = i * 2 {
		// Build `i` random keys for this round.
		mut arr := []string
		for _ in 0..i {
			mut buf := []byte
			for _ in 0..key_len {
				buf << byte(rand.next(letters) + `a`)
			}
			s := string(buf)
			arr << s
		}
		println("$arr.len keys of length $key_len")
		// Uncomment the benchmarks you would like to run.
		// Run only one or two at a time while memory leaks are still an issue.
		hashmap_get_bench(arr, repeat)
		map_get_bench(arr, repeat)
		// hashmap_set_bench(arr, repeat)
		// map_set_bench(arr, repeat)
		println('')
	}
}
|
||||||
|
|
||||||
|
// benchmark_few_keys runs the enabled benchmarks over small key sets whose
// size doubles from 16 up to and including 2048. Each benchmark is repeated
// `repeat` times per key set. Every key is a random string of `key_len`
// lowercase letters.
fn benchmark_few_keys() {
	key_len := 30
	repeat := 10000
	println("Benchmarks are repeated $repeat times")
	// Size of the alphabet `a`..`z`. The bound passed to rand.next() is
	// exclusive, so without the +1 the letter `z` could never be generated.
	letters := int(`z`) - int(`a`) + 1
	for i := 16; i <= 2048; i = i * 2 {
		// Build `i` random keys for this round.
		mut arr := []string
		for _ in 0..i {
			mut buf := []byte
			for _ in 0..key_len {
				buf << byte(rand.next(letters) + `a`)
			}
			s := string(buf)
			arr << s
		}
		println("$arr.len keys of length $key_len")
		// Uncomment the benchmarks you would like to run.
		// Run only one or two at a time while memory leaks are still an issue.
		hashmap_get_bench(arr, repeat)
		map_get_bench(arr, repeat)
		// hashmap_set_bench(arr, repeat)
		// map_set_bench(arr, repeat)
		println('')
	}
}
|
||||||
|
|
||||||
|
// main is the benchmark entry point; by default only the few-keys
// benchmark is executed.
fn main() {
	// The many-keys benchmark is disabled by default. Remove the comment
	// marker on the next line to run it as well.
	// benchmark_many_keys()
	benchmark_few_keys()
}
|
|
@ -3,6 +3,8 @@
|
||||||
// that can be found in the LICENSE file.
|
// that can be found in the LICENSE file.
|
||||||
module hashmap
|
module hashmap
|
||||||
|
|
||||||
|
import hash.wyhash
|
||||||
|
|
||||||
const (
|
const (
|
||||||
initial_size = 2 << 4
|
initial_size = 2 << 4
|
||||||
initial_cap = initial_size - 1
|
initial_cap = initial_size - 1
|
||||||
|
@ -10,23 +12,6 @@ const (
|
||||||
load_factor = 0.8
|
load_factor = 0.8
|
||||||
)
|
)
|
||||||
|
|
||||||
// hash-function should not be in this file
|
|
||||||
const (
|
|
||||||
fnv64_prime = 1099511628211
|
|
||||||
fnv64_offset_basis = 14695981039346656037
|
|
||||||
fnv32_offset_basis = u32(2166136261)
|
|
||||||
fnv32_prime = u32(16777619)
|
|
||||||
)
|
|
||||||
|
|
||||||
[inline]
|
|
||||||
fn fnv1a64(data string) u64 {
|
|
||||||
mut hash := fnv64_offset_basis
|
|
||||||
for i := 0; i < data.len; i++ {
|
|
||||||
hash = (hash ^ u64(data[i])) * fnv64_prime
|
|
||||||
}
|
|
||||||
return hash
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Hashmap {
|
pub struct Hashmap {
|
||||||
mut:
|
mut:
|
||||||
info &u16
|
info &u16
|
||||||
|
@ -54,14 +39,11 @@ pub fn new_hashmap() Hashmap {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn (h mut Hashmap) set(key string, value int) {
|
pub fn (h mut Hashmap) set(key string, value int) {
|
||||||
// The load factor is 0.5.
|
// load_factor can be adjusted.
|
||||||
// It will be adjustable in the future and with
|
|
||||||
// a higher default settings to lower memory usage.
|
|
||||||
if (f32(h.size) / f32(h.cap)) > h.load_factor {
|
if (f32(h.size) / f32(h.cap)) > h.load_factor {
|
||||||
h.rehash()
|
h.rehash()
|
||||||
}
|
}
|
||||||
// Hash-function will be swapped for wyhash
|
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
||||||
hash := fnv1a64(key)
|
|
||||||
mut info := u16((hash >> 56) | probe_offset)
|
mut info := u16((hash >> 56) | probe_offset)
|
||||||
mut index := hash & h.cap
|
mut index := hash & h.cap
|
||||||
// While probe count is less
|
// While probe count is less
|
||||||
|
@ -115,7 +97,7 @@ fn (h mut Hashmap) rehash() {
|
||||||
for i in 0 .. (old_cap + 1) {
|
for i in 0 .. (old_cap + 1) {
|
||||||
if h.info[i] != 0 {
|
if h.info[i] != 0 {
|
||||||
mut kv := h.key_values[i]
|
mut kv := h.key_values[i]
|
||||||
hash := fnv1a64(kv.key)
|
hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0)
|
||||||
mut info := u16((hash >> 56) | probe_offset)
|
mut info := u16((hash >> 56) | probe_offset)
|
||||||
mut index := hash & h.cap
|
mut index := hash & h.cap
|
||||||
// While probe count is less
|
// While probe count is less
|
||||||
|
@ -154,7 +136,7 @@ fn (h mut Hashmap) rehash() {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn (h mut Hashmap) delete(key string) {
|
pub fn (h mut Hashmap) delete(key string) {
|
||||||
hash := fnv1a64(key)
|
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
||||||
mut index := hash & h.cap
|
mut index := hash & h.cap
|
||||||
mut info := u16((hash >> 56) | probe_offset)
|
mut info := u16((hash >> 56) | probe_offset)
|
||||||
for info < h.info[index] {
|
for info < h.info[index] {
|
||||||
|
@ -184,7 +166,7 @@ pub fn (h mut Hashmap) delete(key string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn (h Hashmap) get(key string) int {
|
pub fn (h Hashmap) get(key string) int {
|
||||||
hash := fnv1a64(key)
|
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
||||||
mut index := hash & h.cap
|
mut index := hash & h.cap
|
||||||
mut info := u16((hash >> 56) | probe_offset)
|
mut info := u16((hash >> 56) | probe_offset)
|
||||||
for info < h.info[index] {
|
for info < h.info[index] {
|
||||||
|
@ -202,7 +184,7 @@ pub fn (h Hashmap) get(key string) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn (h Hashmap) exists(key string) bool {
|
pub fn (h Hashmap) exists(key string) bool {
|
||||||
hash := fnv1a64(key)
|
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
||||||
mut index := hash & h.cap
|
mut index := hash & h.cap
|
||||||
mut info := u16((hash >> 56) | probe_offset)
|
mut info := u16((hash >> 56) | probe_offset)
|
||||||
for info < h.info[index] {
|
for info < h.info[index] {
|
||||||
|
|
Loading…
Reference in New Issue