hashmap: new and fast hashmap with dynamic size

pull/3558/head
ka-weihe 2020-01-24 20:13:17 +01:00 committed by Alexander Medvednikov
parent 219239eadc
commit 6fd175d9be
2 changed files with 229 additions and 108 deletions

View File

@ -2,120 +2,242 @@
// Use of this source code is governed by an MIT license // Use of this source code is governed by an MIT license
// that can be found in the LICENSE file. // that can be found in the LICENSE file.
module hashmap module hashmap
/*
This is work in progress.
A very early test version of the Hashmap with a fixed size.
Only works with string keys and int values for now.
I added this to improve performance of the V compiler,
which uses lots of O(log n) map gets. It turned out that with N < 10 000
the performance gains are basically non-existent.
*/
struct Hashmap {
cap int
keys []string
table []Hashmapentry
elm_size int
pub mut:
nr_collisions int
}
struct Hashmapentry {
mut:
key string
val int
next &Hashmapentry // linked list for collisions
}
const ( const (
min_cap = 2<<10 initial_size = 2<<4
max_cap = 2<<20 initial_cap = initial_size - 1
) load_factor = 0.5
probe_offset = u16(256)
const( fnv64_prime = 1099511628211
fnv64_prime = 1099511628211
fnv64_offset_basis = 14695981039346656037 fnv64_offset_basis = 14695981039346656037
fnv32_offset_basis = u32(2166136261)
fnv32_prime = u32(16777619)
) )
const( pub struct Hashmap {
fnv32_offset_basis = u32(2166136261) mut:
fnv32_prime = u32(16777619) info &u16
) key_values &KeyValue
cap int
pub fn new_hashmap(planned_nr_items int) Hashmap { pub mut:
mut cap := planned_nr_items * 5 size int
if cap < min_cap {
cap = min_cap
}
if cap > max_cap {
cap = max_cap
}
return Hashmap{
cap: cap
elm_size: 4
table: make(cap, cap, sizeof(Hashmapentry))
}
} }
pub fn (m mut Hashmap) set(key string, val int) { struct KeyValue {
// mut hash := int(b_fabs(key.hash())) key string
// idx := hash % m.cap mut:
idx := int(fnv1a32(key) % m.cap) value int
if m.table[idx].key.len != 0 {
// println('\nset() idx=$idx key="$key" hash="$hash" val=$val')
m.nr_collisions++
// println('collision:' + m.table[idx].key)
mut e := &m.table[idx]
for e.next != 0 {
e = e.next
}
e.next = &Hashmapentry{
key,val,0}
}
else {
m.table[idx] = Hashmapentry{
key,val,0}
}
}
pub fn (m &Hashmap) get(key string) int {
// mut hash := int(b_fabs(key.hash()))
// idx := hash % m.cap
idx := int(fnv1a32(key) % m.cap)
mut e := &m.table[idx]
for e.next != 0 {
// todo unsafe {
if e.key == key {
return e.val
}
e = e.next
}
return e.val
}
[inline]
fn b_fabs(v int) f64 {
return if v < 0 { -v } else { v }
}
// inline functions here for speed
// rather than full impl in vlib
[inline]
fn fnv1a32(data string) u32 {
mut hash := fnv32_offset_basis
for i := 0; i < data.len; i++ {
hash = (hash ^ u32(data[i])) * fnv32_prime
}
return hash
} }
[inline] [inline]
fn fnv1a64(data string) u64 { fn fnv1a64(data string) u64 {
mut hash := fnv64_offset_basis mut hash := fnv64_offset_basis
for i := 0; i < data.len; i++ { for i := 0; i < data.len; i++ {
hash = (hash ^ u64(data[i])) * fnv64_prime hash = (hash ^ u64(data[i])) * fnv64_prime
} }
return hash return hash
}
// new_hashmap returns an empty Hashmap.
// Both backing arrays are zero-filled and hold `initial_size` slots;
// `cap` is stored as `initial_cap` so it can be used as an index mask.
pub fn new_hashmap() Hashmap {
	info := &u16(calloc(sizeof(u16) * initial_size))
	key_values := &KeyValue(calloc(sizeof(KeyValue) * initial_size))
	return Hashmap{
		info: info
		key_values: key_values
		cap: initial_cap
		size: 0
	}
}
// set stores `value` under `key`. If `key` is already present its value is
// overwritten, otherwise a new entry is inserted with Robin Hood probing.
//
// Each slot has a u16 `info`: the high byte is the probe count (1 = entry sits
// in its home slot, 0 = slot is empty) and the low byte is the top 8 bits of
// the hash. Comparing whole `info` values therefore orders entries by probe
// count first and by hash bits second.
pub fn (h mut Hashmap) set(key string, value int) {
	// The load factor is 0.5.
	// It will be adjustable in the future, with a higher default
	// setting to lower memory usage.
	// `>=` instead of `==` so the table still grows if the exact
	// threshold is ever stepped over.
	if (h.size<<1) >= (h.cap - 1) {
		h.rehash()
	}
	// Hash-function will be swapped for wyhash
	hash := fnv1a64(key)
	mut info := u16((hash>>56) | probe_offset)
	mut index := hash & h.cap
	// While probe count is less
	for info < h.info[index] {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// While we might have a match
	for info == h.info[index] {
		if key == h.key_values[index].key {
			h.key_values[index].value = value
			return
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Match is not possible anymore.
	// Probe until an empty index is found.
	// Swap when probe count is higher/richer (Robin Hood).
	// The loop also stops as soon as the probe-count byte reaches 0xFF,
	// so that such an `info` is never stored by a swap and never wraps
	// around the u16 on the next increment.
	mut current_key := key
	mut current_value := value
	for h.info[index] != 0 && (info & 0xFF00) != 0xFF00 {
		if info > h.info[index] {
			tmp_kv := h.key_values[index]
			tmp_info := h.info[index]
			h.key_values[index] = KeyValue{
				current_key,current_value}
			h.info[index] = info
			current_key = tmp_kv.key
			current_value = tmp_kv.value
			info = tmp_info
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Should almost never happen: the probe count no longer fits in one byte.
	// Grow the table and insert the entry that is still in hand. After a swap
	// that is the displaced entry, and the recursive call does the `size++`.
	if (info & 0xFF00) == 0xFF00 {
		h.rehash()
		h.set(current_key, current_value)
		return
	}
	h.info[index] = info
	h.key_values[index] = KeyValue{
		current_key,current_value}
	h.size++
}
// rehash doubles the capacity and re-inserts every stored entry into freshly
// allocated, zero-filled arrays.
//
// `h.cap`, `h.info` and `h.key_values` are only replaced once the new table
// is complete, so the map is never left with a capacity that does not match
// its arrays. If an entry's probe count would not fit in the high byte of
// `info`, the attempt is discarded and retried with the capacity doubled again.
// NOTE(review): like the original set/rehash recursion, this cannot terminate
// if more than ~254 distinct keys share the same 64-bit hash.
fn (h mut Hashmap) rehash() {
	old_cap := h.cap
	mut new_cap := old_cap
	for {
		// cap is always (number of slots - 1), i.e. an index mask.
		new_cap = ((new_cap + 1)<<1) - 1
		mut new_key_values := &KeyValue(calloc(sizeof(KeyValue) * (new_cap + 1)))
		mut new_info := &u16(calloc(sizeof(u16) * (new_cap + 1)))
		mut overflow := false
		for i in 0 .. (old_cap + 1) {
			if h.info[i] == 0 {
				// empty slot in the old table
				continue
			}
			mut current_key := h.key_values[i].key
			mut current_value := h.key_values[i].value
			hash := fnv1a64(current_key)
			mut info := u16((hash>>56) | probe_offset)
			mut index := hash & new_cap
			// Keys in the old table are unique, so no equality search is
			// needed here: walk to the first empty slot, swapping whenever
			// the entry in hand is richer (Robin Hood). Slots holding an
			// equal or higher `info` are simply stepped over.
			for new_info[index] != 0 && (info & 0xFF00) != 0xFF00 {
				if info > new_info[index] {
					tmp_kv := new_key_values[index]
					tmp_info := new_info[index]
					new_key_values[index] = KeyValue{
						current_key,current_value}
					new_info[index] = info
					current_key = tmp_kv.key
					current_value = tmp_kv.value
					info = tmp_info
				}
				index = (index + 1) & new_cap
				info += probe_offset
			}
			// Should almost never happen
			if (info & 0xFF00) == 0xFF00 {
				overflow = true
				break
			}
			new_info[index] = info
			new_key_values[index] = KeyValue{
				current_key,current_value}
		}
		if overflow {
			// Throw the partial table away and try again with more room.
			free(new_key_values)
			free(new_info)
			continue
		}
		// Release the old arrays. The key strings themselves were copied
		// into the new table by value and are not touched.
		free(h.key_values)
		free(h.info)
		h.key_values = new_key_values
		h.info = new_info
		h.cap = new_cap
		break
	}
}
// delete removes `key` from the map; it does nothing when `key` is absent.
// The gap left behind is closed by backward shifting: every following entry
// that is not in its home slot moves one slot back and its probe count is
// decreased by one.
pub fn (h mut Hashmap) delete(key string) {
	hash := fnv1a64(key)
	mut info := u16((hash>>56) | probe_offset)
	mut index := hash & h.cap
	// Step over entries that are further from home than `key` would be.
	for h.info[index] > info {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Only slots whose info equals ours can hold the key.
	for h.info[index] == info {
		if key != h.key_values[index].key {
			index = (index + 1) & h.cap
			info += probe_offset
			continue
		}
		// Found it: perform backwards shifting.
		mut hole := index
		mut probe := (index + 1) & h.cap
		// A probe count above 1 means the entry is displaced from its home.
		for (h.info[probe]>>8) > 1 {
			h.info[hole] = h.info[probe] - probe_offset
			h.key_values[hole] = h.key_values[probe]
			hole = probe
			probe = (probe + 1) & h.cap
		}
		// Mark the last vacated slot as empty.
		h.info[hole] = 0
		h.size--
		return
	}
}
// get returns the value stored under `key`, or 0 when `key` is not present.
pub fn (h Hashmap) get(key string) int {
	hash := fnv1a64(key)
	mut info := u16((hash>>56) | probe_offset)
	mut index := hash & h.cap
	// Step over slots whose info is greater than ours.
	for h.info[index] > info {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Candidates: slots whose info equals ours.
	for h.info[index] == info {
		if h.key_values[index].key == key {
			return h.key_values[index].value
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Not found.
	return 0
}
// exists reports whether `key` is present in the map.
pub fn (h Hashmap) exists(key string) bool {
	hash := fnv1a64(key)
	mut info := u16((hash>>56) | probe_offset)
	mut index := hash & h.cap
	// Step over slots whose info is greater than ours.
	for h.info[index] > info {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Candidates: slots whose info equals ours.
	for h.info[index] == info {
		if h.key_values[index].key == key {
			return true
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	return false
}
// keys returns the keys of all occupied slots, in slot order.
// The result has `h.size` elements.
pub fn (h Hashmap) keys() []string {
size := h.size
// Pre-size the result with one empty string per stored entry.
mut keys := [''].repeat(size)
// j is the next free position in `keys`.
mut j := 0
// Visit every slot (indices 0 through cap inclusive);
// a non-zero info marks an occupied slot.
for i in 0 .. (h.cap + 1) {
if h.info[i] != 0 {
keys[j] = h.key_values[i].key
j++
}
}
return keys
} }

View File

@ -3,7 +3,7 @@ module hashmap
import rand import rand
fn test_random_strings() { fn test_random_strings() {
mut m := new_hashmap(1000) mut m := new_hashmap()
for i in 0..1000 { for i in 0..1000 {
mut buf := []byte mut buf := []byte
for j in 0..10 { for j in 0..10 {
@ -21,12 +21,11 @@ fn test_random_strings() {
fn test_large_hashmap() { fn test_large_hashmap() {
N := 300 * 1000 N := 300 * 1000
mut nums := new_hashmap(N) mut nums := new_hashmap()
for i := 0; i < N; i++ { for i := 0; i < N; i++ {
key := i.str() key := i.str()
nums.set(key, i) nums.set(key, i)
} }
println('nr collisions: $nums.nr_collisions')
for i := 0; i < N; i++ { for i := 0; i < N; i++ {
key := i.str() key := i.str()
assert nums.get(key) == i assert nums.get(key) == i