hashmap: new and fast hashmap with dynamic size
							parent
							
								
									219239eadc
								
							
						
					
					
						commit
						6fd175d9be
					
				| 
						 | 
				
			
			@ -2,113 +2,31 @@
 | 
			
		|||
// Use of this source code is governed by an MIT license
 | 
			
		||||
// that can be found in the LICENSE file.
 | 
			
		||||
module hashmap
 | 
			
		||||
/*
 | 
			
		||||
	This is work in progress.
 | 
			
		||||
	A very early test version of the Hashmap with a fixed size.
 | 
			
		||||
	Only works with string keys and int values for now.
 | 
			
		||||
 | 
			
		||||
	I added this to improve performance of the V compiler,
 | 
			
		||||
	which uses lots of O(log n) map gets. It turned out that with N < 10 000
 | 
			
		||||
	the performance gains are basically non-existent.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Hashmap is the fixed-size, chained hash table: `table` holds one head
// entry per bucket and colliding keys hang off it as a linked list.
struct Hashmap {
	// number of buckets; new_hashmap clamps it to [min_cap, max_cap]
	cap           int
	// not referenced by any function visible in this file
	keys          []string
	// bucket heads; new_hashmap allocates `cap` of them
	table         []Hashmapentry
	// new_hashmap sets this to 4; nothing visible here reads it
	elm_size      int
pub mut:
	// bumped by set() whenever the target bucket is already occupied
	nr_collisions int
}
 | 
			
		||||
 | 
			
		||||
// Hashmapentry is one key/value node of a bucket chain.
// Field order matters: entries are built positionally as {key, val, next}.
struct Hashmapentry {
mut:
	// key.len == 0 is how set() recognises an unused bucket head
	key  string
	val  int
	// following node in the same bucket, 0 at the end of the chain
	next &Hashmapentry
}
 | 
			
		||||
 | 
			
		||||
const (
	// smallest bucket count new_hashmap will allocate (2<<10 == 2048)
	min_cap = 2<<10
	// largest bucket count new_hashmap will allocate (2<<20 == 2097152)
	max_cap = 2<<20
)
 | 
			
		||||
 | 
			
		||||
const (
	// number of slots a fresh map starts with (2<<4 == 32)
	initial_size = 2<<4
	// slot mask for a fresh map; indices are computed as `hash & cap`
	initial_cap = initial_size - 1
	// not referenced by the visible code; set() checks the same ratio with a shift
	load_factor = 0.5
	// one probe step: added to an entry's info word for every slot it is
	// moved past; the low 8 bits of info carry the top byte of the hash
	probe_offset = u16(256)
	// standard 64-bit FNV prime and offset basis
	// NOTE(review): presumably consumed by fnv1a64, whose body is not shown here
	fnv64_prime = 1099511628211
	fnv64_offset_basis = 14695981039346656037
)
 | 
			
		||||
 | 
			
		||||
const (
	// standard 32-bit FNV offset basis: the starting hash value in fnv1a32
	fnv32_offset_basis = u32(2166136261)
	// standard 32-bit FNV prime: the per-byte multiplier in fnv1a32
	fnv32_prime = u32(16777619)
)
 | 
			
		||||
 | 
			
		||||
pub fn new_hashmap(planned_nr_items int) Hashmap {
 | 
			
		||||
	mut cap := planned_nr_items * 5
 | 
			
		||||
	if cap < min_cap {
 | 
			
		||||
		cap = min_cap
 | 
			
		||||
	}
 | 
			
		||||
	if cap > max_cap {
 | 
			
		||||
		cap = max_cap
 | 
			
		||||
	}
 | 
			
		||||
	return Hashmap{
 | 
			
		||||
		cap: cap
 | 
			
		||||
		elm_size: 4
 | 
			
		||||
		table: make(cap, cap, sizeof(Hashmapentry))
 | 
			
		||||
	}
 | 
			
		||||
// Hashmap is an open-addressing table with string keys and int values.
// `info` and `key_values` are parallel arrays of `cap + 1` slots each.
pub struct Hashmap {
mut:
	// per-slot metadata; 0 marks an empty slot, otherwise the high byte is
	// the probe count and the low byte is the top byte of the key's hash
	info       &u16
	// per-slot key/value payload, valid only where info is non-zero
	key_values &KeyValue
	// slot mask (slot count minus one); indices are `hash & cap`
	cap        int
pub mut:
	// number of stored entries
	size       int
}
 | 
			
		||||
 | 
			
		||||
// set stores `val` under `key` in the chained table.
//
// The bucket is chosen with fnv1a32(key) % m.cap. An unused bucket head
// (empty key) is filled directly. Otherwise the chain is walked: a node that
// already holds `key` has its value overwritten, and only if none does is a
// new node appended. Previously a repeated key was always appended, so get()
// kept returning the first, stale value.
//
// nr_collisions is still incremented every time the bucket head is occupied.
pub fn (m mut Hashmap) set(key string, val int) {
	idx := int(fnv1a32(key) % m.cap)
	if m.table[idx].key.len == 0 {
		m.table[idx] = Hashmapentry{
			key,val,0}
		return
	}
	m.nr_collisions++
	mut e := &m.table[idx]
	for {
		if e.key == key {
			// same key: update in place instead of growing the chain
			e.val = val
			return
		}
		if e.next == 0 {
			break
		}
		e = e.next
	}
	// e is now the last node of the chain
	e.next = &Hashmapentry{
		key,val,0}
}
 | 
			
		||||
 | 
			
		||||
// get returns the value stored under `key`, or 0 when the key is absent.
//
// Every node of the bucket chain is compared, including the last one.
// Previously the loop stopped before testing the final node and returned its
// value unconditionally, so a missing key yielded some other key's value.
pub fn (m &Hashmap) get(key string) int {
	idx := int(fnv1a32(key) % m.cap)
	mut e := &m.table[idx]
	for {
		if e.key == key {
			return e.val
		}
		if e.next == 0 {
			break
		}
		e = e.next
	}
	return 0
}
 | 
			
		||||
 | 
			
		||||
// b_fabs returns the absolute value of `v` as an f64.
// The conversion happens before the negation so that the smallest int,
// whose negation does not fit in an int, still yields a positive result.
[inline]
fn b_fabs(v int) f64 {
	x := f64(v)
	return if x < 0.0 { -x } else { x }
}
 | 
			
		||||
 | 
			
		||||
// inline functions here for speed
 | 
			
		||||
// rather than full impl in vlib
 | 
			
		||||
[inline]
 | 
			
		||||
fn fnv1a32(data string) u32 {
 | 
			
		||||
    mut hash := fnv32_offset_basis
 | 
			
		||||
    for i := 0; i < data.len; i++ {
 | 
			
		||||
        hash = (hash ^ u32(data[i])) * fnv32_prime
 | 
			
		||||
    }
 | 
			
		||||
    return hash
 | 
			
		||||
// KeyValue is the payload of one table slot.
// Field order matters: slots are built positionally as {key, value}.
struct KeyValue {
	// immutable once the slot is written
	key   string
mut:
	// overwritten in place when an existing key is set again
	value int
}
 | 
			
		||||
 | 
			
		||||
[inline]
 | 
			
		||||
| 
						 | 
				
			
			@ -119,3 +37,207 @@ fn fnv1a64(data string) u64 {
 | 
			
		|||
	}
 | 
			
		||||
	return hash
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// new_hashmap returns an empty map with `initial_size` zeroed slots.
// A zero info word marks a slot as empty, so calloc'd memory is a valid
// empty table.
pub fn new_hashmap() Hashmap {
	info_bytes := sizeof(u16) * initial_size
	kv_bytes := sizeof(KeyValue) * initial_size
	return Hashmap{
		info: &u16(calloc(info_bytes))
		key_values: &KeyValue(calloc(kv_bytes))
		cap: initial_cap
		size: 0
	}
}
 | 
			
		||||
 | 
			
		||||
// set stores `value` under `key`, overwriting the value if the key is
// already present.
//
// Each slot's info word holds the probe count in its high byte and the top
// byte of the hash in its low byte. Insertion uses Robin Hood probing: an
// entry that has travelled further takes the slot of one that has travelled
// less, and the displaced entry keeps probing.
pub fn (h mut Hashmap) set(key string, value int) {
	// Grow at a load factor of 0.5. `>=` rather than `==` so the table still
	// grows if size is ever past the threshold.
	if (h.size<<1) >= (h.cap - 1) {
		h.rehash()
	}
	// The hash function is expected to be swapped for wyhash later.
	hash := fnv1a64(key)
	mut info := u16((hash>>56) | probe_offset)
	mut index := hash & h.cap
	// Skip entries that have probed further than we have.
	for info < h.info[index] {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Equal info words are the only slots that can hold this key.
	for info == h.info[index] {
		if key == h.key_values[index].key {
			h.key_values[index].value = value
			return
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// No match is possible any more: probe for an empty slot, swapping with
	// any entry whose probe count is lower than ours (Robin Hood).
	// The loop also stops once the probe count saturates at 0xFF, so a
	// saturated info word is never swapped into the table and the u16 can
	// never wrap around to a small value.
	mut current_key := key
	mut current_value := value
	for h.info[index] != 0 && (info & 0xFF00) != 0xFF00 {
		if info > h.info[index] {
			tmp_kv := h.key_values[index]
			tmp_info := h.info[index]
			h.key_values[index] = KeyValue{
				current_key,current_value}
			h.info[index] = info
			current_key = tmp_kv.key
			current_value = tmp_kv.value
			info = tmp_info
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Probe count exhausted (should almost never happen): grow and re-insert
	// whichever entry is still in hand. If a swap happened, the new key is
	// already in the table and the displaced entry is the one re-inserted,
	// so size is still incremented exactly once by the nested call.
	if (info & 0xFF00) == 0xFF00 {
		h.rehash()
		h.set(current_key, current_value)
		return
	}
	h.info[index] = info
	h.key_values[index] = KeyValue{
		current_key,current_value}
	h.size++
}
 | 
			
		||||
 | 
			
		||||
// rehash doubles the number of slots and re-inserts every stored entry.
//
// The new table is built in scratch arrays and installed, together with the
// new capacity, only after every entry has been placed. If an entry's probe
// count saturates during re-insertion, the scratch arrays are discarded and
// the whole migration restarts from the untouched old table at twice the
// capacity. Previously that case recursed with h.cap already enlarged while
// h.info/h.key_values were still the old, smaller arrays, and then returned
// without migrating the remaining entries.
//
// Keys in the old table are unique, so no key comparison is needed: one
// Robin Hood displacement loop places each entry.
fn (h mut Hashmap) rehash() {
	old_cap := h.cap
	mut new_cap := ((old_cap + 1)<<1) - 1
	for {
		mut new_key_values := &KeyValue(calloc(sizeof(KeyValue) * (new_cap + 1)))
		mut new_info := &u16(calloc(sizeof(u16) * (new_cap + 1)))
		mut saturated := false
		for i in 0 .. (old_cap + 1) {
			if h.info[i] == 0 {
				continue
			}
			mut current_key := h.key_values[i].key
			mut current_value := h.key_values[i].value
			hash := fnv1a64(current_key)
			mut info := u16((hash>>56) | probe_offset)
			mut index := hash & new_cap
			// Probe until an empty slot, swapping with any entry whose probe
			// count is lower (Robin Hood). Stop once the probe count
			// saturates so it is never stored and info cannot wrap.
			for new_info[index] != 0 && (info & 0xFF00) != 0xFF00 {
				if info > new_info[index] {
					tmp_kv := new_key_values[index]
					tmp_info := new_info[index]
					new_key_values[index] = KeyValue{
						current_key,current_value}
					new_info[index] = info
					current_key = tmp_kv.key
					current_value = tmp_kv.value
					info = tmp_info
				}
				index = (index + 1) & new_cap
				info += probe_offset
			}
			// Should almost never happen
			if (info & 0xFF00) == 0xFF00 {
				saturated = true
				break
			}
			new_info[index] = info
			new_key_values[index] = KeyValue{
				current_key,current_value}
		}
		if !saturated {
			// NOTE(review): the old arrays are left allocated, as before;
			// by-value copies of the map may still point at them.
			h.cap = new_cap
			h.key_values = new_key_values
			h.info = new_info
			return
		}
		// The scratch arrays were never published, so they can be released.
		free(new_key_values)
		free(new_info)
		new_cap = ((new_cap + 1)<<1) - 1
	}
}
 | 
			
		||||
 | 
			
		||||
// delete removes `key` from the map; it does nothing if the key is absent.
// The hole left behind is closed by backward shifting: each following entry
// that is away from its home slot moves one slot back and has its probe
// count reduced by one step.
pub fn (h mut Hashmap) delete(key string) {
	hash := fnv1a64(key)
	mut slot := hash & h.cap
	mut probe := u16((hash>>56) | probe_offset)
	// Skip entries that have probed further than this key would have.
	for probe < h.info[slot] {
		slot = (slot + 1) & h.cap
		probe += probe_offset
	}
	// Only slots with an identical info word can hold the key.
	for probe == h.info[slot] {
		if key != h.key_values[slot].key {
			slot = (slot + 1) & h.cap
			probe += probe_offset
			continue
		}
		mut hole := slot
		slot = (slot + 1) & h.cap
		mut follower := h.info[slot]
		// A probe count above 1 means the follower is not in its home slot.
		for (follower>>8) > 1 {
			h.info[hole] = follower - probe_offset
			h.key_values[hole] = h.key_values[slot]
			hole = slot
			slot = (slot + 1) & h.cap
			follower = h.info[slot]
		}
		h.info[hole] = 0
		h.size--
		return
	}
}
 | 
			
		||||
 | 
			
		||||
// get returns the value stored under `key`.
// A missing key yields 0, which is indistinguishable from a stored 0;
// exists() tells the two apart.
pub fn (h Hashmap) get(key string) int {
	hash := fnv1a64(key)
	mut slot := hash & h.cap
	mut probe := u16((hash>>56) | probe_offset)
	// Skip entries that have probed further than this key would have.
	for probe < h.info[slot] {
		slot = (slot + 1) & h.cap
		probe += probe_offset
	}
	// Only slots with an identical info word can hold the key.
	for probe == h.info[slot] {
		if key != h.key_values[slot].key {
			slot = (slot + 1) & h.cap
			probe += probe_offset
			continue
		}
		return h.key_values[slot].value
	}
	return 0
}
 | 
			
		||||
 | 
			
		||||
// exists reports whether `key` is currently stored in the map.
pub fn (h Hashmap) exists(key string) bool {
	hash := fnv1a64(key)
	mut slot := hash & h.cap
	mut probe := u16((hash>>56) | probe_offset)
	// Skip entries that have probed further than this key would have.
	for probe < h.info[slot] {
		slot = (slot + 1) & h.cap
		probe += probe_offset
	}
	// Only slots with an identical info word can hold the key.
	for probe == h.info[slot] {
		if key != h.key_values[slot].key {
			slot = (slot + 1) & h.cap
			probe += probe_offset
			continue
		}
		return true
	}
	return false
}
 | 
			
		||||
 | 
			
		||||
// keys returns every stored key, in slot order.
// The result is pre-sized to h.size and filled from the occupied slots.
pub fn (h Hashmap) keys() []string {
	mut result := [''].repeat(h.size)
	mut filled := 0
	for slot in 0 .. (h.cap + 1) {
		// a zero info word marks an empty slot
		if h.info[slot] == 0 {
			continue
		}
		result[filled] = h.key_values[slot].key
		filled++
	}
	return result
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@ module hashmap
 | 
			
		|||
import rand
 | 
			
		||||
 | 
			
		||||
fn test_random_strings() {
 | 
			
		||||
	mut m := new_hashmap(1000)
 | 
			
		||||
	mut m := new_hashmap()
 | 
			
		||||
	for i in 0..1000 {
 | 
			
		||||
		mut buf := []byte
 | 
			
		||||
		for j in 0..10 {
 | 
			
		||||
| 
						 | 
				
			
			@ -21,12 +21,11 @@ fn test_random_strings() {
 | 
			
		|||
 | 
			
		||||
fn test_large_hashmap() {
 | 
			
		||||
	N := 300 * 1000
 | 
			
		||||
	mut nums := new_hashmap(N)
 | 
			
		||||
	mut nums := new_hashmap()
 | 
			
		||||
	for i := 0; i < N; i++ {
 | 
			
		||||
	        key := i.str()
 | 
			
		||||
	        nums.set(key, i)
 | 
			
		||||
	}
 | 
			
		||||
	println('nr collisions: $nums.nr_collisions')
 | 
			
		||||
	for i := 0; i < N; i++ {
 | 
			
		||||
		key := i.str()
 | 
			
		||||
		assert nums.get(key) == i
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue