hashmap: new and fast hashmap with dynamic size
							parent
							
								
									219239eadc
								
							
						
					
					
						commit
						6fd175d9be
					
				|  | @ -2,113 +2,31 @@ | |||
| // Use of this source code is governed by an MIT license
 | ||||
| // that can be found in the LICENSE file.
 | ||||
| module hashmap | ||||
| /* | ||||
| 	This is work in progress. | ||||
| 	A very early test version of the Hashmap with a fixed size. | ||||
| 	Only works with string keys and int values for now. | ||||
| 
 | ||||
| 	I added this to improve performance of the V compiler, | ||||
| 	which uses lots of O(log n) map gets. It turned out that with N < 10 000 | ||||
| 	the performance gains are basically non-existent. | ||||
| */ | ||||
| 
 | ||||
| 
 | ||||
| // Hashmap is the chained hash table used by the set() and get() methods that
 | ||||
| // index `table`: string keys, int values.
 | ||||
| // NOTE(review): `keys` and `elm_size` are not read by the code visible here;
 | ||||
| // confirm whether they are still needed.
 | ||||
| struct Hashmap { | ||||
| 	// number of buckets; the key hash is reduced modulo this value
 | ||||
| 	cap           int | ||||
| 	keys          []string | ||||
| 	// bucket heads; further entries hang off Hashmapentry.next
 | ||||
| 	table         []Hashmapentry | ||||
| 	elm_size      int | ||||
| pub mut: | ||||
| 	// collision counter maintained by set()
 | ||||
| 	nr_collisions int | ||||
| } | ||||
| 
 | ||||
| // Hashmapentry is one key/value pair plus a link to the next entry stored in
 | ||||
| // the same bucket. Field order matters: entries are created with positional
 | ||||
| // literals of the form Hashmapentry{key,val,0}.
 | ||||
| struct Hashmapentry { | ||||
| mut: | ||||
| 	key  string | ||||
| 	val  int | ||||
| 	next &Hashmapentry // next entry in the collision chain; 0 marks the end
 | ||||
| } | ||||
| 
 | ||||
| // Lower and upper bounds that new_hashmap(planned_nr_items) clamps the
 | ||||
| // table capacity to: 2<<10 == 2048 and 2<<20 == 2097152.
 | ||||
| const ( | ||||
| 	min_cap = 2<<10 | ||||
| 	max_cap = 2<<20 | ||||
| ) | ||||
| 
 | ||||
| // Parameters of the open-addressing Hashmap.
 | ||||
| // initial_size (2<<4 == 32) is the slot count allocated by new_hashmap();
 | ||||
| // initial_cap is that count minus one and is used as an index bit mask.
 | ||||
| // probe_offset is added to a slot's info word once per probe step, so the
 | ||||
| // high byte of the info word counts probes.
 | ||||
| // NOTE(review): load_factor is not referenced by the code visible here;
 | ||||
| // set() hard-codes its growth check instead.
 | ||||
| // fnv64_prime and fnv64_offset_basis equal the published FNV 64-bit
 | ||||
| // parameters; presumably consumed by fnv1a64 - confirm.
 | ||||
| const ( | ||||
| 	initial_size = 2<<4 | ||||
| 	initial_cap = initial_size - 1 | ||||
| 	load_factor = 0.5 | ||||
| 	probe_offset = u16(256) | ||||
| 	fnv64_prime = 1099511628211 | ||||
| 	fnv64_offset_basis = 14695981039346656037 | ||||
| ) | ||||
| 
 | ||||
| // FNV 32-bit offset basis and prime, used by fnv1a32 as the starting hash
 | ||||
| // value and the per-byte multiplier.
 | ||||
| const( | ||||
| 	fnv32_offset_basis = u32(2166136261) | ||||
| 	fnv32_prime = u32(16777619) | ||||
| ) | ||||
| 
 | ||||
| pub fn new_hashmap(planned_nr_items int) Hashmap { | ||||
| 	mut cap := planned_nr_items * 5 | ||||
| 	if cap < min_cap { | ||||
| 		cap = min_cap | ||||
| 	} | ||||
| 	if cap > max_cap { | ||||
| 		cap = max_cap | ||||
| 	} | ||||
| 	return Hashmap{ | ||||
| 		cap: cap | ||||
| 		elm_size: 4 | ||||
| 		table: make(cap, cap, sizeof(Hashmapentry)) | ||||
| 	} | ||||
| // Hashmap is an open-addressing hash table with string keys and int values.
 | ||||
| // `info` and `key_values` are parallel arrays with cap + 1 slots each.
 | ||||
| pub struct Hashmap { | ||||
| mut: | ||||
| 	// per-slot metadata: 0 means the slot is empty; otherwise the high byte
 | ||||
| 	// is the probe count (1 at the home slot) and the low byte holds the
 | ||||
| 	// top 8 bits of the key's hash
 | ||||
| 	info       &u16 | ||||
| 	// per-slot key/value storage, valid only where info is non-zero
 | ||||
| 	key_values &KeyValue | ||||
| 	// slot count minus one, used as a bit mask on hashes and indices
 | ||||
| 	cap        int | ||||
| pub mut: | ||||
| 	// number of entries currently stored
 | ||||
| 	size       int | ||||
| } | ||||
| 
 | ||||
| // set stores `val` under `key` in the chained table.
 | ||||
| // If the key is already present anywhere in its bucket's chain the stored
 | ||||
| // value is overwritten in place; previously a duplicate entry was appended
 | ||||
| // and get() kept returning the stale first value.
 | ||||
| // nr_collisions is incremented only when a new key has to be chained onto
 | ||||
| // an occupied bucket.
 | ||||
| // NOTE(review): a bucket head with an empty key is treated as unused, so
 | ||||
| // the empty string is stored in the head slot and can be overwritten there.
 | ||||
| pub fn (m mut Hashmap) set(key string, val int) { | ||||
| 	idx := int(fnv1a32(key) % m.cap) | ||||
| 	if m.table[idx].key.len == 0 { | ||||
| 		// Unused bucket head: store the entry in place.
 | ||||
| 		m.table[idx] = Hashmapentry{ | ||||
| 			key,val,0} | ||||
| 		return | ||||
| 	} | ||||
| 	// Walk the chain, updating on a key match and stopping at the tail.
 | ||||
| 	mut e := &m.table[idx] | ||||
| 	for { | ||||
| 		if e.key == key { | ||||
| 			e.val = val | ||||
| 			return | ||||
| 		} | ||||
| 		if e.next == 0 { | ||||
| 			break | ||||
| 		} | ||||
| 		e = e.next | ||||
| 	} | ||||
| 	// Key not present: append a new entry after the tail.
 | ||||
| 	m.nr_collisions++ | ||||
| 	e.next = &Hashmapentry{ | ||||
| 		key,val,0} | ||||
| } | ||||
| 
 | ||||
| // get returns the value stored under `key`, or 0 when the key is absent.
 | ||||
| // Every entry in the bucket's chain, including the last one, is compared
 | ||||
| // against `key`; previously the tail entry's value was returned without a
 | ||||
| // key check, so a missing key could yield another key's value.
 | ||||
| pub fn (m &Hashmap) get(key string) int { | ||||
| 	idx := int(fnv1a32(key) % m.cap) | ||||
| 	mut e := &m.table[idx] | ||||
| 	for { | ||||
| 		if e.key == key { | ||||
| 			return e.val | ||||
| 		} | ||||
| 		if e.next == 0 { | ||||
| 			// End of the chain without a match.
 | ||||
| 			break | ||||
| 		} | ||||
| 		e = e.next | ||||
| 	} | ||||
| 	return 0 | ||||
| } | ||||
| 
 | ||||
| // b_fabs returns the magnitude of `v` (negated when negative) as an f64.
 | ||||
| [inline] | ||||
| fn b_fabs(v int) f64 { | ||||
| 	if v < 0 { | ||||
| 		return -v | ||||
| 	} | ||||
| 	return v | ||||
| } | ||||
| 
 | ||||
| // inline functions here for speed
 | ||||
| // rather than full impl in vlib
 | ||||
| [inline] | ||||
| fn fnv1a32(data string) u32 { | ||||
|     mut hash := fnv32_offset_basis | ||||
|     for i := 0; i < data.len; i++ { | ||||
|         hash = (hash ^ u32(data[i])) * fnv32_prime | ||||
|     } | ||||
|     return hash | ||||
| // KeyValue is the payload stored in one Hashmap slot. Field order matters:
 | ||||
| // values are created with positional literals of the form KeyValue{key,value}.
 | ||||
| struct KeyValue { | ||||
| 	key   string | ||||
| mut: | ||||
| 	// mutable so set() can overwrite the value of an existing key in place
 | ||||
| 	value int | ||||
| } | ||||
| 
 | ||||
| [inline] | ||||
|  | @ -119,3 +37,207 @@ fn fnv1a64(data string) u64 { | |||
| 	} | ||||
| 	return hash | ||||
| } | ||||
| 
 | ||||
| // new_hashmap returns an empty Hashmap whose info and key/value arrays are
 | ||||
| // allocated with calloc for `initial_size` slots.
 | ||||
| pub fn new_hashmap() Hashmap { | ||||
| 	slot_info := &u16(calloc(sizeof(u16) * initial_size)) | ||||
| 	slot_kvs := &KeyValue(calloc(sizeof(KeyValue) * initial_size)) | ||||
| 	return Hashmap{ | ||||
| 		info: slot_info | ||||
| 		key_values: slot_kvs | ||||
| 		cap: initial_cap | ||||
| 		size: 0 | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // set stores `value` under `key`, overwriting the value if the key is
 | ||||
| // already present. New keys are placed with Robin Hood probing: an entry
 | ||||
| // being inserted takes over the slot of any entry with a smaller info word
 | ||||
| // and the displaced entry continues probing.
 | ||||
| pub fn (h mut Hashmap) set(key string, value int) { | ||||
| 	// The load factor is 0.5: the table grows when twice the entry count
 | ||||
 | ||||
| 	// equals cap - 1. It is meant to become adjustable in the future, with
 | ||||
 | ||||
| 	// a higher default setting to lower memory usage.
 | ||||
 | ||||
| 	if (h.size<<1) == (h.cap - 1) { | ||||
| 		h.rehash() | ||||
| 	} | ||||
| 	// The hash function is planned to be swapped for wyhash.
 | ||||
| 	hash := fnv1a64(key) | ||||
| 	// info = probe count 1 in the high byte, top 8 hash bits in the low byte.
 | ||||
| 	mut info := u16((hash>>56) | probe_offset) | ||||
| 	mut index := hash & h.cap | ||||
| 	// Skip slots whose stored info word is larger than ours.
 | ||||
 | ||||
| 	for info < h.info[index] { | ||||
| 		index = (index + 1) & h.cap | ||||
| 		info += probe_offset | ||||
| 	} | ||||
| 	// Equal info words are the only possible matches; compare the keys.
 | ||||
 | ||||
| 	for info == h.info[index] { | ||||
| 		if key == h.key_values[index].key { | ||||
| 			h.key_values[index].value = value | ||||
| 			return | ||||
| 		} | ||||
| 		index = (index + 1) & h.cap | ||||
| 		info += probe_offset | ||||
| 	} | ||||
| 	// A match is not possible anymore.
 | ||||
 | ||||
| 	// Probe until an empty slot is found.
 | ||||
 | ||||
| 	// Swap when our info word is larger than the slot's (Robin Hood).
 | ||||
 | ||||
| 	mut current_key := key | ||||
| 	mut current_value := value | ||||
| 	for h.info[index] != 0 { | ||||
| 		if info > h.info[index] { | ||||
| 			tmp_kv := h.key_values[index] | ||||
| 			tmp_info := h.info[index] | ||||
| 			h.key_values[index] = KeyValue{ | ||||
| 				current_key,current_value} | ||||
| 			h.info[index] = info | ||||
| 			current_key = tmp_kv.key | ||||
| 			current_value = tmp_kv.value | ||||
| 			info = tmp_info | ||||
| 		} | ||||
| 		index = (index + 1) & h.cap | ||||
| 		info += probe_offset | ||||
| 	} | ||||
| 	// Should almost never happen: the probe-count byte has reached 255.
 | ||||
 | ||||
| 	// Grow the table and insert the entry still in hand from scratch.
 | ||||
| 	if (info & 0xFF00) == 0xFF00 { | ||||
| 		h.rehash() | ||||
| 		h.set(current_key, current_value) | ||||
| 		return | ||||
| 	} | ||||
| 	h.info[index] = info | ||||
| 	h.key_values[index] = KeyValue{ | ||||
| 		current_key,current_value} | ||||
| 	h.size++ | ||||
| } | ||||
| 
 | ||||
| // rehash doubles the number of slots and re-inserts every stored entry into
 | ||||
| // freshly allocated arrays, then switches the map over to them.
 | ||||
| //
 | ||||
| // The map's fields are only updated once a complete new table has been
 | ||||
| // built. If an entry's probe-count byte reaches 255 while rebuilding, the
 | ||||
| // attempt is abandoned and repeated with the slot count doubled again.
 | ||||
| // Previously this case called rehash() recursively after h.cap had been
 | ||||
| // enlarged but while h.info and h.key_values still pointed at the old,
 | ||||
| // smaller arrays, so the recursive call read past their end.
 | ||||
| //
 | ||||
| // Keys in the old table are unique, so no key comparison is needed here;
 | ||||
| // the former early `return` on a key match is gone.
 | ||||
| // NOTE(review): the old arrays are not freed, because a Hashmap copied by
 | ||||
| // value would still point at them - confirm the intended ownership.
 | ||||
| fn (h mut Hashmap) rehash() { | ||||
| 	old_cap := h.cap | ||||
| 	mut new_cap := old_cap | ||||
| 	for { | ||||
| 		new_cap = ((new_cap + 1)<<1) - 1 | ||||
| 		mut new_key_values := &KeyValue(calloc(sizeof(KeyValue) * (new_cap + 1))) | ||||
| 		mut new_info := &u16(calloc(sizeof(u16) * (new_cap + 1))) | ||||
| 		mut overflowed := false | ||||
| 		for i in 0 .. (old_cap + 1) { | ||||
| 			if h.info[i] == 0 { | ||||
| 				continue | ||||
| 			} | ||||
| 			mut current_key := h.key_values[i].key | ||||
| 			mut current_value := h.key_values[i].value | ||||
| 			hash := fnv1a64(current_key) | ||||
| 			mut info := u16((hash>>56) | probe_offset) | ||||
| 			mut index := hash & new_cap | ||||
| 			// Probe until an empty slot is found.
 | ||||
| 			// Swap when our info word is larger than the slot's (Robin Hood).
 | ||||
| 			for new_info[index] != 0 { | ||||
| 				if info > new_info[index] { | ||||
| 					tmp_kv := new_key_values[index] | ||||
| 					tmp_info := new_info[index] | ||||
| 					new_key_values[index] = KeyValue{ | ||||
| 						current_key,current_value} | ||||
| 					new_info[index] = info | ||||
| 					current_key = tmp_kv.key | ||||
| 					current_value = tmp_kv.value | ||||
| 					info = tmp_info | ||||
| 				} | ||||
| 				index = (index + 1) & new_cap | ||||
| 				info += probe_offset | ||||
| 			} | ||||
| 			// Should almost never happen: probe-count byte reached 255.
 | ||||
| 			if (info & 0xFF00) == 0xFF00 { | ||||
| 				overflowed = true | ||||
| 				break | ||||
| 			} | ||||
| 			new_info[index] = info | ||||
| 			new_key_values[index] = KeyValue{ | ||||
| 				current_key,current_value} | ||||
| 		} | ||||
| 		if !overflowed { | ||||
| 			h.key_values = new_key_values | ||||
| 			h.info = new_info | ||||
| 			h.cap = new_cap | ||||
| 			return | ||||
| 		} | ||||
| 		// The abandoned arrays were never published, so they can be released
 | ||||
| 		// before retrying with a larger table.
 | ||||
| 		free(new_key_values) | ||||
| 		free(new_info) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // delete removes `key` from the map; it does nothing when the key is absent.
 | ||||
| // The gap is closed by backward shifting: following entries that are not in
 | ||||
| // their home slot move back one slot and have their probe count decremented.
 | ||||
| pub fn (h mut Hashmap) delete(key string) { | ||||
| 	hash := fnv1a64(key) | ||||
| 	mut index := hash & h.cap | ||||
| 	mut info := u16((hash>>56) | probe_offset) | ||||
| 	// Skip slots whose stored info word is larger than ours.
 | ||||
| 	for info < h.info[index] { | ||||
| 		index = (index + 1) & h.cap | ||||
| 		info += probe_offset | ||||
| 	} | ||||
| 	// Equal info words are candidate matches; on a key match perform
 | ||||
 | ||||
| 	// backwards shifting.
 | ||||
| 	for info == h.info[index] { | ||||
| 		if key == h.key_values[index].key { | ||||
| 			mut old_index := index | ||||
| 			index = (index + 1) & h.cap | ||||
| 			mut current_info := h.info[index] | ||||
| 			// A probe count above 1 means the entry sits past its home slot.
 | ||||
| 			for (current_info>>8) > 1 { | ||||
| 				h.info[old_index] = current_info - probe_offset | ||||
| 				h.key_values[old_index] = h.key_values[index] | ||||
| 				old_index = index | ||||
| 				index = (index + 1) & h.cap | ||||
| 				current_info = h.info[index] | ||||
| 			} | ||||
| 			// Mark the last vacated slot empty; its key_values entry is left
 | ||||
| 			// as is.
 | ||||
| 			h.info[old_index] = 0 | ||||
| 			h.size-- | ||||
| 			return | ||||
| 		} | ||||
| 		index = (index + 1) & h.cap | ||||
| 		info += probe_offset | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // get looks `key` up and returns its value, or 0 when the key is not stored.
 | ||||
| pub fn (h Hashmap) get(key string) int { | ||||
| 	hash := fnv1a64(key) | ||||
| 	mut slot := hash & h.cap | ||||
| 	mut wanted := u16((hash>>56) | probe_offset) | ||||
| 	// Walk past slots whose info word is larger than the one we look for.
 | ||||
| 	for h.info[slot] > wanted { | ||||
| 		slot = (slot + 1) & h.cap | ||||
| 		wanted += probe_offset | ||||
| 	} | ||||
| 	// Only slots with an identical info word can hold the key.
 | ||||
| 	for h.info[slot] == wanted { | ||||
| 		if h.key_values[slot].key == key { | ||||
| 			return h.key_values[slot].value | ||||
| 		} | ||||
| 		slot = (slot + 1) & h.cap | ||||
| 		wanted += probe_offset | ||||
| 	} | ||||
| 	return 0 | ||||
| } | ||||
| 
 | ||||
| // exists reports whether `key` is currently stored in the map.
 | ||||
| pub fn (h Hashmap) exists(key string) bool { | ||||
| 	hash := fnv1a64(key) | ||||
| 	mut slot := hash & h.cap | ||||
| 	mut wanted := u16((hash>>56) | probe_offset) | ||||
| 	// Walk past slots whose info word is larger than the one we look for.
 | ||||
| 	for h.info[slot] > wanted { | ||||
| 		slot = (slot + 1) & h.cap | ||||
| 		wanted += probe_offset | ||||
| 	} | ||||
| 	// Only slots with an identical info word can hold the key.
 | ||||
| 	for h.info[slot] == wanted { | ||||
| 		if h.key_values[slot].key == key { | ||||
| 			return true | ||||
| 		} | ||||
| 		slot = (slot + 1) & h.cap | ||||
| 		wanted += probe_offset | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
| 
 | ||||
| // keys returns the stored keys in slot order, in an array of h.size strings.
 | ||||
| pub fn (h Hashmap) keys() []string { | ||||
| 	mut result := [''].repeat(h.size) | ||||
| 	mut filled := 0 | ||||
| 	for slot in 0 .. (h.cap + 1) { | ||||
| 		// An info word of 0 marks an empty slot.
 | ||||
| 		if h.info[slot] == 0 { | ||||
| 			continue | ||||
| 		} | ||||
| 		result[filled] = h.key_values[slot].key | ||||
| 		filled++ | ||||
| 	} | ||||
| 	return result | ||||
| } | ||||
|  |  | |||
|  | @ -3,7 +3,7 @@ module hashmap | |||
| import rand | ||||
| 
 | ||||
| fn test_random_strings() { | ||||
| 	mut m := new_hashmap(1000) | ||||
| 	mut m := new_hashmap() | ||||
| 	for i in 0..1000 { | ||||
| 		mut buf := []byte | ||||
| 		for j in 0..10 { | ||||
|  | @ -21,12 +21,11 @@ fn test_random_strings() { | |||
| 
 | ||||
| fn test_large_hashmap() { | ||||
| 	N := 300 * 1000 | ||||
| 	mut nums := new_hashmap(N) | ||||
| 	mut nums := new_hashmap() | ||||
| 	for i := 0; i < N; i++ { | ||||
| 	        key := i.str() | ||||
| 	        nums.set(key, i) | ||||
| 	} | ||||
| 	println('nr collisions: $nums.nr_collisions') | ||||
| 	for i := 0; i < N; i++ { | ||||
| 		key := i.str() | ||||
| 		assert nums.get(key) == i | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue