diff --git a/cmd/tools/bench/hashmap.v b/cmd/tools/bench/hashmap.v deleted file mode 100644 index ba9070aa6e..0000000000 --- a/cmd/tools/bench/hashmap.v +++ /dev/null @@ -1,112 +0,0 @@ -import rand -import time -import builtin.hashmap - -fn hashmap_set_bench(arr []string, repeat int) { - start_time := time.ticks() - for _ in 0..repeat { - mut b := hashmap.new_hashmap() - for x in arr { - b.set(x, 1) - } - } - end_time := time.ticks() - start_time - println("* hashmap_set: ${end_time} ms") -} - -fn map_set_bench(arr []string, repeat int) { - start_time := time.ticks() - for _ in 0..repeat { - mut b := map[string]int - for x in arr { - b[x] = 1 - } - } - end_time := time.ticks() - start_time - println("* map_set: ${end_time} ms") -} - -fn hashmap_get_bench(arr []string, repeat int) { - mut b := hashmap.new_hashmap() - for x in arr { - b.set(x, 1) - } - start_time := time.ticks() - for _ in 0..repeat { - for x in arr { - b.get(x) - } - } - end_time := time.ticks() - start_time - println("* hashmap_get: ${end_time} ms") -} - -fn map_get_bench(arr []string, repeat int) { - mut b := map[string]int - for x in arr { - b[x] = 1 - } - start_time := time.ticks() - for _ in 0..repeat { - for x in arr { - b[x] - } - } - end_time := time.ticks() - start_time - println("* map_get: ${end_time} ms") -} - -fn benchmark_many_keys() { - key_len := 30 - repeat := 1 - for i := 2048; i <= 10000000; i = i * 2 { - mut arr := []string - for _ in 0..i { - mut buf := []byte - for j in 0..key_len { - buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) - } - s := string(buf) - arr << s - } - println("$arr.len keys of length $key_len") - // Uncomment the benchmark you would like to benchmark - // Run one or two at a time while memory leaks is a thing - hashmap_get_bench(arr, repeat) - map_get_bench(arr, repeat) - // hashmap_set_bench(arr, repeat) - // map_set_bench(arr, repeat) - println('') - } -} - -fn benchmark_few_keys() { - key_len := 30 - repeat := 10000 - println("Benchmarks are repeated 
$repeat times") - for i := 16; i <= 2048; i = i * 2 { - mut arr := []string - for _ in 0..i { - mut buf := []byte - for j in 0..key_len { - buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) - } - s := string(buf) - arr << s - } - println("$arr.len keys of length $key_len") - // Uncomment the benchmark you would like to benchmark - // Run one or two at a time while memory leaks is a thing - hashmap_get_bench(arr, repeat) - map_get_bench(arr, repeat) - // hashmap_set_bench(arr, repeat) - // map_set_bench(arr, repeat) - println('') - } -} - -fn main() { - // Uncomment below to benchmark on many keys - // benchmark_many_keys() - benchmark_few_keys() -} \ No newline at end of file diff --git a/vlib/builtin/hashmap/hashmap.v b/vlib/builtin/hashmap/hashmap.v deleted file mode 100644 index 178e6f92a9..0000000000 --- a/vlib/builtin/hashmap/hashmap.v +++ /dev/null @@ -1,243 +0,0 @@ -// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved. -// Use of this source code is governed by an MIT license -// that can be found in the LICENSE file. -module hashmap - -import hash.wyhash - -const ( - log_size = 5 - n_hashbits = 24 - window_size = 16 - initial_size = 1 << log_size - initial_cap = initial_size - 1 - default_load_factor = 0.8 - hashbit_mask = u32(0xFFFFFF) - probe_offset = u32(0x1000000) - max_probe = u32(0xFF000000) -) - -pub struct Hashmap { -mut: - cap u32 - shift byte - window byte - info &u32 - key_values &KeyValue -pub mut: - load_factor f32 - size int -} - -struct KeyValue { - key string -mut: - value int -} - -pub fn new_hashmap() Hashmap { - return Hashmap{ - cap: initial_cap - shift: log_size - window: window_size - info: &u32(calloc(sizeof(u32) * initial_size)) - key_values: &KeyValue(calloc(sizeof(KeyValue) * initial_size)) - load_factor: default_load_factor - size: 0 - } -} - -pub fn (h mut Hashmap) set(key string, value int) { - // load_factor can be adjusted. 
- if (f32(h.size) / f32(h.cap)) > h.load_factor { - h.rehash() - } - hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) - mut index := hash & h.cap - // While probe count is less - for info < h.info[index] { - index = (index + 1) & h.cap - info += probe_offset - } - // While we might have a match - for info == h.info[index] { - if key == h.key_values[index].key { - h.key_values[index].value = value - return - } - index = (index + 1) & h.cap - info += probe_offset - } - // Match is not possible anymore. - // Probe until an empty index is found. - // Swap when probe count is higher/richer (Robin Hood). - mut current_kv := KeyValue{key, value} - for h.info[index] != 0 { - if info > h.info[index] { - // Swap info word - tmp_info := h.info[index] - h.info[index] = info - info = tmp_info - // Swap KeyValue - tmp_kv := h.key_values[index] - h.key_values[index] = current_kv - current_kv = tmp_kv - } - index = (index + 1) & h.cap - info += probe_offset - } - // Should almost never happen - if (info & max_probe) == max_probe { - h.rehash() - h.set(current_kv.key, current_kv.value) - return - } - h.info[index] = info - h.key_values[index] = current_kv - h.size++ -} - -fn (h mut Hashmap) rehash() { - old_cap := h.cap - h.window-- - // check if any hashbits are left - if h.window == 0 { - h.shift += window_size - } - // double the size of the hashmap - h.cap = ((h.cap + 1) << 1) - 1 - mut new_key_values := &KeyValue(calloc(sizeof(KeyValue) * (h.cap + 1))) - mut new_info := &u32(calloc(sizeof(u32) * (h.cap + 1))) - for i in 0 .. 
(old_cap + 1) { - if h.info[i] != 0 { - mut kv := h.key_values[i] - mut hash := u64(0) - mut info := u32(0) - if h.window == 0 { - hash = wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0) - info = u32(((hash >> h.shift) & hashbit_mask) | probe_offset) - } - else { - original := u64(i - ((h.info[i] >> n_hashbits) - 1)) & (h.cap >> 1) - hash = original | (h.info[i] << h.shift) - info = (h.info[i] & hashbit_mask) | probe_offset - } - mut index := hash & h.cap - // While probe count is less - for info < new_info[index] { - index = (index + 1) & h.cap - info += probe_offset - } - // Probe until an empty index is found. - // Swap when probe count is higher/richer (Robin Hood). - for new_info[index] != 0 { - if info > new_info[index] { - // Swap info word - tmp_info := new_info[index] - new_info[index] = info - info = tmp_info - // Swap KeyValue - tmp_kv := new_key_values[index] - new_key_values[index] = kv - kv = tmp_kv - } - index = (index + 1) & h.cap - info += probe_offset - } - // Should almost never happen - if (info & max_probe) == max_probe { - h.rehash() - h.set(kv.key, kv.value) - return - } - new_info[index] = info - new_key_values[index] = kv - } - } - if h.window == 0 { - h.window = window_size - } - free(h.key_values) - free(h.info) - h.key_values = new_key_values - h.info = new_info -} - -pub fn (h mut Hashmap) delete(key string) { - hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut index := hash & h.cap - mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) - for info < h.info[index] { - index = (index + 1) & h.cap - info += probe_offset - } - // Perform backwards shifting - for info == h.info[index] { - if key == h.key_values[index].key { - mut old_index := index - index = (index + 1) & h.cap - mut current_info := h.info[index] - for (current_info >> n_hashbits) > 1 { - h.info[old_index] = current_info - probe_offset - h.key_values[old_index] = h.key_values[index] - old_index = index - index = (index + 1) & h.cap - current_info = 
h.info[index] - } - h.info[old_index] = 0 - h.size-- - return - } - index = (index + 1) & h.cap - info += probe_offset - } -} - -pub fn (h Hashmap) get(key string) int { - hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut index := hash & h.cap - mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) - for info < h.info[index] { - index = (index + 1) & h.cap - info += probe_offset - } - for info == h.info[index] { - if key == h.key_values[index].key { - return h.key_values[index].value - } - index = (index + 1) & h.cap - info += probe_offset - } - return 0 -} - -pub fn (h Hashmap) exists(key string) bool { - hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut index := hash & h.cap - mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) - for info < h.info[index] { - index = (index + 1) & h.cap - info += probe_offset - } - for info == h.info[index] { - if key == h.key_values[index].key { - return true - } - index = (index + 1) & h.cap - info += probe_offset - } - return false -} - -pub fn (h Hashmap) keys() []string { - mut keys := [''].repeat(h.size) - mut j := 0 - for i in 0 .. 
(h.cap + 1) { - if h.info[i] != 0 { - keys[j] = h.key_values[i].key - j++ - } - } - return keys -} diff --git a/vlib/builtin/hashmap/hashmap_test.v b/vlib/builtin/hashmap/hashmap_test.v deleted file mode 100644 index cb0de6de86..0000000000 --- a/vlib/builtin/hashmap/hashmap_test.v +++ /dev/null @@ -1,33 +0,0 @@ -module hashmap - -import rand - -fn test_random_strings() { - mut m := new_hashmap() - for i in 0..1000 { - mut buf := []byte - for j in 0..10 { - buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) - } - s := string(buf) - //println(s) - m.set(s, i) - assert m.get(s) == i - } - m.set('foo', 12) - val := m.get('foo') - assert val == 12 -} - -fn test_large_hashmap() { - N := 300 * 1000 - mut nums := new_hashmap() - for i := 0; i < N; i++ { - key := i.str() - nums.set(key, i) - } - for i := 0; i < N; i++ { - key := i.str() - assert nums.get(key) == i - } -} diff --git a/vlib/builtin/map.v b/vlib/builtin/map.v index e50ca0f982..e36364d193 100644 --- a/vlib/builtin/map.v +++ b/vlib/builtin/map.v @@ -4,44 +4,73 @@ module builtin -import strings - -// B-trees are balanced search trees with all leaves at -// the same level. B-trees are generally faster than -// binary search trees due to the better locality of -// reference, since multiple keys are stored in one node. - -// The number for `degree` has been picked through vigor- -// ous benchmarking but can be changed to any number > 1. -// `degree` determines the size of each node. 
+import ( + strings + hash.wyhash +) const ( - degree = 6 - mid_index = degree - 1 - max_size = 2 * degree - 1 - children_bytes = sizeof(voidptr) * (max_size + 1) + // Number of bits from the hash stored for each entry + hashbits = 24 + // Number of bits from the hash stored for rehasing + cached_hashbits = 16 + // Initial log-number of buckets in the hashtable + init_log_capicity = 5 + // Initial number of buckets in the hashtable + init_capicity = 1< m.load_factor { + m.expand() } + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) + mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) + mut index := hash & m.range_cap + // While probe count is less + for probe_hash < m.probe_hash[index] { + index = (index + 1) & m.range_cap + probe_hash += probe_inc + } + // While we might have a match + for probe_hash == m.probe_hash[index] { + if key == m.key_values[index].key { + C.memcpy(m.key_values[index].value, value, m.value_bytes) + return + } + index = (index + 1) & m.range_cap + probe_hash += probe_inc + } + // Match is not possible anymore. + // Probe until an empty index is found. + // Swap when probe count is higher/richer (Robin Hood). + mut current_kv := KeyValue{key, malloc(m.value_bytes)} + C.memcpy(current_kv.value, value, m.value_bytes) + for m.probe_hash[index] != 0 { + if probe_hash > m.probe_hash[index] { + // Swap probe_hash + tmp_probe_hash := m.probe_hash[index] + m.probe_hash[index] = probe_hash + probe_hash = tmp_probe_hash + // Swap KeyValue + tmp_kv := m.key_values[index] + m.key_values[index] = current_kv + current_kv = tmp_kv + } + index = (index + 1) & m.range_cap + probe_hash += probe_inc + } + // Should almost never happen + if (probe_hash & max_probe) == max_probe { + m.expand() + m.set(current_kv.key, current_kv.value) + return + } + m.probe_hash[index] = probe_hash + m.key_values[index] = current_kv + m.size++ } -// This implementation does proactive insertion, meaning -// that splits are done top-down and not bottom-up. 
-fn (m mut map) set(key string, value voidptr) { - mut node := m.root - mut child_index := 0 - mut parent := &mapnode(0) - for { +fn (m mut map) expand() { + old_range_cap := m.range_cap + // double the size of the hashmap + m.range_cap = ((m.range_cap + 1)<<1) - 1 + // check if no hashbits are left + if m.window == 0 { + m.shift += cached_hashbits + m.rehash(old_range_cap) + m.window = cached_hashbits + } + else { + m.cached_rehash(old_range_cap) + } + m.window-- +} - if node.size == max_size { - if isnil(parent) { - parent = new_node() - m.root = parent +fn (m mut map) rehash(old_range_cap u32) { + probe_hash_bytes := sizeof(u32) * (m.range_cap + 1) + key_value_bytes := sizeof(KeyValue) * (m.range_cap + 1) + memory := calloc(probe_hash_bytes + key_value_bytes) + mut new_key_values := &KeyValue(memory) + mut new_probe_hash := &u32(memory + key_value_bytes) + for i in 0 .. (old_range_cap + 1) { + if m.probe_hash[i] != 0 { + mut kv := m.key_values[i] + hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0) + mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) + mut index := hash & m.range_cap + // While probe count is less + for probe_hash < new_probe_hash[index] { + index = (index + 1) & m.range_cap + probe_hash += probe_inc } - parent.split_child(child_index, mut node) - if key == parent.keys[child_index] { - C.memcpy(parent.values[child_index], value, m.value_bytes) + // Probe until an empty index is found. + // Swap when probe count is higher/richer (Robin Hood). 
+ for new_probe_hash[index] != 0 { + if probe_hash > new_probe_hash[index] { + // Swap probe_hash + tmp_probe_hash := new_probe_hash[index] + new_probe_hash[index] = probe_hash + probe_hash = tmp_probe_hash + // Swap KeyValue + tmp_kv := new_key_values[index] + new_key_values[index] = kv + kv = tmp_kv + } + index = (index + 1) & m.range_cap + probe_hash += probe_inc + } + // Should almost never happen + if (probe_hash & max_probe) == max_probe { + m.expand() + m.set(kv.key, kv.value) return } - node = if key < parent.keys[child_index] { - &mapnode(parent.children[child_index]) - } else { - &mapnode(parent.children[child_index + 1]) - } + new_probe_hash[index] = probe_hash + new_key_values[index] = kv } - mut i := 0 - for i < node.size && key > node.keys[i] { i++ } - if i != node.size && key == node.keys[i] { - C.memcpy(node.values[i], value, m.value_bytes) - return - } - if isnil(node.children) { - mut j := node.size - 1 - for j >= 0 && key < node.keys[j] { - node.keys[j + 1] = node.keys[j] - node.values[j + 1] = node.values[j] - j-- - } - node.keys[j + 1] = key - node.values[j + 1] = malloc(m.value_bytes) - C.memcpy(node.values[j + 1], value, m.value_bytes) - node.size++ - m.size++ - return - } - parent = node - child_index = i - node = &mapnode(node.children[child_index]) } + unsafe{ + free(m.key_values) + } + m.key_values = new_key_values + m.probe_hash = new_probe_hash } -fn (n mut mapnode) split_child(child_index int, y mut mapnode) { - mut z := new_node() - z.size = mid_index - y.size = mid_index - for j := mid_index - 1; j >= 0; j-- { - z.keys[j] = y.keys[j + degree] - z.values[j] = y.values[j + degree] - } - if !isnil(y.children) { - z.children = &voidptr(malloc(children_bytes)) - for jj := degree - 1; jj >= 0; jj-- { - z.children[jj] = y.children[jj + degree] +fn (m mut map) cached_rehash(old_range_cap u32) { + probe_hash_bytes := sizeof(u32) * (m.range_cap + 1) + key_value_bytes := sizeof(KeyValue) * (m.range_cap + 1) + memory := calloc(probe_hash_bytes + 
key_value_bytes) + mut new_probe_hash := &u32(memory + key_value_bytes) + mut new_key_values := &KeyValue(memory) + for i in 0 .. (old_range_cap + 1) { + if m.probe_hash[i] != 0 { + mut kv := m.key_values[i] + mut probe_hash := m.probe_hash[i] + original := u64(i - ((probe_hash>>hashbits) - 1)) & (m.range_cap>>1) + hash := original | (probe_hash< new_probe_hash[index] { + // Swap probe_hash + tmp_probe_hash := new_probe_hash[index] + new_probe_hash[index] = probe_hash + probe_hash = tmp_probe_hash + // Swap KeyValue + tmp_kv := new_key_values[index] + new_key_values[index] = kv + kv = tmp_kv + } + index = (index + 1) & m.range_cap + probe_hash += probe_inc + } + // Should almost never happen + if (probe_hash & max_probe) == max_probe { + m.expand() + m.set(kv.key, kv.value) + return + } + new_probe_hash[index] = probe_hash + new_key_values[index] = kv } } - if isnil(n.children) { - n.children = &voidptr(malloc(children_bytes)) + unsafe{ + free(m.key_values) } - n.children[n.size + 1] = n.children[n.size] - for j := n.size; j > child_index; j-- { - n.keys[j] = n.keys[j - 1] - n.values[j] = n.values[j - 1] - n.children[j] = n.children[j - 1] + m.key_values = new_key_values + m.probe_hash = new_probe_hash +} + +pub fn (m mut map) delete(key string) { + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) + mut index := hash & m.range_cap + mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) + for probe_hash < m.probe_hash[index] { + index = (index + 1) & m.range_cap + probe_hash += probe_inc + } + // Perform backwards shifting + for probe_hash == m.probe_hash[index] { + if key == m.key_values[index].key { + mut old_index := index + index = (index + 1) & m.range_cap + mut current_probe_hash := m.probe_hash[index] + for (current_probe_hash>>hashbits) > 1 { + m.probe_hash[old_index] = current_probe_hash - probe_inc + m.key_values[old_index] = m.key_values[index] + old_index = index + index = (index + 1) & m.range_cap + current_probe_hash = m.probe_hash[index] + 
} + m.probe_hash[old_index] = 0 + m.size-- + return + } + index = (index + 1) & m.range_cap + probe_hash += probe_inc } - n.keys[child_index] = y.keys[mid_index] - n.values[child_index] = y.values[mid_index] - n.children[child_index] = voidptr(y) - n.children[child_index + 1] = voidptr(z) - n.size++ } fn (m map) get(key string, out voidptr) bool { - mut node := m.root - for { - mut i := node.size - 1 - for i >= 0 && key < node.keys[i] { - i-- - } - if i != -1 && key == node.keys[i] { - C.memcpy(out, node.values[i], m.value_bytes) + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) + mut index := hash & m.range_cap + mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) + for probe_hash < m.probe_hash[index] { + index = (index + 1) & m.range_cap + probe_hash += probe_inc + } + for probe_hash == m.probe_hash[index] { + if key == m.key_values[index].key { + C.memcpy(out, m.key_values[index].value, m.value_bytes) return true } - if isnil(node.children) { - break - } - node = &mapnode(node.children[i + 1]) + index = (index + 1) & m.range_cap + probe_hash += probe_inc } return false } fn (m map) exists(key string) bool { - if isnil(m.root) { // TODO: find out why root can be nil + if m.value_bytes == 0 { return false } - mut node := m.root - for { - mut i := node.size - 1 - for i >= 0 && key < node.keys[i] { i-- } - if i != -1 && key == node.keys[i] { + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) + mut index := hash & m.range_cap + mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) + for probe_hash < m.probe_hash[index] { + index = (index + 1) & m.range_cap + probe_hash += probe_inc + } + for probe_hash == m.probe_hash[index] { + if key == m.key_values[index].key { return true } - if isnil(node.children) { - break - } - node = &mapnode(node.children[i + 1]) + index = (index + 1) & m.range_cap + probe_hash += probe_inc } return false } -fn (n mapnode) find_key(k string) int { - mut idx := 0 - for idx < n.size && n.keys[idx] < k { - idx++ - } 
- return idx -} - -fn (n mut mapnode) remove_key(k string) bool { - idx := n.find_key(k) - if idx < n.size && n.keys[idx] == k { - if isnil(n.children) { - n.remove_from_leaf(idx) - } else { - n.remove_from_non_leaf(idx) - } - return true - } else { - if isnil(n.children) { - return false - } - flag := if idx == n.size {true} else {false} - if (&mapnode(n.children[idx])).size < degree { - n.fill(idx) - } - - if flag && idx > n.size { - return (&mapnode(n.children[idx - 1])).remove_key(k) - } else { - return (&mapnode(n.children[idx])).remove_key(k) - } - } -} - -fn (n mut mapnode) remove_from_leaf(idx int) { - for i := idx + 1; i < n.size; i++ { - n.keys[i - 1] = n.keys[i] - n.values[i - 1] = n.values[i] - } - n.size-- -} - -fn (n mut mapnode) remove_from_non_leaf(idx int) { - k := n.keys[idx] - if &mapnode(n.children[idx]).size >= degree { - mut current := &mapnode(n.children[idx]) - for !isnil(current.children) { - current = &mapnode(current.children[current.size]) - } - predecessor := current.keys[current.size - 1] - n.keys[idx] = predecessor - n.values[idx] = current.values[current.size - 1] - (&mapnode(n.children[idx])).remove_key(predecessor) - } else if &mapnode(n.children[idx + 1]).size >= degree { - mut current := &mapnode(n.children[idx + 1]) - for !isnil(current.children) { - current = &mapnode(current.children[0]) - } - successor := current.keys[0] - n.keys[idx] = successor - n.values[idx] = current.values[0] - (&mapnode(n.children[idx + 1])).remove_key(successor) - } else { - n.merge(idx) - (&mapnode(n.children[idx])).remove_key(k) - } -} - -fn (n mut mapnode) fill(idx int) { - if idx != 0 && &mapnode(n.children[idx - 1]).size >= degree { - n.borrow_from_prev(idx) - } else if idx != n.size && &mapnode(n.children[idx + 1]).size >= degree { - n.borrow_from_next(idx) - } else if idx != n.size { - n.merge(idx) - } else { - n.merge(idx - 1) - } -} - -fn (n mut mapnode) borrow_from_prev(idx int) { - mut child := &mapnode(n.children[idx]) - mut sibling := 
&mapnode(n.children[idx - 1]) - for i := child.size - 1; i >= 0; i-- { - child.keys[i + 1] = child.keys[i] - child.values[i + 1] = child.values[i] - } - if !isnil(child.children) { - for i := child.size; i >= 0; i-- { - child.children[i + 1] = child.children[i] - } - } - child.keys[0] = n.keys[idx - 1] - child.values[0] = n.values[idx - 1] - if !isnil(child.children) { - child.children[0] = sibling.children[sibling.size] - } - n.keys[idx - 1] = sibling.keys[sibling.size - 1] - n.values[idx - 1] = sibling.values[sibling.size - 1] - child.size++ - sibling.size-- -} - -fn (n mut mapnode) borrow_from_next(idx int) { - mut child := &mapnode(n.children[idx]) - mut sibling := &mapnode(n.children[idx + 1]) - child.keys[child.size] = n.keys[idx] - child.values[child.size] = n.values[idx] - if !isnil(child.children) { - child.children[child.size + 1] = sibling.children[0] - } - n.keys[idx] = sibling.keys[0] - n.values[idx] = sibling.values[0] - for i := 1; i < sibling.size; i++ { - sibling.keys[i - 1] = sibling.keys[i] - sibling.values[i - 1] = sibling.values[i] - } - if !isnil(sibling.children) { - for i := 1; i <= sibling.size; i++ { - sibling.children[i - 1] = sibling.children[i] - } - } - child.size++ - sibling.size-- -} - -fn (n mut mapnode) merge(idx int) { - mut child := &mapnode(n.children[idx]) - sibling := &mapnode(n.children[idx + 1]) - child.keys[mid_index] = n.keys[idx] - child.values[mid_index] = n.values[idx] - for i := 0; i < sibling.size; i++ { - child.keys[i + degree] = sibling.keys[i] - child.values[i + degree] = sibling.values[i] - } - if !isnil(child.children) { - for i := 0; i <= sibling.size; i++ { - child.children[i + degree] = sibling.children[i] - } - } - for i := idx + 1; i < n.size; i++ { - n.keys[i - 1] = n.keys[i] - n.values[i - 1] = n.values[i] - } - for i := idx + 2; i <= n.size; i++ { - n.children[i - 1] = n.children[i] - } - child.size += sibling.size + 1 - n.size-- - // free(sibling) -} - -pub fn (m mut map) delete(key string) { - if 
m.root.size == 0 { - return - } - - removed := m.root.remove_key(key) - if removed { - m.size-- - } - - if m.root.size == 0 { - // tmp := t.root - if isnil(m.root.children) { - return - } else { - m.root = &mapnode(m.root.children[0]) - } - // free(tmp) - } -} - -// Insert all keys of the subtree into array `keys` -// starting at `at`. Keys are inserted in order. -fn (n mapnode) subkeys(keys mut []string, at int) int { - mut position := at - if !isnil(n.children) { - // Traverse children and insert - // keys inbetween children - for i in 0..n.size { - child := &mapnode(n.children[i]) - position += child.subkeys(mut keys, position) - keys[position] = n.keys[i] - position++ - } - // Insert the keys of the last child - child := &mapnode(n.children[n.size]) - position += child.subkeys(mut keys, position) - } else { - // If leaf, insert keys - for i in 0..n.size { - keys[position + i] = n.keys[i] - } - position += n.size - } - // Return # of added keys - return position - at -} - pub fn (m &map) keys() []string { mut keys := [''].repeat(m.size) - if isnil(m.root) || m.root.size == 0 { + if m.value_bytes == 0 { return keys } - m.root.subkeys(mut keys, 0) + mut j := 0 + for i in 0 .. 
(m.range_cap + 1) { + if m.probe_hash[i] != 0 { + keys[j] = m.key_values[i].key + j++ + } + } return keys } -fn (n mut mapnode) free() { - mut i := 0 - if isnil(n.children) { - i = 0 - for i < n.size { - i++ - } - } else { - i = 0 - for i < n.size { - &mapnode(n.children[i]).free() - i++ - } - &mapnode(n.children[i]).free() +pub fn (m mut map) set_load_factor(new_load_factor f32) { + if new_load_factor > 1.0 { + m.load_factor = 1.0 + } + else if new_load_factor < 0.1 { + m.load_factor = 0.1 + } + else { + m.load_factor = new_load_factor } - // free(n) } pub fn (m mut map) free() { - if isnil(m.root) { - return + unsafe{ + free(m.key_values) } - m.root.free() } pub fn (m map) print() { - println('<<<<<<<<') - //for i := 0; i < m.entries.len; i++ { - // entry := m.entries[i] - // println('$entry.key => $entry.val') - //} - /* - for i := 0; i < m.cap * m.value_bytes; i++ { - b := m.table[i] - print('$i: ') - C.printf('%02x', b) - println('') - } -*/ - println('>>>>>>>>>>') + println('TODO') } pub fn (m map_string) str() string { @@ -444,4 +371,4 @@ pub fn (m map_string) str() string { } sb.writeln('}') return sb.str() -} +} \ No newline at end of file diff --git a/vlib/builtin/sorted_map.v b/vlib/builtin/sorted_map.v new file mode 100644 index 0000000000..d2e9fad020 --- /dev/null +++ b/vlib/builtin/sorted_map.v @@ -0,0 +1,416 @@ +// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +module builtin + +// import strings + +// B-trees are balanced search trees with all leaves at +// the same level. B-trees are generally faster than +// binary search trees due to the better locality of +// reference, since multiple keys are stored in one node. + +// The number for `degree` has been picked through vigor- +// ous benchmarking but can be changed to any number > 1. +// `degree` determines the size of each node. 
+const ( + degree = 6 + mid_index = degree - 1 + max_size = 2 * degree - 1 + children_bytes = sizeof(voidptr) * (max_size + 1) +) + +pub struct SortedMap { + value_bytes int +mut: + root &mapnode +pub mut: + size int +} + +struct mapnode { +mut: + keys [11]string // TODO: Should use `max_size` + values [11]voidptr // TODO: Should use `max_size` + children &voidptr + size int +} + +fn new_sorted_map(n, value_bytes int) SortedMap { // TODO: Remove `n` + return SortedMap { + value_bytes: value_bytes + root: new_node() + size: 0 + } +} + +fn new_sorted_map_init(n, value_bytes int, keys &string, values voidptr) SortedMap { + mut out := new_sorted_map(n, value_bytes) + for i in 0 .. n { + out.set(keys[i], values + i * value_bytes) + } + return out +} + +// The tree is initialized with an empty node as root to +// avoid having to check whether the root is null for +// each insertion. +fn new_node() &mapnode { + return &mapnode { + children: 0 + size: 0 + } +} + +// This implementation does proactive insertion, meaning +// that splits are done top-down and not bottom-up. 
+fn (m mut SortedMap) set(key string, value voidptr) { + mut node := m.root + mut child_index := 0 + mut parent := &mapnode(0) + for { + if node.size == max_size { + if isnil(parent) { + parent = new_node() + m.root = parent + } + parent.split_child(child_index, mut node) + if key == parent.keys[child_index] { + C.memcpy(parent.values[child_index], value, m.value_bytes) + return + } + node = if key < parent.keys[child_index] { + &mapnode(parent.children[child_index]) + } else { + &mapnode(parent.children[child_index + 1]) + } + } + mut i := 0 + for i < node.size && key > node.keys[i] { i++ } + if i != node.size && key == node.keys[i] { + C.memcpy(node.values[i], value, m.value_bytes) + return + } + if isnil(node.children) { + mut j := node.size - 1 + for j >= 0 && key < node.keys[j] { + node.keys[j + 1] = node.keys[j] + node.values[j + 1] = node.values[j] + j-- + } + node.keys[j + 1] = key + node.values[j + 1] = malloc(m.value_bytes) + C.memcpy(node.values[j + 1], value, m.value_bytes) + node.size++ + m.size++ + return + } + parent = node + child_index = i + node = &mapnode(node.children[child_index]) + } +} + +fn (n mut mapnode) split_child(child_index int, y mut mapnode) { + mut z := new_node() + z.size = mid_index + y.size = mid_index + for j := mid_index - 1; j >= 0; j-- { + z.keys[j] = y.keys[j + degree] + z.values[j] = y.values[j + degree] + } + if !isnil(y.children) { + z.children = &voidptr(malloc(children_bytes)) + for jj := degree - 1; jj >= 0; jj-- { + z.children[jj] = y.children[jj + degree] + } + } + if isnil(n.children) { + n.children = &voidptr(malloc(children_bytes)) + } + n.children[n.size + 1] = n.children[n.size] + for j := n.size; j > child_index; j-- { + n.keys[j] = n.keys[j - 1] + n.values[j] = n.values[j - 1] + n.children[j] = n.children[j - 1] + } + n.keys[child_index] = y.keys[mid_index] + n.values[child_index] = y.values[mid_index] + n.children[child_index] = voidptr(y) + n.children[child_index + 1] = voidptr(z) + n.size++ +} + +fn (m 
SortedMap) get(key string, out voidptr) bool { + mut node := m.root + for { + mut i := node.size - 1 + for i >= 0 && key < node.keys[i] { i-- } + if i != -1 && key == node.keys[i] { + C.memcpy(out, node.values[i], m.value_bytes) + return true + } + if isnil(node.children) { + break + } + node = &mapnode(node.children[i + 1]) + } + return false +} + +fn (m SortedMap) exists(key string) bool { + if isnil(m.root) { // TODO: find out why root can be nil + return false + } + mut node := m.root + for { + mut i := node.size - 1 + for i >= 0 && key < node.keys[i] { i-- } + if i != -1 && key == node.keys[i] { + return true + } + if isnil(node.children) { + break + } + node = &mapnode(node.children[i + 1]) + } + return false +} + +fn (n mapnode) find_key(k string) int { + mut idx := 0 + for idx < n.size && n.keys[idx] < k { + idx++ + } + return idx +} + +fn (n mut mapnode) remove_key(k string) bool { + idx := n.find_key(k) + if idx < n.size && n.keys[idx] == k { + if isnil(n.children) { + n.remove_from_leaf(idx) + } else { + n.remove_from_non_leaf(idx) + } + return true + } else { + if isnil(n.children) { + return false + } + flag := if idx == n.size {true} else {false} + if (&mapnode(n.children[idx])).size < degree { + n.fill(idx) + } + + if flag && idx > n.size { + return (&mapnode(n.children[idx - 1])).remove_key(k) + } else { + return (&mapnode(n.children[idx])).remove_key(k) + } + } +} + +fn (n mut mapnode) remove_from_leaf(idx int) { + for i := idx + 1; i < n.size; i++ { + n.keys[i - 1] = n.keys[i] + n.values[i - 1] = n.values[i] + } + n.size-- +} + +fn (n mut mapnode) remove_from_non_leaf(idx int) { + k := n.keys[idx] + if &mapnode(n.children[idx]).size >= degree { + mut current := &mapnode(n.children[idx]) + for !isnil(current.children) { + current = &mapnode(current.children[current.size]) + } + predecessor := current.keys[current.size - 1] + n.keys[idx] = predecessor + n.values[idx] = current.values[current.size - 1] + 
(&mapnode(n.children[idx])).remove_key(predecessor)
+	} else if &mapnode(n.children[idx + 1]).size >= degree {
+		mut current := &mapnode(n.children[idx + 1])
+		for !isnil(current.children) {
+			current = &mapnode(current.children[0])
+		}
+		successor := current.keys[0]
+		n.keys[idx] = successor
+		n.values[idx] = current.values[0]
+		(&mapnode(n.children[idx + 1])).remove_key(successor)
+	} else {
+		n.merge(idx)
+		(&mapnode(n.children[idx])).remove_key(k)
+	}
+}
+
+// fill tops up child `idx` with one more key, or merges it with a neighbour.
+// (Presumably called when that child has too few keys to delete from;
+// confirm at the call site.)
+// Order of preference:
+//   1. borrow from the previous sibling if it holds at least `degree` keys,
+//   2. borrow from the next sibling under the same condition,
+//   3. merge with the next sibling, or with the previous one when `idx`
+//      is the last child (idx == n.size).
+fn (n mut mapnode) fill(idx int) {
+	if idx != 0 && &mapnode(n.children[idx - 1]).size >= degree {
+		n.borrow_from_prev(idx)
+	} else if idx != n.size && &mapnode(n.children[idx + 1]).size >= degree {
+		n.borrow_from_next(idx)
+	} else if idx != n.size {
+		n.merge(idx)
+	} else {
+		// Last child has no next sibling: merge it into the previous one.
+		n.merge(idx - 1)
+	}
+}
+
+// borrow_from_prev rotates one entry to the right through the parent:
+// the separator n.keys[idx - 1] moves down to the front of child `idx`,
+// and the last key of the previous sibling moves up to replace it.
+fn (n mut mapnode) borrow_from_prev(idx int) {
+	mut child := &mapnode(n.children[idx])
+	mut sibling := &mapnode(n.children[idx - 1])
+	// Shift the child's keys/values one slot right to free slot 0.
+	for i := child.size - 1; i >= 0; i-- {
+		child.keys[i + 1] = child.keys[i]
+		child.values[i + 1] = child.values[i]
+	}
+	// Internal node: shift the child pointers as well (size + 1 of them).
+	if !isnil(child.children) {
+		for i := child.size; i >= 0; i-- {
+			child.children[i + 1] = child.children[i]
+		}
+	}
+	// Parent separator comes down into the freed slot.
+	child.keys[0] = n.keys[idx - 1]
+	child.values[0] = n.values[idx - 1]
+	// The sibling's last subtree becomes the child's first subtree.
+	if !isnil(child.children) {
+		child.children[0] = sibling.children[sibling.size]
+	}
+	// Sibling's last key goes up to become the new separator.
+	n.keys[idx - 1] = sibling.keys[sibling.size - 1]
+	n.values[idx - 1] = sibling.values[sibling.size - 1]
+	child.size++
+	sibling.size--
+}
+
+// borrow_from_next rotates one entry to the left through the parent:
+// the separator n.keys[idx] moves down to the end of child `idx`,
+// and the first key of the next sibling moves up to replace it.
+fn (n mut mapnode) borrow_from_next(idx int) {
+	mut child := &mapnode(n.children[idx])
+	mut sibling := &mapnode(n.children[idx + 1])
+	// Parent separator is appended to the child.
+	child.keys[child.size] = n.keys[idx]
+	child.values[child.size] = n.values[idx]
+	// The sibling's first subtree becomes the child's last subtree.
+	if !isnil(child.children) {
+		child.children[child.size + 1] = sibling.children[0]
+	}
+	// Sibling's first key goes up to become the new separator.
+	n.keys[idx] = sibling.keys[0]
+	n.values[idx] = sibling.values[0]
+	// Close the gap at the front of the sibling (keys, values, then children).
+	for i := 1; i < sibling.size; i++ {
+		sibling.keys[i - 1] = sibling.keys[i]
+		sibling.values[i - 1] = sibling.values[i]
+	}
+	if !isnil(sibling.children) {
+		for i := 1; i <= sibling.size; i++ {
+			sibling.children[i - 1] = sibling.children[i]
+		}
+	}
+	child.size++
+	sibling.size--
+}
+
+// merge folds the separator n.keys[idx] and all of child `idx + 1` into
+// child `idx`, then removes the separator and the right child pointer from `n`.
+// NOTE(review): the separator is written at `mid_index` and the sibling's
+// entries from `degree` onwards, so this appears to assume the left child
+// holds exactly `mid_index` keys and that mid_index + 1 == degree. Confirm
+// against the constant definitions.
+fn (n mut mapnode) merge(idx int) {
+	mut child := &mapnode(n.children[idx])
+	sibling := &mapnode(n.children[idx + 1])
+	// Separator comes down between the two halves.
+	child.keys[mid_index] = n.keys[idx]
+	child.values[mid_index] = n.values[idx]
+	// Append the sibling's keys/values after the separator.
+	for i := 0; i < sibling.size; i++ {
+		child.keys[i + degree] = sibling.keys[i]
+		child.values[i + degree] = sibling.values[i]
+	}
+	// Internal node: append the sibling's size + 1 child pointers too.
+	if !isnil(child.children) {
+		for i := 0; i <= sibling.size; i++ {
+			child.children[i + degree] = sibling.children[i]
+		}
+	}
+	// Close the gap left in the parent by the removed separator...
+	for i := idx + 1; i < n.size; i++ {
+		n.keys[i - 1] = n.keys[i]
+		n.values[i - 1] = n.values[i]
+	}
+	// ...and by the removed pointer to the right sibling.
+	for i := idx + 2; i <= n.size; i++ {
+		n.children[i - 1] = n.children[i]
+	}
+	child.size += sibling.size + 1
+	n.size--
+	// The emptied sibling node is not released here (call is disabled):
+	// free(sibling)
+}
+
+// delete removes `key` from the map. `m.size` is decremented only when
+// `remove_key` reports that a key was actually removed.
+// NOTE(review): `m.root` is dereferenced without an `isnil` check, unlike
+// `keys()` and `free()`. Confirm the root is always allocated.
+pub fn (m mut SortedMap) delete(key string) {
+	// Root holds no keys: nothing to delete.
+	if m.root.size == 0 {
+		return
+	}
+
+	removed := m.root.remove_key(key)
+	if removed {
+		m.size--
+	}
+
+	// The root may have lost its last key.
+	if m.root.size == 0 {
+		// Disabled: keep a handle on the old root so it can be freed below.
+		// tmp := t.root
+		if isnil(m.root.children) {
+			// Root has no children: keep it as the (now keyless) root.
+			return
+		} else {
+			// Root's first child becomes the new root.
+			m.root = &mapnode(m.root.children[0])
+		}
+		// Disabled: the old root node is currently not released.
+		// free(tmp)
+	}
+}
+
+// Insert all keys of the subtree into array `keys`
+// starting at `at`. Keys are inserted in order.
+fn (n mapnode) subkeys(keys mut []string, at int) int {
+	// `cursor` is the next free slot in `keys`; the return value is the
+	// number of slots this subtree filled (cursor - at).
+	mut cursor := at
+	if isnil(n.children) {
+		// Leaf node: copy its keys into consecutive slots.
+		for i := 0; i < n.size; i++ {
+			keys[cursor] = n.keys[i]
+			cursor++
+		}
+		return cursor - at
+	}
+	// Internal node: emit each child's subtree followed by the key
+	// that separates it from the next child.
+	for i := 0; i < n.size; i++ {
+		left := &mapnode(n.children[i])
+		cursor += left.subkeys(mut keys, cursor)
+		keys[cursor] = n.keys[i]
+		cursor++
+	}
+	// The last child has no key after it.
+	last := &mapnode(n.children[n.size])
+	cursor += last.subkeys(mut keys, cursor)
+	return cursor - at
+}
+
+// keys returns an array of `m.size` strings filled with the map's keys
+// by walking the tree from the root. When the root is nil or holds no
+// keys the array is returned as allocated (empty strings).
+pub fn (m &SortedMap) keys() []string {
+	mut result := [''].repeat(m.size)
+	if !isnil(m.root) && m.root.size != 0 {
+		m.root.subkeys(mut result, 0)
+	}
+	return result
+}
+
+// free is a placeholder for releasing a node; it only prints 'TODO'.
+fn (n mut mapnode) free() {
+	println('TODO')
+}
+
+// free forwards to the root node's `free` when a root exists.
+pub fn (m mut SortedMap) free() {
+	if !isnil(m.root) {
+		m.root.free()
+	}
+}
+
+// print is a placeholder; it only prints 'TODO'.
+pub fn (m SortedMap) print() {
+	println('TODO')
+}
+
+// pub fn (m map_string) str() string {
+// if m.size == 0 {
+// return '{}'
+// }
+// mut sb := strings.new_builder(50)
+// sb.writeln('{')
+// for key, val in m {
+// sb.writeln(' "$key" => "$val"')
+// }
+// sb.writeln('}')
+// return sb.str()
+// }
\ No newline at end of file