map: use hashmap instead of b-tree

pull/3795/head
ka-weihe 2020-02-20 17:28:20 +01:00 committed by GitHub
parent e35f8e9e23
commit 34d926350b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 697 additions and 742 deletions

View File

@ -1,112 +0,0 @@
import rand
import time
import builtin.hashmap
// hashmap_set_bench times `repeat` rounds of filling a brand-new hashmap
// with every key of `arr` (value 1) and prints the time.ticks() difference.
fn hashmap_set_bench(arr []string, repeat int) {
	began := time.ticks()
	for round := 0; round < repeat; round++ {
		mut table := hashmap.new_hashmap()
		for k := 0; k < arr.len; k++ {
			table.set(arr[k], 1)
		}
	}
	end_time := time.ticks() - began
	println("* hashmap_set: ${end_time} ms")
}
// map_set_bench times `repeat` rounds of filling a brand-new builtin
// map[string]int with every key of `arr` (value 1) and prints the
// time.ticks() difference.
fn map_set_bench(arr []string, repeat int) {
	began := time.ticks()
	for round := 0; round < repeat; round++ {
		mut table := map[string]int
		for k := 0; k < arr.len; k++ {
			key := arr[k]
			table[key] = 1
		}
	}
	end_time := time.ticks() - began
	println("* map_set: ${end_time} ms")
}
// hashmap_get_bench fills one hashmap with the keys of `arr` (not timed),
// then times `repeat` rounds of looking every key up again and prints the
// time.ticks() difference.
fn hashmap_get_bench(arr []string, repeat int) {
	mut table := hashmap.new_hashmap()
	for k := 0; k < arr.len; k++ {
		table.set(arr[k], 1)
	}
	began := time.ticks()
	for round := 0; round < repeat; round++ {
		for k := 0; k < arr.len; k++ {
			table.get(arr[k])
		}
	}
	end_time := time.ticks() - began
	println("* hashmap_get: ${end_time} ms")
}
// map_get_bench fills one builtin map[string]int with the keys of `arr`
// (not timed), then times `repeat` rounds of indexing the map with every
// key and prints the time.ticks() difference.
fn map_get_bench(arr []string, repeat int) {
	mut table := map[string]int
	for k := 0; k < arr.len; k++ {
		key := arr[k]
		table[key] = 1
	}
	began := time.ticks()
	for round := 0; round < repeat; round++ {
		for k := 0; k < arr.len; k++ {
			key := arr[k]
			table[key]
		}
	}
	end_time := time.ticks() - began
	println("* map_get: ${end_time} ms")
}
// benchmark_many_keys runs the enabled benchmarks once (repeat = 1) per
// key set. The key count starts at 2048 and doubles while it is
// <= 10000000; every key is a random string of `key_len` lowercase letters.
fn benchmark_many_keys() {
	key_len := 30
	repeat := 1
	for i := 2048; i <= 10000000; i = i * 2 {
		mut arr := []string
		for _ in 0..i {
			mut buf := []byte
			for _ in 0..key_len {
				// The bound passed to rand.next is exclusive, so the +1 is
				// needed for `z` to be a possible letter.
				buf << byte(rand.next(int(`z`) - int(`a`) + 1) + `a`)
			}
			s := string(buf)
			arr << s
		}
		println("$arr.len keys of length $key_len")
		// Uncomment the benchmarks you would like to run.
		// Run only one or two at a time for as long as memory still leaks.
		hashmap_get_bench(arr, repeat)
		map_get_bench(arr, repeat)
		// hashmap_set_bench(arr, repeat)
		// map_set_bench(arr, repeat)
		println('')
	}
}
// benchmark_few_keys runs the enabled benchmarks 10000 times per key set.
// The key count starts at 16 and doubles while it is <= 2048; every key is
// a random string of `key_len` lowercase letters.
fn benchmark_few_keys() {
	key_len := 30
	repeat := 10000
	println("Benchmarks are repeated $repeat times")
	for i := 16; i <= 2048; i = i * 2 {
		mut arr := []string
		for _ in 0..i {
			mut buf := []byte
			for _ in 0..key_len {
				// The bound passed to rand.next is exclusive, so the +1 is
				// needed for `z` to be a possible letter.
				buf << byte(rand.next(int(`z`) - int(`a`) + 1) + `a`)
			}
			s := string(buf)
			arr << s
		}
		println("$arr.len keys of length $key_len")
		// Uncomment the benchmarks you would like to run.
		// Run only one or two at a time for as long as memory still leaks.
		hashmap_get_bench(arr, repeat)
		map_get_bench(arr, repeat)
		// hashmap_set_bench(arr, repeat)
		// map_set_bench(arr, repeat)
		println('')
	}
}
// main is the benchmark entry point; as written it only runs
// benchmark_few_keys().
fn main() {
	// Uncomment below to benchmark on many keys
	// benchmark_many_keys()
	benchmark_few_keys()
}

View File

@ -1,243 +0,0 @@
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module hashmap
import hash.wyhash
const (
	// log2 of the initial slot count; also the initial value of `shift`
	log_size = 5
	// number of low bits of an info word that hold hash bits; shifting an
	// info word right by this amount leaves its probe count
	n_hashbits = 24
	// initial value of `window`, the countdown that rehash() decrements
	window_size = 16
	// initial number of slots (1 << log_size)
	initial_size = 1 << log_size
	// initial index mask; slot indexes are computed as `hash & cap`
	initial_cap = initial_size - 1
	// default ratio of size to cap above which set() calls rehash()
	default_load_factor = 0.8
	// selects the low 24 bits of an info word (the stored hash bits)
	hashbit_mask = u32(0xFFFFFF)
	// added to an info word for every probe step (1 << 24)
	probe_offset = u32(0x1000000)
	// the upper 8 bits of an info word; set()/rehash() compare against it
	// to detect a probe count that has reached its maximum
	max_probe = u32(0xFF000000)
)
// Hashmap maps string keys to int values using linear probing with
// Robin Hood swapping. Every slot has one entry in `key_values` and a
// matching u32 in `info`.
pub struct Hashmap {
mut:
	// index mask (slot count - 1); slot indexes are computed as `hash & cap`
	cap u32
	// number of bits the hash is shifted right by before its low 24 bits
	// are stored in an info word
	shift byte
	// countdown decremented by every rehash(); when it reaches 0, `shift`
	// is advanced by window_size and all hashes are recomputed
	window byte
	// one u32 per slot: 0 marks an empty slot, otherwise the upper 8 bits
	// hold the probe count and the lower 24 bits hold hash bits
	info &u32
	// one KeyValue per slot, parallel to `info`
	key_values &KeyValue
pub mut:
	// set() calls rehash() when size / cap exceeds this value
	load_factor f32
	// number of key-value pairs currently stored
	size int
}
// KeyValue is one stored entry: an immutable string key and its mutable
// int value.
struct KeyValue {
	key string
mut:
	value int
}
// new_hashmap returns an empty Hashmap with `initial_size` zeroed slots,
// the default load factor and a full hash-bit window.
pub fn new_hashmap() Hashmap {
	info_mem := &u32(calloc(sizeof(u32) * initial_size))
	kv_mem := &KeyValue(calloc(sizeof(KeyValue) * initial_size))
	return Hashmap{
		cap: initial_cap
		shift: log_size
		window: window_size
		info: info_mem
		key_values: kv_mem
		load_factor: default_load_factor
		size: 0
	}
}
// set stores `value` under `key`. The value of an existing key is
// overwritten in place; a new key is inserted and `size` is incremented.
// The table is grown first when size / cap exceeds `load_factor`.
pub fn (h mut Hashmap) set(key string, value int) {
	// load_factor can be adjusted.
	if (f32(h.size) / f32(h.cap)) > h.load_factor {
		h.rehash()
	}
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	// Info word for the first probe: 24 hash bits plus one probe_offset.
	mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
	mut index := hash & h.cap
	// While probe count is less
	for info < h.info[index] {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// While we might have a match
	for info == h.info[index] {
		if key == h.key_values[index].key {
			h.key_values[index].value = value
			return
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Match is not possible anymore.
	// Probe until an empty index is found.
	// Swap when probe count is higher/richer (Robin Hood).
	mut current_kv := KeyValue{key, value}
	for h.info[index] != 0 {
		if info > h.info[index] {
			// Swap info word
			tmp_info := h.info[index]
			h.info[index] = info
			info = tmp_info
			// Swap KeyValue
			tmp_kv := h.key_values[index]
			h.key_values[index] = current_kv
			current_kv = tmp_kv
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Should almost never happen
	// The probe count has filled its 8 bits: grow the table and insert the
	// entry currently being carried through set() again.
	if (info & max_probe) == max_probe {
		h.rehash()
		h.set(current_kv.key, current_kv.value)
		return
	}
	h.info[index] = info
	h.key_values[index] = current_kv
	h.size++
}
// rehash doubles the number of slots and re-inserts every stored entry
// into freshly allocated `info` / `key_values` arrays, then frees the old
// arrays. While `window` is non-zero the hash of each entry is rebuilt
// from its stored info word; when `window` hits 0 the hash is recomputed
// from the key with an advanced `shift`.
fn (h mut Hashmap) rehash() {
	old_cap := h.cap
	h.window--
	// check if any hashbits are left
	if h.window == 0 {
		h.shift += window_size
	}
	// double the size of the hashmap
	h.cap = ((h.cap + 1) << 1) - 1
	mut new_key_values := &KeyValue(calloc(sizeof(KeyValue) * (h.cap + 1)))
	mut new_info := &u32(calloc(sizeof(u32) * (h.cap + 1)))
	for i in 0 .. (old_cap + 1) {
		if h.info[i] != 0 {
			mut kv := h.key_values[i]
			mut hash := u64(0)
			mut info := u32(0)
			if h.window == 0 {
				// No cached hash bits left: hash the key again.
				hash = wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0)
				info = u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
			}
			else {
				// Slot index minus the probe distance, masked with the old
				// cap (h.cap >> 1): the index this entry first hashed to.
				original := u64(i - ((h.info[i] >> n_hashbits) - 1)) & (h.cap >> 1)
				// Combine that index with the stored info word shifted
				// left by `shift` to get the value used for indexing below.
				hash = original | (h.info[i] << h.shift)
				// Keep the stored hash bits and restart the probe count.
				info = (h.info[i] & hashbit_mask) | probe_offset
			}
			mut index := hash & h.cap
			// While probe count is less
			for info < new_info[index] {
				index = (index + 1) & h.cap
				info += probe_offset
			}
			// Probe until an empty index is found.
			// Swap when probe count is higher/richer (Robin Hood).
			for new_info[index] != 0 {
				if info > new_info[index] {
					// Swap info word
					tmp_info := new_info[index]
					new_info[index] = info
					info = tmp_info
					// Swap KeyValue
					tmp_kv := new_key_values[index]
					new_key_values[index] = kv
					kv = tmp_kv
				}
				index = (index + 1) & h.cap
				info += probe_offset
			}
			// Should almost never happen
			// NOTE(review): at this point h.cap is already doubled while
			// h.info / h.key_values still point at the old arrays, and
			// new_key_values / new_info are neither installed nor freed
			// before returning - confirm this nested rehash() is safe.
			if (info & max_probe) == max_probe {
				h.rehash()
				h.set(kv.key, kv.value)
				return
			}
			new_info[index] = info
			new_key_values[index] = kv
		}
	}
	// A used-up window is refilled after the full rehash above.
	if h.window == 0 {
		h.window = window_size
	}
	free(h.key_values)
	free(h.info)
	h.key_values = new_key_values
	h.info = new_info
}
// delete removes `key` from the map if it is present and decrements
// `size`; it does nothing when the key is not found. The entries that
// follow the removed one are moved one slot back so that no gap is left
// inside a probe sequence.
pub fn (h mut Hashmap) delete(key string) {
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut index := hash & h.cap
	mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
	// Skip slots whose info word is larger than the one being searched.
	for info < h.info[index] {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Perform backwards shifting
	for info == h.info[index] {
		if key == h.key_values[index].key {
			mut old_index := index
			index = (index + 1) & h.cap
			mut current_info := h.info[index]
			// Move every following entry with a probe count above 1 one
			// slot back, lowering its probe count by one step.
			for (current_info >> n_hashbits) > 1 {
				h.info[old_index] = current_info - probe_offset
				h.key_values[old_index] = h.key_values[index]
				old_index = index
				index = (index + 1) & h.cap
				current_info = h.info[index]
			}
			// The last vacated slot becomes empty.
			h.info[old_index] = 0
			h.size--
			return
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
}
// get returns the value stored under `key`, or 0 when the key is not in
// the map.
pub fn (h Hashmap) get(key string) int {
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut probe := u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
	mut slot := hash & h.cap
	// Step over slots whose info word is still larger than ours.
	for h.info[slot] > probe {
		slot = (slot + 1) & h.cap
		probe += probe_offset
	}
	// Walk the run of matching info words and compare the keys.
	for h.info[slot] == probe {
		if h.key_values[slot].key == key {
			return h.key_values[slot].value
		}
		probe += probe_offset
		slot = (slot + 1) & h.cap
	}
	return 0
}
// exists reports whether `key` is stored in the map.
pub fn (h Hashmap) exists(key string) bool {
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut probe := u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
	mut slot := hash & h.cap
	// Step over slots whose info word is still larger than ours.
	for h.info[slot] > probe {
		slot = (slot + 1) & h.cap
		probe += probe_offset
	}
	// Walk the run of matching info words and compare the keys.
	for h.info[slot] == probe {
		if h.key_values[slot].key == key {
			return true
		}
		probe += probe_offset
		slot = (slot + 1) & h.cap
	}
	return false
}
// keys returns the keys of all occupied slots, in slot order.
pub fn (h Hashmap) keys() []string {
	mut out := [''].repeat(h.size)
	mut filled := 0
	for i in 0 .. (h.cap + 1) {
		// an info word of 0 marks an empty slot
		if h.info[i] == 0 {
			continue
		}
		out[filled] = h.key_values[i].key
		filled++
	}
	return out
}

View File

@ -1,33 +0,0 @@
module hashmap
import rand
// test_random_strings inserts 1000 random 10-letter keys, checking after
// every insert that the key reads back its value, then checks one fixed
// key as well.
fn test_random_strings() {
	mut m := new_hashmap()
	for i in 0..1000 {
		mut buf := []byte
		for _ in 0..10 {
			// The bound passed to rand.next is exclusive, so the +1 is
			// needed for `z` to be a possible letter.
			buf << byte(rand.next(int(`z`) - int(`a`) + 1) + `a`)
		}
		s := string(buf)
		//println(s)
		m.set(s, i)
		assert m.get(s) == i
	}
	m.set('foo', 12)
	val := m.get('foo')
	assert val == 12
}
// test_large_hashmap stores the numbers 0..299999 under their decimal
// string form and then checks that every key reads back its number.
fn test_large_hashmap() {
	count := 300 * 1000
	mut nums := new_hashmap()
	for i in 0..count {
		nums.set(i.str(), i)
	}
	for i in 0..count {
		assert nums.get(i.str()) == i
	}
}

View File

@ -4,44 +4,73 @@
module builtin module builtin
import strings import (
strings
// B-trees are balanced search trees with all leaves at hash.wyhash
// the same level. B-trees are generally faster than )
// binary search trees due to the better locality of
// reference, since multiple keys are stored in one node.
// The number for `degree` has been picked through vigor-
// ous benchmarking but can be changed to any number > 1.
// `degree` determines the size of each node.
const ( const (
degree = 6 // Number of bits from the hash stored for each entry
mid_index = degree - 1 hashbits = 24
max_size = 2 * degree - 1 // Number of bits from the hash stored for rehashing
children_bytes = sizeof(voidptr) * (max_size + 1) cached_hashbits = 16
// Initial log-number of buckets in the hashtable
init_log_capicity = 5
// Initial number of buckets in the hashtable
init_capicity = 1<<init_log_capicity
// Initial load-factor
init_load_factor = 0.8
// Initial range cap
init_range_cap = init_capicity - 1
// Bitmask to select all the hashbits
hash_mask = u32(0x00FFFFFF)
// Used for incrementing the probe-count
probe_inc = u32(0x01000000)
// Bitmask for maximum probe count
max_probe = u32(0xFF000000)
) )
pub struct map { pub struct map {
// Byte size of value
value_bytes int value_bytes int
mut: mut:
root &mapnode // Index of the highest index in the hashtable
range_cap u32
// Number of cached hashbits left for rehashing
window byte
// Used for right-shifting out used hashbits
shift byte
// Pointer to Key-value memory
key_values &KeyValue
// Pointer to probe_hash memory. Each Key-value has a
// corresponding probe_hash-DWORD. Upper-bits are the
// probe-count and lower-bits are bits from the hash.
probe_hash &u32
// Measure that decides when to increase the capacity
load_factor f32
pub mut: pub mut:
// Number of key-values currently in the hashmap
size int size int
} }
struct mapnode { struct KeyValue {
key string
mut: mut:
keys [11]string // TODO: Should use `max_size` value voidptr
values [11]voidptr // TODO: Should use `max_size`
children &voidptr
size int
} }
fn new_map(n, value_bytes int) map { // TODO: Remove `n` fn new_map(n, value_bytes int) map {
return map { probe_hash_bytes := sizeof(u32) * init_capicity
key_value_bytes := sizeof(KeyValue) * init_capicity
memory := calloc(key_value_bytes + probe_hash_bytes)
return map{
value_bytes: value_bytes value_bytes: value_bytes
root: new_node() range_cap: init_range_cap
shift: init_log_capicity
window: cached_hashbits
key_values: &KeyValue(memory)
probe_hash: &u32(memory + key_value_bytes)
load_factor: init_load_factor
size: 0 size: 0
} }
} }
@ -54,383 +83,281 @@ fn new_map_init(n, value_bytes int, keys &string, values voidptr) map {
return out return out
} }
// The tree is initialized with an empty node as root to
// avoid having to check whether the root is null for
// each insertion.
fn new_node() &mapnode {
return &mapnode {
children: 0
size: 0
}
}
// This implementation does proactive insertion, meaning
// that splits are done top-down and not bottom-up.
fn (m mut map) set(key string, value voidptr) { fn (m mut map) set(key string, value voidptr) {
mut node := m.root // load_factor can be adjusted.
mut child_index := 0 if (f32(m.size) / f32(m.range_cap)) > m.load_factor {
mut parent := &mapnode(0) m.expand()
for {
if node.size == max_size {
if isnil(parent) {
parent = new_node()
m.root = parent
} }
parent.split_child(child_index, mut node) hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
if key == parent.keys[child_index] { mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
C.memcpy(parent.values[child_index], value, m.value_bytes) mut index := hash & m.range_cap
// While probe count is less
for probe_hash < m.probe_hash[index] {
index = (index + 1) & m.range_cap
probe_hash += probe_inc
}
// While we might have a match
for probe_hash == m.probe_hash[index] {
if key == m.key_values[index].key {
C.memcpy(m.key_values[index].value, value, m.value_bytes)
return return
} }
node = if key < parent.keys[child_index] { index = (index + 1) & m.range_cap
&mapnode(parent.children[child_index]) probe_hash += probe_inc
} else {
&mapnode(parent.children[child_index + 1])
} }
// Match is not possible anymore.
// Probe until an empty index is found.
// Swap when probe count is higher/richer (Robin Hood).
mut current_kv := KeyValue{key, malloc(m.value_bytes)}
C.memcpy(current_kv.value, value, m.value_bytes)
for m.probe_hash[index] != 0 {
if probe_hash > m.probe_hash[index] {
// Swap probe_hash
tmp_probe_hash := m.probe_hash[index]
m.probe_hash[index] = probe_hash
probe_hash = tmp_probe_hash
// Swap KeyValue
tmp_kv := m.key_values[index]
m.key_values[index] = current_kv
current_kv = tmp_kv
} }
mut i := 0 index = (index + 1) & m.range_cap
for i < node.size && key > node.keys[i] { i++ } probe_hash += probe_inc
if i != node.size && key == node.keys[i] { }
C.memcpy(node.values[i], value, m.value_bytes) // Should almost never happen
if (probe_hash & max_probe) == max_probe {
m.expand()
m.set(current_kv.key, current_kv.value)
return return
} }
if isnil(node.children) { m.probe_hash[index] = probe_hash
mut j := node.size - 1 m.key_values[index] = current_kv
for j >= 0 && key < node.keys[j] {
node.keys[j + 1] = node.keys[j]
node.values[j + 1] = node.values[j]
j--
}
node.keys[j + 1] = key
node.values[j + 1] = malloc(m.value_bytes)
C.memcpy(node.values[j + 1], value, m.value_bytes)
node.size++
m.size++ m.size++
return
}
parent = node
child_index = i
node = &mapnode(node.children[child_index])
}
} }
fn (n mut mapnode) split_child(child_index int, y mut mapnode) { fn (m mut map) expand() {
mut z := new_node() old_range_cap := m.range_cap
z.size = mid_index // double the size of the hashmap
y.size = mid_index m.range_cap = ((m.range_cap + 1)<<1) - 1
for j := mid_index - 1; j >= 0; j-- { // check if no hashbits are left
z.keys[j] = y.keys[j + degree] if m.window == 0 {
z.values[j] = y.values[j + degree] m.shift += cached_hashbits
m.rehash(old_range_cap)
m.window = cached_hashbits
} }
if !isnil(y.children) { else {
z.children = &voidptr(malloc(children_bytes)) m.cached_rehash(old_range_cap)
for jj := degree - 1; jj >= 0; jj-- { }
z.children[jj] = y.children[jj + degree] m.window--
}
fn (m mut map) rehash(old_range_cap u32) {
probe_hash_bytes := sizeof(u32) * (m.range_cap + 1)
key_value_bytes := sizeof(KeyValue) * (m.range_cap + 1)
memory := calloc(probe_hash_bytes + key_value_bytes)
mut new_key_values := &KeyValue(memory)
mut new_probe_hash := &u32(memory + key_value_bytes)
for i in 0 .. (old_range_cap + 1) {
if m.probe_hash[i] != 0 {
mut kv := m.key_values[i]
hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0)
mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
mut index := hash & m.range_cap
// While probe count is less
for probe_hash < new_probe_hash[index] {
index = (index + 1) & m.range_cap
probe_hash += probe_inc
}
// Probe until an empty index is found.
// Swap when probe count is higher/richer (Robin Hood).
for new_probe_hash[index] != 0 {
if probe_hash > new_probe_hash[index] {
// Swap probe_hash
tmp_probe_hash := new_probe_hash[index]
new_probe_hash[index] = probe_hash
probe_hash = tmp_probe_hash
// Swap KeyValue
tmp_kv := new_key_values[index]
new_key_values[index] = kv
kv = tmp_kv
}
index = (index + 1) & m.range_cap
probe_hash += probe_inc
}
// Should almost never happen
if (probe_hash & max_probe) == max_probe {
m.expand()
m.set(kv.key, kv.value)
return
}
new_probe_hash[index] = probe_hash
new_key_values[index] = kv
} }
} }
if isnil(n.children) { unsafe{
n.children = &voidptr(malloc(children_bytes)) free(m.key_values)
} }
n.children[n.size + 1] = n.children[n.size] m.key_values = new_key_values
for j := n.size; j > child_index; j-- { m.probe_hash = new_probe_hash
n.keys[j] = n.keys[j - 1] }
n.values[j] = n.values[j - 1]
n.children[j] = n.children[j - 1] fn (m mut map) cached_rehash(old_range_cap u32) {
probe_hash_bytes := sizeof(u32) * (m.range_cap + 1)
key_value_bytes := sizeof(KeyValue) * (m.range_cap + 1)
memory := calloc(probe_hash_bytes + key_value_bytes)
mut new_probe_hash := &u32(memory + key_value_bytes)
mut new_key_values := &KeyValue(memory)
for i in 0 .. (old_range_cap + 1) {
if m.probe_hash[i] != 0 {
mut kv := m.key_values[i]
mut probe_hash := m.probe_hash[i]
original := u64(i - ((probe_hash>>hashbits) - 1)) & (m.range_cap>>1)
hash := original | (probe_hash<<m.shift)
probe_hash = (probe_hash & hash_mask) | probe_inc
mut index := hash & m.range_cap
// While probe count is less
for probe_hash < new_probe_hash[index] {
index = (index + 1) & m.range_cap
probe_hash += probe_inc
}
// Probe until an empty index is found.
// Swap when probe count is higher/richer (Robin Hood).
for new_probe_hash[index] != 0 {
if probe_hash > new_probe_hash[index] {
// Swap probe_hash
tmp_probe_hash := new_probe_hash[index]
new_probe_hash[index] = probe_hash
probe_hash = tmp_probe_hash
// Swap KeyValue
tmp_kv := new_key_values[index]
new_key_values[index] = kv
kv = tmp_kv
}
index = (index + 1) & m.range_cap
probe_hash += probe_inc
}
// Should almost never happen
if (probe_hash & max_probe) == max_probe {
m.expand()
m.set(kv.key, kv.value)
return
}
new_probe_hash[index] = probe_hash
new_key_values[index] = kv
}
}
unsafe{
free(m.key_values)
}
m.key_values = new_key_values
m.probe_hash = new_probe_hash
}
pub fn (m mut map) delete(key string) {
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
mut index := hash & m.range_cap
mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
for probe_hash < m.probe_hash[index] {
index = (index + 1) & m.range_cap
probe_hash += probe_inc
}
// Perform backwards shifting
for probe_hash == m.probe_hash[index] {
if key == m.key_values[index].key {
mut old_index := index
index = (index + 1) & m.range_cap
mut current_probe_hash := m.probe_hash[index]
for (current_probe_hash>>hashbits) > 1 {
m.probe_hash[old_index] = current_probe_hash - probe_inc
m.key_values[old_index] = m.key_values[index]
old_index = index
index = (index + 1) & m.range_cap
current_probe_hash = m.probe_hash[index]
}
m.probe_hash[old_index] = 0
m.size--
return
}
index = (index + 1) & m.range_cap
probe_hash += probe_inc
} }
n.keys[child_index] = y.keys[mid_index]
n.values[child_index] = y.values[mid_index]
n.children[child_index] = voidptr(y)
n.children[child_index + 1] = voidptr(z)
n.size++
} }
fn (m map) get(key string, out voidptr) bool { fn (m map) get(key string, out voidptr) bool {
mut node := m.root hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
for { mut index := hash & m.range_cap
mut i := node.size - 1 mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
for i >= 0 && key < node.keys[i] { for probe_hash < m.probe_hash[index] {
i-- index = (index + 1) & m.range_cap
probe_hash += probe_inc
} }
if i != -1 && key == node.keys[i] { for probe_hash == m.probe_hash[index] {
C.memcpy(out, node.values[i], m.value_bytes) if key == m.key_values[index].key {
C.memcpy(out, m.key_values[index].value, m.value_bytes)
return true return true
} }
if isnil(node.children) { index = (index + 1) & m.range_cap
break probe_hash += probe_inc
}
node = &mapnode(node.children[i + 1])
} }
return false return false
} }
fn (m map) exists(key string) bool { fn (m map) exists(key string) bool {
if isnil(m.root) { // TODO: find out why root can be nil if m.value_bytes == 0 {
return false return false
} }
mut node := m.root hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
for { mut index := hash & m.range_cap
mut i := node.size - 1 mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
for i >= 0 && key < node.keys[i] { i-- } for probe_hash < m.probe_hash[index] {
if i != -1 && key == node.keys[i] { index = (index + 1) & m.range_cap
probe_hash += probe_inc
}
for probe_hash == m.probe_hash[index] {
if key == m.key_values[index].key {
return true return true
} }
if isnil(node.children) { index = (index + 1) & m.range_cap
break probe_hash += probe_inc
}
node = &mapnode(node.children[i + 1])
} }
return false return false
} }
fn (n mapnode) find_key(k string) int {
mut idx := 0
for idx < n.size && n.keys[idx] < k {
idx++
}
return idx
}
fn (n mut mapnode) remove_key(k string) bool {
idx := n.find_key(k)
if idx < n.size && n.keys[idx] == k {
if isnil(n.children) {
n.remove_from_leaf(idx)
} else {
n.remove_from_non_leaf(idx)
}
return true
} else {
if isnil(n.children) {
return false
}
flag := if idx == n.size {true} else {false}
if (&mapnode(n.children[idx])).size < degree {
n.fill(idx)
}
if flag && idx > n.size {
return (&mapnode(n.children[idx - 1])).remove_key(k)
} else {
return (&mapnode(n.children[idx])).remove_key(k)
}
}
}
fn (n mut mapnode) remove_from_leaf(idx int) {
for i := idx + 1; i < n.size; i++ {
n.keys[i - 1] = n.keys[i]
n.values[i - 1] = n.values[i]
}
n.size--
}
fn (n mut mapnode) remove_from_non_leaf(idx int) {
k := n.keys[idx]
if &mapnode(n.children[idx]).size >= degree {
mut current := &mapnode(n.children[idx])
for !isnil(current.children) {
current = &mapnode(current.children[current.size])
}
predecessor := current.keys[current.size - 1]
n.keys[idx] = predecessor
n.values[idx] = current.values[current.size - 1]
(&mapnode(n.children[idx])).remove_key(predecessor)
} else if &mapnode(n.children[idx + 1]).size >= degree {
mut current := &mapnode(n.children[idx + 1])
for !isnil(current.children) {
current = &mapnode(current.children[0])
}
successor := current.keys[0]
n.keys[idx] = successor
n.values[idx] = current.values[0]
(&mapnode(n.children[idx + 1])).remove_key(successor)
} else {
n.merge(idx)
(&mapnode(n.children[idx])).remove_key(k)
}
}
fn (n mut mapnode) fill(idx int) {
if idx != 0 && &mapnode(n.children[idx - 1]).size >= degree {
n.borrow_from_prev(idx)
} else if idx != n.size && &mapnode(n.children[idx + 1]).size >= degree {
n.borrow_from_next(idx)
} else if idx != n.size {
n.merge(idx)
} else {
n.merge(idx - 1)
}
}
fn (n mut mapnode) borrow_from_prev(idx int) {
mut child := &mapnode(n.children[idx])
mut sibling := &mapnode(n.children[idx - 1])
for i := child.size - 1; i >= 0; i-- {
child.keys[i + 1] = child.keys[i]
child.values[i + 1] = child.values[i]
}
if !isnil(child.children) {
for i := child.size; i >= 0; i-- {
child.children[i + 1] = child.children[i]
}
}
child.keys[0] = n.keys[idx - 1]
child.values[0] = n.values[idx - 1]
if !isnil(child.children) {
child.children[0] = sibling.children[sibling.size]
}
n.keys[idx - 1] = sibling.keys[sibling.size - 1]
n.values[idx - 1] = sibling.values[sibling.size - 1]
child.size++
sibling.size--
}
fn (n mut mapnode) borrow_from_next(idx int) {
mut child := &mapnode(n.children[idx])
mut sibling := &mapnode(n.children[idx + 1])
child.keys[child.size] = n.keys[idx]
child.values[child.size] = n.values[idx]
if !isnil(child.children) {
child.children[child.size + 1] = sibling.children[0]
}
n.keys[idx] = sibling.keys[0]
n.values[idx] = sibling.values[0]
for i := 1; i < sibling.size; i++ {
sibling.keys[i - 1] = sibling.keys[i]
sibling.values[i - 1] = sibling.values[i]
}
if !isnil(sibling.children) {
for i := 1; i <= sibling.size; i++ {
sibling.children[i - 1] = sibling.children[i]
}
}
child.size++
sibling.size--
}
fn (n mut mapnode) merge(idx int) {
mut child := &mapnode(n.children[idx])
sibling := &mapnode(n.children[idx + 1])
child.keys[mid_index] = n.keys[idx]
child.values[mid_index] = n.values[idx]
for i := 0; i < sibling.size; i++ {
child.keys[i + degree] = sibling.keys[i]
child.values[i + degree] = sibling.values[i]
}
if !isnil(child.children) {
for i := 0; i <= sibling.size; i++ {
child.children[i + degree] = sibling.children[i]
}
}
for i := idx + 1; i < n.size; i++ {
n.keys[i - 1] = n.keys[i]
n.values[i - 1] = n.values[i]
}
for i := idx + 2; i <= n.size; i++ {
n.children[i - 1] = n.children[i]
}
child.size += sibling.size + 1
n.size--
// free(sibling)
}
pub fn (m mut map) delete(key string) {
if m.root.size == 0 {
return
}
removed := m.root.remove_key(key)
if removed {
m.size--
}
if m.root.size == 0 {
// tmp := t.root
if isnil(m.root.children) {
return
} else {
m.root = &mapnode(m.root.children[0])
}
// free(tmp)
}
}
// Insert all keys of the subtree into array `keys`
// starting at `at`. Keys are inserted in order.
fn (n mapnode) subkeys(keys mut []string, at int) int {
mut position := at
if !isnil(n.children) {
// Traverse children and insert
// keys inbetween children
for i in 0..n.size {
child := &mapnode(n.children[i])
position += child.subkeys(mut keys, position)
keys[position] = n.keys[i]
position++
}
// Insert the keys of the last child
child := &mapnode(n.children[n.size])
position += child.subkeys(mut keys, position)
} else {
// If leaf, insert keys
for i in 0..n.size {
keys[position + i] = n.keys[i]
}
position += n.size
}
// Return # of added keys
return position - at
}
pub fn (m &map) keys() []string { pub fn (m &map) keys() []string {
mut keys := [''].repeat(m.size) mut keys := [''].repeat(m.size)
if isnil(m.root) || m.root.size == 0 { if m.value_bytes == 0 {
return keys return keys
} }
m.root.subkeys(mut keys, 0) mut j := 0
for i in 0 .. (m.range_cap + 1) {
if m.probe_hash[i] != 0 {
keys[j] = m.key_values[i].key
j++
}
}
return keys return keys
} }
fn (n mut mapnode) free() { pub fn (m mut map) set_load_factor(new_load_factor f32) {
mut i := 0 if new_load_factor > 1.0 {
if isnil(n.children) { m.load_factor = 1.0
i = 0
for i < n.size {
i++
} }
} else { else if new_load_factor < 0.1 {
i = 0 m.load_factor = 0.1
for i < n.size {
&mapnode(n.children[i]).free()
i++
} }
&mapnode(n.children[i]).free() else {
m.load_factor = new_load_factor
} }
// free(n)
} }
pub fn (m mut map) free() { pub fn (m mut map) free() {
if isnil(m.root) { unsafe{
return free(m.key_values)
} }
m.root.free()
} }
pub fn (m map) print() { pub fn (m map) print() {
println('<<<<<<<<') println('TODO')
//for i := 0; i < m.entries.len; i++ {
// entry := m.entries[i]
// println('$entry.key => $entry.val')
//}
/*
for i := 0; i < m.cap * m.value_bytes; i++ {
b := m.table[i]
print('$i: ')
C.printf('%02x', b)
println('')
}
*/
println('>>>>>>>>>>')
} }
pub fn (m map_string) str() string { pub fn (m map_string) str() string {

View File

@ -0,0 +1,416 @@
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module builtin
// import strings
// B-trees are balanced search trees with all leaves at
// the same level. B-trees are generally faster than
// binary search trees due to the better locality of
// reference, since multiple keys are stored in one node.
// The number for `degree` has been picked through vigor-
// ous benchmarking but can be changed to any number > 1.
// `degree` determines the size of each node.
const (
	// B-tree degree; all other node limits are derived from it
	degree = 6
	// index of the middle key of a full node (the key moved up by a split)
	mid_index = degree - 1
	// maximum number of keys per node
	max_size = 2 * degree - 1
	// byte size of a node's child pointer array (max_size + 1 pointers)
	children_bytes = sizeof(voidptr) * (max_size + 1)
)
// SortedMap is a B-tree keyed by strings; values are opaque blocks of
// `value_bytes` bytes that are copied in and out with memcpy.
pub struct SortedMap {
	// number of bytes copied for every value
	value_bytes int
mut:
	// root node of the tree
	root &mapnode
pub mut:
	// number of keys currently stored
	size int
}
// mapnode is one B-tree node. The first `size` entries of `keys` and
// `values` are in use; `children` is nil (0) for a leaf.
struct mapnode {
mut:
	keys [11]string // TODO: Should use `max_size`
	values [11]voidptr // TODO: Should use `max_size`
	// child pointer array, allocated with `children_bytes` bytes on demand
	children &voidptr
	// number of keys stored in this node
	size int
}
// new_sorted_map returns an empty SortedMap whose values are
// `value_bytes` bytes wide. `n` is not used.
fn new_sorted_map(n, value_bytes int) SortedMap { // TODO: Remove `n`
	empty_root := new_node()
	return SortedMap {
		value_bytes: value_bytes
		root: empty_root
		size: 0
	}
}
// new_sorted_map_init builds a SortedMap from `n` keys and `n` values;
// value number i is read from `values` at byte offset i * value_bytes.
fn new_sorted_map_init(n, value_bytes int, keys &string, values voidptr) SortedMap {
	mut result := new_sorted_map(n, value_bytes)
	for i := 0; i < n; i++ {
		result.set(keys[i], values + i * value_bytes)
	}
	return result
}
// new_node returns a heap-allocated node with no keys and no children.
// An empty node of this kind serves as the root of a new tree, so that
// insertion never has to test for a null root.
fn new_node() &mapnode {
	fresh := &mapnode {
		children: 0
		size: 0
	}
	return fresh
}
// set stores a copy of the `value_bytes` bytes at `value` under `key`.
// An existing key has its stored value overwritten; a new key is inserted
// into a leaf and `size` is incremented.
// This implementation does proactive insertion, meaning
// that splits are done top-down and not bottom-up.
fn (m mut SortedMap) set(key string, value voidptr) {
	mut node := m.root
	mut child_index := 0
	mut parent := &mapnode(0)
	for {
		// Split every full node met on the way down.
		if node.size == max_size {
			// A full root gets a new, empty parent that becomes the root.
			if isnil(parent) {
				parent = new_node()
				m.root = parent
			}
			parent.split_child(child_index, mut node)
			// The key moved up by the split may be the one being set.
			if key == parent.keys[child_index] {
				C.memcpy(parent.values[child_index], value, m.value_bytes)
				return
			}
			// Continue in whichever half now covers `key`.
			node = if key < parent.keys[child_index] {
				&mapnode(parent.children[child_index])
			} else {
				&mapnode(parent.children[child_index + 1])
			}
		}
		// i = index of the first key that is not smaller than `key`
		mut i := 0
		for i < node.size && key > node.keys[i] { i++ }
		if i != node.size && key == node.keys[i] {
			C.memcpy(node.values[i], value, m.value_bytes)
			return
		}
		if isnil(node.children) {
			// Leaf: shift the larger keys right and insert in sorted order.
			mut j := node.size - 1
			for j >= 0 && key < node.keys[j] {
				node.keys[j + 1] = node.keys[j]
				node.values[j + 1] = node.values[j]
				j--
			}
			node.keys[j + 1] = key
			node.values[j + 1] = malloc(m.value_bytes)
			C.memcpy(node.values[j + 1], value, m.value_bytes)
			node.size++
			m.size++
			return
		}
		// Internal node: descend into child i.
		parent = node
		child_index = i
		node = &mapnode(node.children[child_index])
	}
}
// split_child splits `y`, which set() passes in when y.size == max_size
// and which is (or becomes) child `child_index` of `n`. The entries above
// the middle one move to a new node `z`, the middle key/value moves up
// into `n` at `child_index`, and `z` becomes the child right after `y`.
fn (n mut mapnode) split_child(child_index int, y mut mapnode) {
	mut z := new_node()
	// Both halves end up with mid_index keys.
	z.size = mid_index
	y.size = mid_index
	// Copy the keys/values above the middle one into z.
	for j := mid_index - 1; j >= 0; j-- {
		z.keys[j] = y.keys[j + degree]
		z.values[j] = y.values[j + degree]
	}
	// An internal node also hands its upper `degree` children to z.
	if !isnil(y.children) {
		z.children = &voidptr(malloc(children_bytes))
		for jj := degree - 1; jj >= 0; jj-- {
			z.children[jj] = y.children[jj + degree]
		}
	}
	// n may still be a leaf-shaped node without a child array.
	if isnil(n.children) {
		n.children = &voidptr(malloc(children_bytes))
	}
	// Shift n's keys, values and children right to open slot child_index.
	n.children[n.size + 1] = n.children[n.size]
	for j := n.size; j > child_index; j-- {
		n.keys[j] = n.keys[j - 1]
		n.values[j] = n.values[j - 1]
		n.children[j] = n.children[j - 1]
	}
	// Move the middle entry of y up and link both halves.
	n.keys[child_index] = y.keys[mid_index]
	n.values[child_index] = y.values[mid_index]
	n.children[child_index] = voidptr(y)
	n.children[child_index + 1] = voidptr(z)
	n.size++
}
// get copies the value stored under `key` into `out` (value_bytes bytes)
// and returns true, or returns false when the key is not in the tree.
fn (m SortedMap) get(key string, out voidptr) bool {
	mut cur := m.root
	for {
		// pos = index of the last key that is not greater than `key`
		mut pos := cur.size - 1
		for pos >= 0 && cur.keys[pos] > key {
			pos--
		}
		if pos != -1 && cur.keys[pos] == key {
			C.memcpy(out, cur.values[pos], m.value_bytes)
			return true
		}
		// a leaf without a match ends the search
		if isnil(cur.children) {
			break
		}
		cur = &mapnode(cur.children[pos + 1])
	}
	return false
}
// exists reports whether `key` is stored in the tree.
fn (m SortedMap) exists(key string) bool {
	if isnil(m.root) { // TODO: find out why root can be nil
		return false
	}
	mut cur := m.root
	for {
		// pos = index of the last key that is not greater than `key`
		mut pos := cur.size - 1
		for pos >= 0 && cur.keys[pos] > key {
			pos--
		}
		if pos != -1 && cur.keys[pos] == key {
			return true
		}
		// a leaf without a match ends the search
		if isnil(cur.children) {
			break
		}
		cur = &mapnode(cur.children[pos + 1])
	}
	return false
}
// find_key returns the index of the first key in `n` that is not smaller
// than `k`; the result equals n.size when every key is smaller.
fn (n mapnode) find_key(k string) int {
	mut pos := 0
	for pos < n.size {
		if !(n.keys[pos] < k) {
			break
		}
		pos++
	}
	return pos
}
// remove_key removes `k` from the subtree rooted at `n` and reports
// whether the key was found.
fn (n mut mapnode) remove_key(k string) bool {
	idx := n.find_key(k)
	// The key lives in this node.
	if idx < n.size && n.keys[idx] == k {
		if isnil(n.children) {
			n.remove_from_leaf(idx)
		} else {
			n.remove_from_non_leaf(idx)
		}
		return true
	}
	// A leaf that does not hold the key ends the search.
	if isnil(n.children) {
		return false
	}
	// Remember whether idx pointed past the last key before fill() gets
	// a chance to change n.size.
	flag := idx == n.size
	if (&mapnode(n.children[idx])).size < degree {
		n.fill(idx)
	}
	if flag && idx > n.size {
		return (&mapnode(n.children[idx - 1])).remove_key(k)
	}
	return (&mapnode(n.children[idx])).remove_key(k)
}
// remove_from_leaf drops the entry at `idx` by moving every later
// key/value one position to the left and shrinking the node by one.
fn (n mut mapnode) remove_from_leaf(idx int) {
	for dst := idx; dst + 1 < n.size; dst++ {
		n.keys[dst] = n.keys[dst + 1]
		n.values[dst] = n.values[dst + 1]
	}
	n.size--
}
// remove_from_non_leaf removes the key at position `idx` of the internal
// node `n`. The key is replaced by its predecessor or successor when the
// neighbouring child has at least `degree` keys; otherwise both
// neighbouring children are merged and the key is removed from the result.
fn (n mut mapnode) remove_from_non_leaf(idx int) {
	k := n.keys[idx]
	if &mapnode(n.children[idx]).size >= degree {
		// Predecessor: last key of the rightmost leaf below children[idx].
		mut current := &mapnode(n.children[idx])
		for !isnil(current.children) {
			current = &mapnode(current.children[current.size])
		}
		predecessor := current.keys[current.size - 1]
		n.keys[idx] = predecessor
		n.values[idx] = current.values[current.size - 1]
		(&mapnode(n.children[idx])).remove_key(predecessor)
	} else if &mapnode(n.children[idx + 1]).size >= degree {
		// Successor: first key of the leftmost leaf below children[idx + 1].
		mut current := &mapnode(n.children[idx + 1])
		for !isnil(current.children) {
			current = &mapnode(current.children[0])
		}
		successor := current.keys[0]
		n.keys[idx] = successor
		n.values[idx] = current.values[0]
		(&mapnode(n.children[idx + 1])).remove_key(successor)
	} else {
		// Neither child can spare a key: merge them, then remove `k` from
		// the merged child.
		n.merge(idx)
		(&mapnode(n.children[idx])).remove_key(k)
	}
}
// fill gives child `idx` of `n` an extra key: it borrows from the
// previous or the next sibling when that sibling has at least `degree`
// keys, and merges the child with a sibling otherwise.
fn (n mut mapnode) fill(idx int) {
	if idx != 0 && &mapnode(n.children[idx - 1]).size >= degree {
		n.borrow_from_prev(idx)
		return
	}
	if idx != n.size && &mapnode(n.children[idx + 1]).size >= degree {
		n.borrow_from_next(idx)
		return
	}
	// The last child has no next sibling, so it is merged into the
	// previous one.
	if idx == n.size {
		n.merge(idx - 1)
		return
	}
	n.merge(idx)
}
// borrow_from_prev moves one entry from child `idx - 1` through `n` into
// child `idx`: the separator n.keys[idx - 1] becomes the first key of the
// child and the last key of the sibling becomes the new separator.
fn (n mut mapnode) borrow_from_prev(idx int) {
	mut child := &mapnode(n.children[idx])
	mut sibling := &mapnode(n.children[idx - 1])
	// Make room at the front of the child.
	for i := child.size - 1; i >= 0; i-- {
		child.keys[i + 1] = child.keys[i]
		child.values[i + 1] = child.values[i]
	}
	if !isnil(child.children) {
		for i := child.size; i >= 0; i-- {
			child.children[i + 1] = child.children[i]
		}
	}
	// The separator moves down into the child.
	child.keys[0] = n.keys[idx - 1]
	child.values[0] = n.values[idx - 1]
	// The last child pointer of the sibling moves along with it.
	if !isnil(child.children) {
		child.children[0] = sibling.children[sibling.size]
	}
	// The last key of the sibling moves up as the new separator.
	n.keys[idx - 1] = sibling.keys[sibling.size - 1]
	n.values[idx - 1] = sibling.values[sibling.size - 1]
	child.size++
	sibling.size--
}
// borrow_from_next moves one entry from child `idx + 1` through `n` into
// child `idx`: the separator n.keys[idx] is appended to the child and the
// first key of the sibling becomes the new separator.
fn (n mut mapnode) borrow_from_next(idx int) {
	mut child := &mapnode(n.children[idx])
	mut sibling := &mapnode(n.children[idx + 1])
	// The separator moves down to the end of the child.
	child.keys[child.size] = n.keys[idx]
	child.values[child.size] = n.values[idx]
	// The first child pointer of the sibling moves along with it.
	if !isnil(child.children) {
		child.children[child.size + 1] = sibling.children[0]
	}
	// The first key of the sibling moves up as the new separator.
	n.keys[idx] = sibling.keys[0]
	n.values[idx] = sibling.values[0]
	// Close the gap at the front of the sibling.
	for i := 1; i < sibling.size; i++ {
		sibling.keys[i - 1] = sibling.keys[i]
		sibling.values[i - 1] = sibling.values[i]
	}
	if !isnil(sibling.children) {
		for i := 1; i <= sibling.size; i++ {
			sibling.children[i - 1] = sibling.children[i]
		}
	}
	child.size++
	sibling.size--
}
// merge folds the separator n.keys[idx] and all entries of child
// `idx + 1` into child `idx`, then removes the separator and the sibling
// pointer from `n`. The sibling node itself is not freed.
fn (n mut mapnode) merge(idx int) {
	mut child := &mapnode(n.children[idx])
	sibling := &mapnode(n.children[idx + 1])
	// The separator is written to position mid_index of the child.
	child.keys[mid_index] = n.keys[idx]
	child.values[mid_index] = n.values[idx]
	// The sibling's entries follow from position `degree` on.
	for i := 0; i < sibling.size; i++ {
		child.keys[i + degree] = sibling.keys[i]
		child.values[i + degree] = sibling.values[i]
	}
	if !isnil(child.children) {
		for i := 0; i <= sibling.size; i++ {
			child.children[i + degree] = sibling.children[i]
		}
	}
	// Close the gaps left in n's key/value and child arrays.
	for i := idx + 1; i < n.size; i++ {
		n.keys[i - 1] = n.keys[i]
		n.values[i - 1] = n.values[i]
	}
	for i := idx + 2; i <= n.size; i++ {
		n.children[i - 1] = n.children[i]
	}
	child.size += sibling.size + 1
	n.size--
	// free(sibling)
}
// delete removes `key` from the map, decrementing `size` when the key was
// present. A root that ends up without keys is replaced by its first
// child; the old root node is not freed.
pub fn (m mut SortedMap) delete(key string) {
	// nothing to remove from an empty root
	if m.root.size == 0 {
		return
	}
	if m.root.remove_key(key) {
		m.size--
	}
	if m.root.size != 0 {
		return
	}
	// The root lost its last key: shrink the tree by one level unless the
	// root is a leaf.
	if !isnil(m.root.children) {
		m.root = &mapnode(m.root.children[0])
	}
}
// subkeys writes every key of the subtree rooted at `n` into `keys`,
// in order, starting at index `at`, and returns the number of keys
// written.
fn (n mapnode) subkeys(keys mut []string, at int) int {
	// A leaf simply copies its own keys.
	if isnil(n.children) {
		for i in 0..n.size {
			keys[at + i] = n.keys[i]
		}
		return n.size
	}
	// An internal node alternates between a child subtree and the key
	// that follows it.
	mut position := at
	for i in 0..n.size {
		child := &mapnode(n.children[i])
		position += child.subkeys(mut keys, position)
		keys[position] = n.keys[i]
		position++
	}
	// The last child comes after the last key.
	child := &mapnode(n.children[n.size])
	position += child.subkeys(mut keys, position)
	return position - at
}
// keys returns all keys of the map in sorted order.
pub fn (m &SortedMap) keys() []string {
	mut keys := [''].repeat(m.size)
	// only walk the tree when there is a root that holds keys
	if !isnil(m.root) && m.root.size != 0 {
		m.root.subkeys(mut keys, 0)
	}
	return keys
}
// free is a stub: it releases no memory and only prints 'TODO'.
fn (n mut mapnode) free() {
	println('TODO')
}
// free calls free() on the root node, if the map has one.
pub fn (m mut SortedMap) free() {
	if !isnil(m.root) {
		m.root.free()
	}
}
// print is a stub: it does not print the map contents, only 'TODO'.
pub fn (m SortedMap) print() {
	println('TODO')
}
// pub fn (m map_string) str() string {
// if m.size == 0 {
// return '{}'
// }
// mut sb := strings.new_builder(50)
// sb.writeln('{')
// for key, val in m {
// sb.writeln(' "$key" => "$val"')
// }
// sb.writeln('}')
// return sb.str()
// }