v/vlib/builtin/sorted_map.v

452 lines
10 KiB
V
Raw Normal View History

2020-02-20 20:04:06 +01:00
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module builtin
// import strings
// SortedMap is backed by a B-tree: a balanced search tree whose
// leaves all sit at the same level. Since every node stores several
// keys, B-trees are generally faster than binary search trees thanks
// to better locality of reference.
//
// `degree` determines the maximum length of each node
// (`max_len = 2 * degree - 1` keys). Its value was picked through
// benchmarking and may in principle be any number > 1, but note that
// `mapnode.keys` and `mapnode.values` are hard-coded to 11 slots,
// which equals `max_len` only while `degree` is 6.
//
// `mid_index` is the index of the middle key of a full node.
// `children_bytes` is the size in bytes of a node's array of child
// pointers (`max_len + 1` pointers).
const (
	degree = 6
	mid_index = degree - 1
	max_len = 2 * degree - 1
	children_bytes = sizeof(voidptr) * (max_len + 1)
)
// SortedMap maps string keys to fixed-size values and keeps the
// keys in order.
// - `value_bytes`: number of bytes copied for every stored value.
// - `root`: root node of the underlying tree.
// - `len`: number of key/value pairs currently stored.
pub struct SortedMap {
	value_bytes int
mut:
	root &mapnode
pub mut:
	len int
}
// mapnode is one node of the tree.
// - `children`: nil for a leaf; otherwise an array of child node
//   pointers of which the first `len + 1` entries are in use.
// - `len`: number of keys currently stored in this node.
// - `keys` / `values`: the first `len` slots hold the node's entries;
//   every value is a pointer to a separately allocated copy.
struct mapnode {
mut:
	children &voidptr
	len int
	keys [11]string // TODO: Should use `max_len`
	values [11]voidptr // TODO: Should use `max_len`
}
// new_sorted_map returns an empty SortedMap whose values are
// `value_bytes` bytes wide. The argument `n` is not used.
fn new_sorted_map(n, value_bytes int) SortedMap { // TODO: Remove `n`
	empty_root := new_node()
	return SortedMap{
		value_bytes: value_bytes
		root: empty_root
		len: 0
	}
}
// new_sorted_map_init builds a SortedMap from `n` key/value pairs.
// `keys` is indexed as an array of `n` strings and `values` is read
// as `n` consecutive values of `value_bytes` bytes each; pair `i`
// is stored with `set`.
fn new_sorted_map_init(n, value_bytes int, keys &string, values voidptr) SortedMap {
	mut m := new_sorted_map(n, value_bytes)
	for idx in 0 .. n {
		unsafe {
			// Address of the idx-th value inside the packed value buffer.
			m.set(keys[idx], byteptr(values) + idx * value_bytes)
		}
	}
	return m
}
// new_node allocates an empty node: no keys and no children, i.e.
// an empty leaf.
// A new map gets such a node as its root, so insertion does not
// have to check for a missing root every time.
fn new_node() &mapnode {
	empty := &mapnode{
		children: 0
		len: 0
	}
	return empty
}
// set stores a copy of the `m.value_bytes` bytes found at `value`
// under `key`. When the key already exists, its stored value is
// overwritten in place; otherwise a new entry is created and
// `m.len` grows by one.
// Insertion is proactive: every full node met on the way down is
// split before it is entered (top-down instead of bottom-up).
fn (mut m SortedMap) set(key string, value voidptr) {
	mut parent := &mapnode(0)
	mut current := m.root
	mut slot := 0
	for {
		if current.len == max_len {
			// A full root has no parent yet: put a fresh root above it.
			if isnil(parent) {
				parent = new_node()
				m.root = parent
			}
			parent.split_child(slot, mut current)
			// The middle key of the split node now lives in the parent.
			if key == parent.keys[slot] {
				unsafe {
					C.memcpy(parent.values[slot], value, m.value_bytes)
				}
				return
			}
			// Continue in the half that can contain `key`.
			if key < parent.keys[slot] {
				current = unsafe {&mapnode(parent.children[slot])}
			} else {
				current = unsafe {&mapnode(parent.children[slot + 1])}
			}
		}
		// Find the first key that is not smaller than `key`.
		mut pos := 0
		for pos < current.len && key > current.keys[pos] {
			pos++
		}
		if pos != current.len && key == current.keys[pos] {
			// Existing key: overwrite its value buffer.
			unsafe {
				C.memcpy(current.values[pos], value, m.value_bytes)
			}
			return
		}
		if isnil(current.children) {
			// Leaf: move the larger entries one slot to the right and
			// drop the new entry into the gap.
			mut k := current.len - 1
			for k >= 0 && key < current.keys[k] {
				current.keys[k + 1] = current.keys[k]
				current.values[k + 1] = current.values[k]
				k--
			}
			gap := k + 1
			current.keys[gap] = key
			current.values[gap] = malloc(m.value_bytes)
			unsafe {
				C.memcpy(current.values[gap], value, m.value_bytes)
			}
			current.len++
			m.len++
			return
		}
		// Inner node: descend into the child at the found position.
		parent = current
		slot = pos
		current = unsafe {&mapnode(current.children[slot])}
	}
}
2020-06-04 10:35:40 +02:00
// split_child splits `y`, the child of `n` at `child_index`, around
// its middle key. `y` keeps its first `mid_index` keys, a newly
// allocated node receives the `mid_index` keys after the middle one
// (together with the matching children, if `y` has any), and the
// middle key/value moves up into `n` at `child_index`.
// NOTE: assumes `y` holds `max_len` keys and `n` has a free slot;
// the only caller visible in this file (`set`) splits full nodes only.
fn (mut n mapnode) split_child(child_index int, mut y mapnode) {
	mut upper := new_node()
	// Copy the upper half of `y` into the new node.
	for k in 0 .. mid_index {
		upper.keys[k] = y.keys[k + degree]
		upper.values[k] = y.values[k + degree]
	}
	if !isnil(y.children) {
		upper.children = &voidptr(malloc(int(children_bytes)))
		for c in 0 .. degree {
			unsafe {
				upper.children[c] = y.children[c + degree]
			}
		}
	}
	upper.len = mid_index
	y.len = mid_index
	if isnil(n.children) {
		n.children = &voidptr(malloc(int(children_bytes)))
	}
	// Shift the entries of `n` from `child_index` on one slot to the
	// right. This must run from the end because the ranges overlap.
	unsafe {
		n.children[n.len + 1] = n.children[n.len]
	}
	for k := n.len; k > child_index; k-- {
		n.keys[k] = n.keys[k - 1]
		n.values[k] = n.values[k - 1]
		unsafe {
			n.children[k] = n.children[k - 1]
		}
	}
	// Hoist the middle entry of `y` and hook up both halves.
	n.keys[child_index] = y.keys[mid_index]
	n.values[child_index] = y.values[mid_index]
	unsafe {
		n.children[child_index] = voidptr(y)
		n.children[child_index + 1] = voidptr(upper)
	}
	n.len++
}
// get looks up `key`. When the key is present, its `m.value_bytes`
// bytes are copied into `out` and true is returned. When the key is
// absent, or the map has no root node, `out` is left untouched and
// false is returned.
fn (m SortedMap) get(key string, out voidptr) bool {
	// `exists` and `keys` tolerate a nil root; behave the same way
	// here instead of dereferencing a nil pointer.
	if isnil(m.root) {
		return false
	}
	mut node := m.root
	for {
		// Scan from the right for the last key that is not greater
		// than `key`; `i` ends at -1 when every key is greater.
		mut i := node.len - 1
		for i >= 0 && key < node.keys[i] {
			i--
		}
		if i != -1 && key == node.keys[i] {
			unsafe {
				C.memcpy(out, node.values[i], m.value_bytes)
			}
			return true
		}
		// Not in this node: a leaf ends the search, otherwise descend
		// into the child between keys `i` and `i + 1`.
		if isnil(node.children) {
			break
		}
		node = unsafe {&mapnode(node.children[i + 1])}
	}
	return false
}
// exists reports whether `key` is stored in the map. A map without
// a root node contains nothing.
fn (m SortedMap) exists(key string) bool {
	if isnil(m.root) { // TODO: find out why root can be nil
		return false
	}
	mut current := m.root
	for {
		// Walk left from the last key until one that is not greater
		// than `key` is found (or the keys run out).
		mut pos := current.len - 1
		for pos >= 0 && key < current.keys[pos] {
			pos--
		}
		if pos >= 0 && key == current.keys[pos] {
			return true
		}
		if isnil(current.children) {
			// Reached a leaf without a match.
			break
		}
		current = unsafe {&mapnode(current.children[pos + 1])}
	}
	return false
}
2020-03-11 21:11:27 +01:00
// find_key returns the index of the first key of `n` that is not
// smaller than `k`, or `n.len` when all keys are smaller.
fn (n &mapnode) find_key(k string) int {
	for pos in 0 .. n.len {
		if n.keys[pos] >= k {
			return pos
		}
	}
	return n.len
}
2020-05-17 13:51:18 +02:00
// remove_key removes `k` from the subtree rooted at `n`.
// Returns true when the key was found and removed, false when the
// subtree does not contain it.
fn (mut n mapnode) remove_key(k string) bool {
	idx := n.find_key(k)
	is_leaf := isnil(n.children)
	if idx < n.len && n.keys[idx] == k {
		// The key lives in this node.
		if is_leaf {
			n.remove_from_leaf(idx)
		} else {
			n.remove_from_non_leaf(idx)
		}
		return true
	}
	if is_leaf {
		return false
	}
	// The key, if present, is below child `idx`. Remember whether
	// that is the last child before `fill` possibly changes `n.len`.
	was_last_child := idx == n.len
	if unsafe {&mapnode(n.children[idx])}.len < degree {
		n.fill(idx)
	}
	mut next := &mapnode(0)
	if was_last_child && idx > n.len {
		// `fill` merged the last child into its left sibling, so the
		// search continues in child `idx - 1`.
		next = unsafe {&mapnode(n.children[idx - 1])}
	} else {
		next = unsafe {&mapnode(n.children[idx])}
	}
	return next.remove_key(k)
}
2020-05-17 13:51:18 +02:00
// remove_from_leaf drops the entry at `idx` from the leaf `n` by
// moving all later entries one slot to the left. The value pointer
// of the removed entry is overwritten, not freed.
fn (mut n mapnode) remove_from_leaf(idx int) {
	for dst := idx; dst < n.len - 1; dst++ {
		n.keys[dst] = n.keys[dst + 1]
		n.values[dst] = n.values[dst + 1]
	}
	n.len--
}
2020-05-17 13:51:18 +02:00
// remove_from_non_leaf removes the entry at `idx` from the inner
// node `n`, trying three options in order:
// 1. the child left of the key has at least `degree` keys: replace
//    the entry with its predecessor and remove that from the child;
// 2. the child right of the key has at least `degree` keys: same
//    with the successor;
// 3. otherwise merge both children around the key and remove the
//    key from the merged child.
fn (mut n mapnode) remove_from_non_leaf(idx int) {
	k := n.keys[idx]
	mut before := unsafe {&mapnode(n.children[idx])}
	if before.len >= degree {
		// Predecessor: last entry of the rightmost leaf below `before`.
		mut leaf := before
		for !isnil(leaf.children) {
			leaf = unsafe {&mapnode(leaf.children[leaf.len])}
		}
		last := leaf.len - 1
		predecessor := leaf.keys[last]
		n.keys[idx] = predecessor
		n.values[idx] = leaf.values[last]
		before.remove_key(predecessor)
		return
	}
	mut after := unsafe {&mapnode(n.children[idx + 1])}
	if after.len >= degree {
		// Successor: first entry of the leftmost leaf below `after`.
		mut leaf := after
		for !isnil(leaf.children) {
			leaf = unsafe {&mapnode(leaf.children[0])}
		}
		successor := leaf.keys[0]
		n.keys[idx] = successor
		n.values[idx] = leaf.values[0]
		after.remove_key(successor)
		return
	}
	// `merge` leaves `n.children[idx]` in place, so `before` is the
	// merged node that now contains `k`.
	n.merge(idx)
	before.remove_key(k)
}
2020-05-17 13:51:18 +02:00
// fill gives the child of `n` at `idx` an extra key: it borrows from
// the previous sibling if that one has at least `degree` keys, else
// from the next sibling under the same condition, and otherwise
// merges the child with its next sibling (or with the previous one
// when the child is the last).
fn (mut n mapnode) fill(idx int) {
	has_prev := idx != 0
	has_next := idx != n.len
	if has_prev && unsafe {&mapnode(n.children[idx - 1])}.len >= degree {
		n.borrow_from_prev(idx)
	} else if has_next && unsafe {&mapnode(n.children[idx + 1])}.len >= degree {
		n.borrow_from_next(idx)
	} else if has_next {
		n.merge(idx)
	} else {
		n.merge(idx - 1)
	}
}
2020-05-17 13:51:18 +02:00
// borrow_from_prev rotates one entry from the left sibling through
// the parent into the child at `idx`: the separating entry
// `n.keys[idx - 1]` becomes the first entry of the child, and the
// last entry of the sibling becomes the new separator. For inner
// nodes the last child pointer of the sibling moves along.
fn (mut n mapnode) borrow_from_prev(idx int) {
	mut child := unsafe {&mapnode(n.children[idx])}
	mut donor := unsafe {&mapnode(n.children[idx - 1])}
	// Make room at the front of the child (back to front, since the
	// ranges overlap).
	for k := child.len; k > 0; k-- {
		child.keys[k] = child.keys[k - 1]
		child.values[k] = child.values[k - 1]
	}
	child.keys[0] = n.keys[idx - 1]
	child.values[0] = n.values[idx - 1]
	if !isnil(child.children) {
		for c := child.len + 1; c > 0; c-- {
			unsafe {
				child.children[c] = child.children[c - 1]
			}
		}
		unsafe {
			child.children[0] = donor.children[donor.len]
		}
	}
	// The donor's last entry replaces the separator in the parent.
	last := donor.len - 1
	n.keys[idx - 1] = donor.keys[last]
	n.values[idx - 1] = donor.values[last]
	child.len++
	donor.len--
}
2020-05-17 13:51:18 +02:00
// borrow_from_next rotates one entry from the right sibling through
// the parent into the child at `idx`: the separating entry
// `n.keys[idx]` is appended to the child, and the first entry of the
// sibling becomes the new separator. For inner nodes the first
// child pointer of the sibling moves along.
fn (mut n mapnode) borrow_from_next(idx int) {
	mut child := unsafe {&mapnode(n.children[idx])}
	mut donor := unsafe {&mapnode(n.children[idx + 1])}
	// Append the separator to the child.
	child.keys[child.len] = n.keys[idx]
	child.values[child.len] = n.values[idx]
	if !isnil(child.children) {
		unsafe {
			child.children[child.len + 1] = donor.children[0]
		}
	}
	// The donor's first entry replaces the separator in the parent.
	n.keys[idx] = donor.keys[0]
	n.values[idx] = donor.values[0]
	// Close the gap at the front of the donor.
	last := donor.len - 1
	for k in 0 .. last {
		donor.keys[k] = donor.keys[k + 1]
		donor.values[k] = donor.values[k + 1]
	}
	if !isnil(donor.children) {
		for c in 0 .. donor.len {
			unsafe {
				donor.children[c] = donor.children[c + 1]
			}
		}
	}
	child.len++
	donor.len--
}
2020-05-17 13:51:18 +02:00
// merge folds the separating entry `n.keys[idx]` and the whole child
// at `idx + 1` into the child at `idx`, then removes the separator
// and the right child pointer from `n`.
// NOTE: the separator is written to slot `mid_index` and the
// sibling's entries from slot `degree` on, so this presumably relies
// on the left child holding exactly `mid_index` keys.
// The absorbed sibling node is not freed.
fn (mut n mapnode) merge(idx int) {
	mut target := unsafe {&mapnode(n.children[idx])}
	absorbed := unsafe {&mapnode(n.children[idx + 1])}
	// Pull the separator down, followed by the sibling's entries.
	target.keys[mid_index] = n.keys[idx]
	target.values[mid_index] = n.values[idx]
	for k in 0 .. absorbed.len {
		target.keys[k + degree] = absorbed.keys[k]
		target.values[k + degree] = absorbed.values[k]
	}
	if !isnil(target.children) {
		child_count := absorbed.len + 1
		for c in 0 .. child_count {
			unsafe {
				target.children[c + degree] = absorbed.children[c]
			}
		}
	}
	// Close the gaps left in the parent.
	for dst := idx; dst < n.len - 1; dst++ {
		n.keys[dst] = n.keys[dst + 1]
		n.values[dst] = n.values[dst + 1]
	}
	for dst := idx + 1; dst < n.len; dst++ {
		unsafe {
			n.children[dst] = n.children[dst + 1]
		}
	}
	target.len += absorbed.len + 1
	n.len--
	// free(sibling)
}
2020-05-17 13:51:18 +02:00
// delete removes `key` from the map, if present, and decrements
// `m.len` when an entry was actually removed. Deleting from a map
// that has no root node or no keys is a no-op.
pub fn (mut m SortedMap) delete(key string) {
	// `keys` uses the same guard; without the nil check a map with
	// a nil root would be dereferenced here.
	if isnil(m.root) || m.root.len == 0 {
		return
	}
	removed := m.root.remove_key(key)
	if removed {
		m.len--
	}
	// When the root lost its last key but still has a child, that
	// child becomes the new root. An empty leaf root is kept as is.
	if m.root.len == 0 && !isnil(m.root.children) {
		// TODO: the old root node is not freed.
		m.root = unsafe {&mapnode(m.root.children[0])}
	}
}
// subkeys writes all keys of the subtree rooted at `n` into `keys`,
// in order, starting at index `at`, and returns the number of keys
// written. `keys` must already be long enough; it is indexed, not
// appended to.
fn (n &mapnode) subkeys(mut keys []string, at int) int {
	mut position := at
	if isnil(n.children) {
		// Leaf: its keys go out one after the other.
		for i in 0 .. n.len {
			keys[position] = n.keys[i]
			position++
		}
		return position - at
	}
	// Inner node: alternate between a child subtree and the key that
	// follows it.
	for i in 0 .. n.len {
		sub := unsafe {&mapnode(n.children[i])}
		position += sub.subkeys(mut keys, position)
		keys[position] = n.keys[i]
		position++
	}
	// The last child has no key after it.
	last := unsafe {&mapnode(n.children[n.len])}
	position += last.subkeys(mut keys, position)
	return position - at
}
// keys returns an array of `m.len` strings filled with all keys of
// the map in order. When the map has no root node or the root has
// no keys, the array is returned as allocated, without being filled.
pub fn (m &SortedMap) keys() []string {
	mut sorted := []string{len:m.len}
	if !isnil(m.root) && m.root.len != 0 {
		m.root.subkeys(mut sorted, 0)
	}
	return sorted
}
2020-05-17 13:51:18 +02:00
// free is a placeholder: it releases nothing and only prints 'TODO'.
fn (mut n mapnode) free() {
	println('TODO')
}
2020-05-17 13:51:18 +02:00
// free calls `free` on the root node, unless the map has no root.
pub fn (mut m SortedMap) free() {
	if !isnil(m.root) {
		m.root.free()
	}
}
// print is a placeholder: it does not print the map contents yet,
// only 'TODO'.
pub fn (m SortedMap) print() {
	println('TODO')
}
// pub fn (m map_string) str() string {
// if m.len == 0 {
// return '{}'
// }
// mut sb := strings.new_builder(50)
// sb.writeln('{')
// for key, val in m {
// sb.writeln(' "$key" => "$val"')
// }
// sb.writeln('}')
// return sb.str()
// }