map: use untyped keys for DenseArray and interleave keys and values (#7142)

pull/7152/head
Nick Treleaven 2020-12-05 21:53:50 +00:00 committed by GitHub
parent 6b7d7cee0c
commit 0d28f12c54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 81 additions and 96 deletions

View File

@ -1,7 +1,7 @@
module builtin
// <string.h>
fn C.memcpy(byteptr, byteptr, int) voidptr
fn C.memcpy(dest byteptr, src byteptr, n int) voidptr
fn C.memcmp(byteptr, byteptr, int) int

View File

@ -94,107 +94,92 @@ fn fast_string_eq(a string, b string) bool {
// Dynamic array with very low growth factor
// NOTE(review): this listing is a diff rendered without +/- markers, so the
// field list below appears to mix two layouts: one with u32 `cap`/`len` and
// separate `keys` / `values` buffers, and one with int `cap`/`len` and a
// single `data` buffer. Confirm against the commit which set is current.
struct DenseArray {
// size in bytes of one key
key_bytes int
// size in bytes of one value
value_bytes int
slot_bytes int // sum of 2 fields above
mut:
cap u32
len u32
deletes u32
// separate key buffer (string keys) and value buffer
keys &string
values byteptr
cap int
len int
deletes u32 // count
data byteptr // array of interspersed key data and value data
}
// new_dense_array builds a DenseArray with room for 8 entries and len 0.
// NOTE(review): two signatures are interleaved below (diff without +/-
// markers). The one-argument form allocates `keys` (8 strings) and `values`
// (8 * value_bytes) separately; the two-argument form allocates one `data`
// buffer of cap * slot_bytes bytes, where slot_bytes = key_bytes + value_bytes.
[inline]
[unsafe]
fn new_dense_array(value_bytes int) DenseArray {
s8size := int(8 * sizeof(string))
fn new_dense_array(key_bytes int, value_bytes int) DenseArray {
// one slot holds a key immediately followed by its value
slot_bytes := key_bytes + value_bytes
cap := 8
return DenseArray{
key_bytes: key_bytes
value_bytes: value_bytes
cap: 8
slot_bytes: slot_bytes
cap: cap
len: 0
deletes: 0
keys: &string(malloc(s8size))
values: malloc(8 * value_bytes)
data: malloc(cap * slot_bytes)
}
}
// key returns a pointer to the key of entry `i`. No bounds check is done here.
// NOTE(review): two return statements are interleaved (diff without +/-
// markers): `d.keys + i` indexes the separate key buffer, while
// `d.data + i * d.slot_bytes` is the start of slot `i` in the combined buffer.
[inline]
fn (d &DenseArray) key(i int) voidptr {
return unsafe {voidptr(d.keys + i)}
return unsafe {d.data + i * d.slot_bytes}
}
// for cgen
// value returns a pointer to the value of entry `i`. No bounds check is done here.
// NOTE(review): two return statements are interleaved (diff without +/-
// markers): `d.values + i * d.value_bytes` indexes the separate value buffer,
// while `d.data + i * d.slot_bytes + d.key_bytes` skips the key at the start
// of slot `i` in the combined buffer.
[inline]
fn (d &DenseArray) value(i int) voidptr {
return unsafe {voidptr(d.values + i * d.value_bytes)}
return unsafe {d.data + i * d.slot_bytes + d.key_bytes}
}
// has_index reports whether entry `i` holds a key whose `str` pointer is
// non-zero. The key is read as a `string`, so this only works for string keys.
// NOTE(review): two `pkey` assignments are interleaved (diff without +/-
// markers); both yield a pointer to the i-th key viewed as a string.
[inline]
fn (d &DenseArray) has_index(i int) bool {
pkey := unsafe {d.keys + i}
// assume string keys for now
pkey := unsafe {&string(d.key(i))}
return pkey.str != 0
}
// Push element to array and return index
// The growth-factor is roughly 1.125 `(x + (x >> 3))`
// NOTE(review): two signatures and two storage strategies are interleaved
// below (diff without +/- markers). One takes a `string` key and returns u32,
// writing to `d.keys` / `d.values`; the other takes an untyped `voidptr` key
// and returns int, copying key_bytes then value_bytes into one slot of `d.data`.
[inline]
fn (mut d DenseArray) push(key string, value voidptr) u32 {
fn (mut d DenseArray) push(key voidptr, value voidptr) int {
// grow only when the buffer is completely full
if d.cap == d.len {
d.cap += d.cap >> 3
unsafe {
x := v_realloc(byteptr(d.keys), int(sizeof(string) * d.cap))
d.keys = &string(x)
d.values = v_realloc(byteptr(d.values), d.value_bytes * int(d.cap))
d.data = v_realloc(d.data, d.slot_bytes * d.cap)
}
}
// the new entry goes into the first unused position
push_index := d.len
unsafe {
d.keys[push_index] = key
C.memcpy(d.values + push_index * u32(d.value_bytes), value, d.value_bytes)
// ptr is the start of the slot: key bytes first, value bytes right after
ptr := d.key(push_index)
C.memcpy(ptr, key, d.key_bytes)
C.memcpy(byteptr(ptr) + d.key_bytes, value, d.value_bytes)
}
d.len++
return push_index
}
// get returns a pointer to the i-th entry of `d.keys` (i * sizeof(string)
// bytes from its start). Unless `no_bounds_checking` is defined, it panics
// when `i` is negative or not below `d.len`.
// NOTE(review): only a `d.keys`-based body is shown here; presumably this
// function is removed by the commit — confirm.
fn (d DenseArray) get(i int) voidptr {
$if !no_bounds_checking? {
if i < 0 || i >= int(d.len) {
panic('DenseArray.get: index out of range (i == $i, d.len == $d.len)')
}
}
unsafe {
return byteptr(d.keys) + i * int(sizeof(string))
}
}
// Move all zeros to the end of the array and resize array
// Entries that pass the "key is set" test are swapped forward to position
// `count`, then `len` is set to the number of such entries, `deletes` is reset
// to 0, and the storage is reallocated to max(count, 8) entries.
// NOTE(review): two implementations are interleaved below (diff without +/-
// markers): one swaps keys and values in separate buffers through
// `tmp_value`, the other swaps whole slots of `d.data` through `tmp_buf`.
fn (mut d DenseArray) zeros_to_end() {
mut tmp_value := malloc(d.value_bytes)
mut count := u32(0)
for i in 0 .. int(d.len) {
if unsafe {d.keys[i]}.str != 0 {
// swap keys
// TODO alloca?
// scratch space for one whole slot (key + value), freed after the loop
mut tmp_buf := malloc(d.slot_bytes)
// count = number of live entries found so far = next position to fill
mut count := 0
for i in 0 .. d.len {
if d.has_index(i) {
// swap (TODO: optimize)
unsafe {
tmp_key := d.keys[count]
d.keys[count] = d.keys[i]
d.keys[i] = tmp_key
}
// swap values (TODO: optimize)
unsafe {
C.memcpy(tmp_value, d.values + count * u32(d.value_bytes), d.value_bytes)
C.memcpy(d.values + count * u32(d.value_bytes), d.values + i * d.value_bytes, d.value_bytes)
C.memcpy(d.values + i * d.value_bytes, tmp_value, d.value_bytes)
// NOTE(review): when count == i the middle memcpy has identical source
// and destination; C leaves overlapping memcpy undefined — consider
// skipping the swap in that case.
C.memcpy(tmp_buf, d.key(count), d.slot_bytes)
C.memcpy(d.key(count), d.key(i), d.slot_bytes)
C.memcpy(d.key(i), tmp_buf, d.slot_bytes)
}
count++
}
}
free(tmp_value)
free(tmp_buf)
d.deletes = 0
d.len = count
d.cap = if count < 8 { u32(8) } else { count }
// never shrink below the initial capacity of 8
d.cap = if count < 8 { 8 } else { count }
unsafe {
x := v_realloc(byteptr(d.keys), int(sizeof(string) * d.cap))
d.keys = &string(x)
d.values = v_realloc(byteptr(d.values), d.value_bytes * int(d.cap))
d.data = v_realloc(d.data, d.slot_bytes * d.cap)
}
}
@ -229,7 +214,7 @@ fn new_map_1(value_bytes int) map {
cap: init_cap
cached_hashbits: max_cached_hashbits
shift: init_log_capicity
key_values: new_dense_array(value_bytes)
key_values: new_dense_array(int(sizeof(string)), value_bytes)
metas: &u32(vcalloc(metasize))
extra_metas: extra_metas_inc
len: 0
@ -324,10 +309,12 @@ fn (mut m map) set(k string, value voidptr) {
index,meta = m.meta_less(index, meta)
// While we might have a match
for meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]}
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) {
kv_index := int(unsafe {m.metas[index + 1]})
pkey := unsafe {&string(m.key_values.key(kv_index))}
if fast_string_eq(key, *pkey) {
unsafe {
C.memcpy(m.key_values.values + kv_index * u32(m.value_bytes), value, m.value_bytes)
pval := pkey + 1 // skip string
C.memcpy(pval, value, m.value_bytes)
}
return
}
@ -335,7 +322,7 @@ fn (mut m map) set(k string, value voidptr) {
meta += probe_inc
}
kv_index := m.key_values.push(key, value)
m.meta_greater(index, meta, kv_index)
m.meta_greater(index, meta, u32(kv_index))
m.len++
}
@ -367,13 +354,14 @@ fn (mut m map) rehash() {
m.metas = &u32(x)
C.memset(m.metas, 0, meta_bytes)
}
for i := u32(0); i < m.key_values.len; i++ {
if unsafe {m.key_values.keys[i]}.str == 0 {
for i := 0; i < m.key_values.len; i++ {
if !m.key_values.has_index(i) {
continue
}
mut index,mut meta := m.key_to_index(unsafe {m.key_values.keys[i]})
pkey := unsafe {&string(m.key_values.key(i))}
mut index,mut meta := m.key_to_index(*pkey)
index,meta = m.meta_less(index, meta)
m.meta_greater(index, meta, i)
m.meta_greater(index, meta, u32(i))
}
}
@ -403,18 +391,17 @@ fn (mut m map) cached_rehash(old_cap u32) {
}
// This method is used for assignment operators. If the argument-key
// does not exist in the map, it's added to the map along with the zero/dafault value.
// does not exist in the map, it's added to the map along with the zero/default value.
// If the key exists, its respective value is returned.
fn (mut m map) get_and_set(key string, zero voidptr) voidptr {
for {
mut index,mut meta := m.key_to_index(key)
for {
if meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]}
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) {
unsafe {
return voidptr(m.key_values.values + kv_index * u32(m.value_bytes))
}
kv_index := int(unsafe {m.metas[index + 1]})
pkey := unsafe {&string(m.key_values.key(kv_index))}
if fast_string_eq(key, *pkey) {
return unsafe {byteptr(pkey) + m.key_values.key_bytes}
}
}
index += 2
@ -435,11 +422,10 @@ fn (m map) get(key string, zero voidptr) voidptr {
mut index,mut meta := m.key_to_index(key)
for {
if meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]}
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) {
unsafe {
return voidptr(m.key_values.values + kv_index * u32(m.value_bytes))
}
kv_index := int(unsafe {m.metas[index + 1]})
pkey := unsafe {&string(m.key_values.key(kv_index))}
if fast_string_eq(key, *pkey) {
return unsafe {byteptr(pkey) + m.key_values.key_bytes}
}
}
index += 2
@ -454,8 +440,9 @@ fn (m map) exists(key string) bool {
mut index,mut meta := m.key_to_index(key)
for {
if meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]}
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) {
kv_index := int(unsafe {m.metas[index + 1]})
pkey := unsafe {&string(m.key_values.key(kv_index))}
if fast_string_eq(key, *pkey) {
return true
}
}
@ -472,8 +459,9 @@ pub fn (mut m map) delete(key string) {
index,meta = m.meta_less(index, meta)
// Perform backwards shifting
for meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]}
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) {
kv_index := int(unsafe {m.metas[index + 1]})
pkey := unsafe {&string(m.key_values.key(kv_index))}
if fast_string_eq(key, *pkey) {
for (unsafe {m.metas[index + 2]} >> hashbits) > 1 {
unsafe {
m.metas[index] = m.metas[index + 2] - probe_inc
@ -488,8 +476,8 @@ pub fn (mut m map) delete(key string) {
m.key_values.deletes++
// Mark key as deleted
unsafe {
m.key_values.keys[kv_index].free()
C.memset(&m.key_values.keys[kv_index], 0, sizeof(string))
(*pkey).free()
C.memset(pkey, 0, sizeof(string))
}
if m.key_values.len <= 32 {
return
@ -512,11 +500,12 @@ pub fn (mut m map) delete(key string) {
pub fn (m &map) keys() []string {
mut keys := []string{ len:m.len }
mut j := 0
for i := u32(0); i < m.key_values.len; i++ {
if unsafe {m.key_values.keys[i]}.str == 0 {
for i := 0; i < m.key_values.len; i++ {
if !m.key_values.has_index(i) {
continue
}
keys[j] = unsafe {m.key_values.keys[i]}.clone()
pkey := unsafe {&string(m.key_values.key(i))}
keys[j] = pkey.clone()
j++
}
return keys
@ -524,20 +513,16 @@ pub fn (m &map) keys() []string {
// clone returns a DenseArray with the same sizes, cap, len and deletes as `d`
// and a byte-wise copy of its storage. The keys themselves are not cloned
// (see the FIXME below).
// NOTE(review): two implementations are interleaved (diff without +/-
// markers): one mallocs and memcpys `keys` and `values` separately, the other
// copies `data` with `memdup` — presumably a malloc-and-copy helper of
// d.cap * d.slot_bytes bytes; confirm.
[unsafe]
pub fn (d DenseArray) clone() DenseArray {
ksize := int(d.cap * sizeof(string))
vsize := int(d.cap * u32(d.value_bytes))
res := DenseArray {
key_bytes: d.key_bytes
value_bytes: d.value_bytes
slot_bytes: d.slot_bytes
cap: d.cap
len: d.len
deletes: d.deletes
keys: unsafe {&string(malloc(ksize))}
values: unsafe {byteptr(malloc(vsize))}
}
unsafe {
C.memcpy(res.keys, d.keys, ksize)
C.memcpy(res.values, d.values, vsize)
data: unsafe {memdup(d.data, d.cap * d.slot_bytes)}
}
// FIXME clone each key
return res
}
@ -565,17 +550,17 @@ pub fn (m &map) free() {
unsafe {
free(m.metas)
}
for i := u32(0); i < m.key_values.len; i++ {
if unsafe {m.key_values.keys[i]}.str == 0 {
for i := 0; i < m.key_values.len; i++ {
if !m.key_values.has_index(i) {
continue
}
unsafe {
m.key_values.keys[i].free()
pkey := &string(m.key_values.key(i))
(*pkey).free()
}
}
unsafe {
free(m.key_values.keys)
free(m.key_values.values)
free(m.key_values.data)
}
}

View File

@ -264,7 +264,7 @@ fn (mut g Gen) gen_str_for_map(info table.Map, styp string, str_fn_name string)
g.auto_str_funcs.writeln('static string indent_${str_fn_name}($styp m, int indent_count) { /* gen_str_for_map */')
g.auto_str_funcs.writeln('\tstrings__Builder sb = strings__new_builder(m.key_values.len*10);')
g.auto_str_funcs.writeln('\tstrings__Builder_write(&sb, _SLIT("{"));')
g.auto_str_funcs.writeln('\tfor (unsigned int i = 0; i < m.key_values.len; ++i) {')
g.auto_str_funcs.writeln('\tfor (int i = 0; i < m.key_values.len; ++i) {')
g.auto_str_funcs.writeln('\t\tif (!DenseArray_has_index(&m.key_values, i)) { continue; }')
g.auto_str_funcs.writeln('\t\tstring key = *(string*)DenseArray_key(&m.key_values, i);')
g.auto_str_funcs.writeln('\t\tstrings__Builder_write(&sb, _STR("\'%.*s\\000\'", 2, key));')

View File

@ -1233,7 +1233,7 @@ fn (mut g Gen) for_in(it ast.ForInStmt) {
g.write('$atmp_styp $atmp = ')
g.expr(it.cond)
g.writeln(';')
g.writeln('for (int $idx = 0; $idx < (int)${atmp}.key_values.len; ++$idx) {')
g.writeln('for (int $idx = 0; $idx < ${atmp}.key_values.len; ++$idx) {')
g.writeln('\tif (!DenseArray_has_index(&${atmp}.key_values, $idx)) {continue;}')
if it.key_var != '_' {
key_styp := g.typ(it.key_type)