map: use untyped keys for DenseArray and interleave keys and values (#7142)

pull/7152/head
Nick Treleaven 2020-12-05 21:53:50 +00:00 committed by GitHub
parent 6b7d7cee0c
commit 0d28f12c54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 81 additions and 96 deletions

View File

@ -1,7 +1,7 @@
module builtin module builtin
// <string.h> // <string.h>
fn C.memcpy(byteptr, byteptr, int) voidptr fn C.memcpy(dest byteptr, src byteptr, n int) voidptr
fn C.memcmp(byteptr, byteptr, int) int fn C.memcmp(byteptr, byteptr, int) int

View File

@ -94,107 +94,92 @@ fn fast_string_eq(a string, b string) bool {
// Dynamic array with very low growth factor // Dynamic array with very low growth factor
struct DenseArray { struct DenseArray {
key_bytes int
value_bytes int value_bytes int
slot_bytes int // sum of 2 fields above
mut: mut:
cap u32 cap int
len u32 len int
deletes u32 deletes u32 // count
keys &string data byteptr // array of interspersed key data and value data
values byteptr
} }
[inline] [inline]
[unsafe] [unsafe]
fn new_dense_array(value_bytes int) DenseArray { fn new_dense_array(key_bytes int, value_bytes int) DenseArray {
s8size := int(8 * sizeof(string)) slot_bytes := key_bytes + value_bytes
cap := 8
return DenseArray{ return DenseArray{
key_bytes: key_bytes
value_bytes: value_bytes value_bytes: value_bytes
cap: 8 slot_bytes: slot_bytes
cap: cap
len: 0 len: 0
deletes: 0 deletes: 0
keys: &string(malloc(s8size)) data: malloc(cap * slot_bytes)
values: malloc(8 * value_bytes)
} }
} }
[inline] [inline]
fn (d &DenseArray) key(i int) voidptr { fn (d &DenseArray) key(i int) voidptr {
return unsafe {voidptr(d.keys + i)} return unsafe {d.data + i * d.slot_bytes}
} }
// for cgen // for cgen
[inline] [inline]
fn (d &DenseArray) value(i int) voidptr { fn (d &DenseArray) value(i int) voidptr {
return unsafe {voidptr(d.values + i * d.value_bytes)} return unsafe {d.data + i * d.slot_bytes + d.key_bytes}
} }
[inline] [inline]
fn (d &DenseArray) has_index(i int) bool { fn (d &DenseArray) has_index(i int) bool {
pkey := unsafe {d.keys + i} // assume string keys for now
pkey := unsafe {&string(d.key(i))}
return pkey.str != 0 return pkey.str != 0
} }
// Push element to array and return index // Push element to array and return index
// The growth-factor is roughly 1.125 `(x + (x >> 3))` // The growth-factor is roughly 1.125 `(x + (x >> 3))`
[inline] [inline]
fn (mut d DenseArray) push(key string, value voidptr) u32 { fn (mut d DenseArray) push(key voidptr, value voidptr) int {
if d.cap == d.len { if d.cap == d.len {
d.cap += d.cap >> 3 d.cap += d.cap >> 3
unsafe { unsafe {
x := v_realloc(byteptr(d.keys), int(sizeof(string) * d.cap)) d.data = v_realloc(d.data, d.slot_bytes * d.cap)
d.keys = &string(x)
d.values = v_realloc(byteptr(d.values), d.value_bytes * int(d.cap))
} }
} }
push_index := d.len push_index := d.len
unsafe { unsafe {
d.keys[push_index] = key ptr := d.key(push_index)
C.memcpy(d.values + push_index * u32(d.value_bytes), value, d.value_bytes) C.memcpy(ptr, key, d.key_bytes)
C.memcpy(byteptr(ptr) + d.key_bytes, value, d.value_bytes)
} }
d.len++ d.len++
return push_index return push_index
} }
fn (d DenseArray) get(i int) voidptr {
$if !no_bounds_checking? {
if i < 0 || i >= int(d.len) {
panic('DenseArray.get: index out of range (i == $i, d.len == $d.len)')
}
}
unsafe {
return byteptr(d.keys) + i * int(sizeof(string))
}
}
// Move all zeros to the end of the array and resize array // Move all zeros to the end of the array and resize array
fn (mut d DenseArray) zeros_to_end() { fn (mut d DenseArray) zeros_to_end() {
mut tmp_value := malloc(d.value_bytes) // TODO alloca?
mut count := u32(0) mut tmp_buf := malloc(d.slot_bytes)
for i in 0 .. int(d.len) { mut count := 0
if unsafe {d.keys[i]}.str != 0 { for i in 0 .. d.len {
// swap keys if d.has_index(i) {
// swap (TODO: optimize)
unsafe { unsafe {
tmp_key := d.keys[count] C.memcpy(tmp_buf, d.key(count), d.slot_bytes)
d.keys[count] = d.keys[i] C.memcpy(d.key(count), d.key(i), d.slot_bytes)
d.keys[i] = tmp_key C.memcpy(d.key(i), tmp_buf, d.slot_bytes)
}
// swap values (TODO: optimize)
unsafe {
C.memcpy(tmp_value, d.values + count * u32(d.value_bytes), d.value_bytes)
C.memcpy(d.values + count * u32(d.value_bytes), d.values + i * d.value_bytes, d.value_bytes)
C.memcpy(d.values + i * d.value_bytes, tmp_value, d.value_bytes)
} }
count++ count++
} }
} }
free(tmp_value) free(tmp_buf)
d.deletes = 0 d.deletes = 0
d.len = count d.len = count
d.cap = if count < 8 { u32(8) } else { count } d.cap = if count < 8 { 8 } else { count }
unsafe { unsafe {
x := v_realloc(byteptr(d.keys), int(sizeof(string) * d.cap)) d.data = v_realloc(d.data, d.slot_bytes * d.cap)
d.keys = &string(x)
d.values = v_realloc(byteptr(d.values), d.value_bytes * int(d.cap))
} }
} }
@ -229,7 +214,7 @@ fn new_map_1(value_bytes int) map {
cap: init_cap cap: init_cap
cached_hashbits: max_cached_hashbits cached_hashbits: max_cached_hashbits
shift: init_log_capicity shift: init_log_capicity
key_values: new_dense_array(value_bytes) key_values: new_dense_array(int(sizeof(string)), value_bytes)
metas: &u32(vcalloc(metasize)) metas: &u32(vcalloc(metasize))
extra_metas: extra_metas_inc extra_metas: extra_metas_inc
len: 0 len: 0
@ -324,10 +309,12 @@ fn (mut m map) set(k string, value voidptr) {
index,meta = m.meta_less(index, meta) index,meta = m.meta_less(index, meta)
// While we might have a match // While we might have a match
for meta == unsafe {m.metas[index]} { for meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]} kv_index := int(unsafe {m.metas[index + 1]})
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { pkey := unsafe {&string(m.key_values.key(kv_index))}
if fast_string_eq(key, *pkey) {
unsafe { unsafe {
C.memcpy(m.key_values.values + kv_index * u32(m.value_bytes), value, m.value_bytes) pval := pkey + 1 // skip string
C.memcpy(pval, value, m.value_bytes)
} }
return return
} }
@ -335,7 +322,7 @@ fn (mut m map) set(k string, value voidptr) {
meta += probe_inc meta += probe_inc
} }
kv_index := m.key_values.push(key, value) kv_index := m.key_values.push(key, value)
m.meta_greater(index, meta, kv_index) m.meta_greater(index, meta, u32(kv_index))
m.len++ m.len++
} }
@ -367,13 +354,14 @@ fn (mut m map) rehash() {
m.metas = &u32(x) m.metas = &u32(x)
C.memset(m.metas, 0, meta_bytes) C.memset(m.metas, 0, meta_bytes)
} }
for i := u32(0); i < m.key_values.len; i++ { for i := 0; i < m.key_values.len; i++ {
if unsafe {m.key_values.keys[i]}.str == 0 { if !m.key_values.has_index(i) {
continue continue
} }
mut index,mut meta := m.key_to_index(unsafe {m.key_values.keys[i]}) pkey := unsafe {&string(m.key_values.key(i))}
mut index,mut meta := m.key_to_index(*pkey)
index,meta = m.meta_less(index, meta) index,meta = m.meta_less(index, meta)
m.meta_greater(index, meta, i) m.meta_greater(index, meta, u32(i))
} }
} }
@ -403,18 +391,17 @@ fn (mut m map) cached_rehash(old_cap u32) {
} }
// This method is used for assignment operators. If the argument-key // This method is used for assignment operators. If the argument-key
// does not exist in the map, it's added to the map along with the zero/dafault value. // does not exist in the map, it's added to the map along with the zero/default value.
// If the key exists, its respective value is returned. // If the key exists, its respective value is returned.
fn (mut m map) get_and_set(key string, zero voidptr) voidptr { fn (mut m map) get_and_set(key string, zero voidptr) voidptr {
for { for {
mut index,mut meta := m.key_to_index(key) mut index,mut meta := m.key_to_index(key)
for { for {
if meta == unsafe {m.metas[index]} { if meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]} kv_index := int(unsafe {m.metas[index + 1]})
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { pkey := unsafe {&string(m.key_values.key(kv_index))}
unsafe { if fast_string_eq(key, *pkey) {
return voidptr(m.key_values.values + kv_index * u32(m.value_bytes)) return unsafe {byteptr(pkey) + m.key_values.key_bytes}
}
} }
} }
index += 2 index += 2
@ -435,11 +422,10 @@ fn (m map) get(key string, zero voidptr) voidptr {
mut index,mut meta := m.key_to_index(key) mut index,mut meta := m.key_to_index(key)
for { for {
if meta == unsafe {m.metas[index]} { if meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]} kv_index := int(unsafe {m.metas[index + 1]})
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { pkey := unsafe {&string(m.key_values.key(kv_index))}
unsafe { if fast_string_eq(key, *pkey) {
return voidptr(m.key_values.values + kv_index * u32(m.value_bytes)) return unsafe {byteptr(pkey) + m.key_values.key_bytes}
}
} }
} }
index += 2 index += 2
@ -454,9 +440,10 @@ fn (m map) exists(key string) bool {
mut index,mut meta := m.key_to_index(key) mut index,mut meta := m.key_to_index(key)
for { for {
if meta == unsafe {m.metas[index]} { if meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]} kv_index := int(unsafe {m.metas[index + 1]})
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { pkey := unsafe {&string(m.key_values.key(kv_index))}
return true if fast_string_eq(key, *pkey) {
return true
} }
} }
index += 2 index += 2
@ -472,8 +459,9 @@ pub fn (mut m map) delete(key string) {
index,meta = m.meta_less(index, meta) index,meta = m.meta_less(index, meta)
// Perform backwards shifting // Perform backwards shifting
for meta == unsafe {m.metas[index]} { for meta == unsafe {m.metas[index]} {
kv_index := unsafe {m.metas[index + 1]} kv_index := int(unsafe {m.metas[index + 1]})
if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { pkey := unsafe {&string(m.key_values.key(kv_index))}
if fast_string_eq(key, *pkey) {
for (unsafe {m.metas[index + 2]} >> hashbits) > 1 { for (unsafe {m.metas[index + 2]} >> hashbits) > 1 {
unsafe { unsafe {
m.metas[index] = m.metas[index + 2] - probe_inc m.metas[index] = m.metas[index + 2] - probe_inc
@ -488,8 +476,8 @@ pub fn (mut m map) delete(key string) {
m.key_values.deletes++ m.key_values.deletes++
// Mark key as deleted // Mark key as deleted
unsafe { unsafe {
m.key_values.keys[kv_index].free() (*pkey).free()
C.memset(&m.key_values.keys[kv_index], 0, sizeof(string)) C.memset(pkey, 0, sizeof(string))
} }
if m.key_values.len <= 32 { if m.key_values.len <= 32 {
return return
@ -512,11 +500,12 @@ pub fn (mut m map) delete(key string) {
pub fn (m &map) keys() []string { pub fn (m &map) keys() []string {
mut keys := []string{ len:m.len } mut keys := []string{ len:m.len }
mut j := 0 mut j := 0
for i := u32(0); i < m.key_values.len; i++ { for i := 0; i < m.key_values.len; i++ {
if unsafe {m.key_values.keys[i]}.str == 0 { if !m.key_values.has_index(i) {
continue continue
} }
keys[j] = unsafe {m.key_values.keys[i]}.clone() pkey := unsafe {&string(m.key_values.key(i))}
keys[j] = pkey.clone()
j++ j++
} }
return keys return keys
@ -524,20 +513,16 @@ pub fn (m &map) keys() []string {
[unsafe] [unsafe]
pub fn (d DenseArray) clone() DenseArray { pub fn (d DenseArray) clone() DenseArray {
ksize := int(d.cap * sizeof(string))
vsize := int(d.cap * u32(d.value_bytes))
res := DenseArray { res := DenseArray {
key_bytes: d.key_bytes
value_bytes: d.value_bytes value_bytes: d.value_bytes
slot_bytes: d.slot_bytes
cap: d.cap cap: d.cap
len: d.len len: d.len
deletes: d.deletes deletes: d.deletes
keys: unsafe {&string(malloc(ksize))} data: unsafe {memdup(d.data, d.cap * d.slot_bytes)}
values: unsafe {byteptr(malloc(vsize))}
}
unsafe {
C.memcpy(res.keys, d.keys, ksize)
C.memcpy(res.values, d.values, vsize)
} }
// FIXME clone each key
return res return res
} }
@ -552,7 +537,7 @@ pub fn (m map) clone() map {
key_values: unsafe {m.key_values.clone()} key_values: unsafe {m.key_values.clone()}
metas: &u32(malloc(metasize)) metas: &u32(malloc(metasize))
extra_metas: m.extra_metas extra_metas: m.extra_metas
len: m.len len: m.len
} }
unsafe { unsafe {
C.memcpy(res.metas, m.metas, metasize) C.memcpy(res.metas, m.metas, metasize)
@ -565,17 +550,17 @@ pub fn (m &map) free() {
unsafe { unsafe {
free(m.metas) free(m.metas)
} }
for i := u32(0); i < m.key_values.len; i++ { for i := 0; i < m.key_values.len; i++ {
if unsafe {m.key_values.keys[i]}.str == 0 { if !m.key_values.has_index(i) {
continue continue
} }
unsafe { unsafe {
m.key_values.keys[i].free() pkey := &string(m.key_values.key(i))
(*pkey).free()
} }
} }
unsafe { unsafe {
free(m.key_values.keys) free(m.key_values.data)
free(m.key_values.values)
} }
} }

View File

@ -264,7 +264,7 @@ fn (mut g Gen) gen_str_for_map(info table.Map, styp string, str_fn_name string)
g.auto_str_funcs.writeln('static string indent_${str_fn_name}($styp m, int indent_count) { /* gen_str_for_map */') g.auto_str_funcs.writeln('static string indent_${str_fn_name}($styp m, int indent_count) { /* gen_str_for_map */')
g.auto_str_funcs.writeln('\tstrings__Builder sb = strings__new_builder(m.key_values.len*10);') g.auto_str_funcs.writeln('\tstrings__Builder sb = strings__new_builder(m.key_values.len*10);')
g.auto_str_funcs.writeln('\tstrings__Builder_write(&sb, _SLIT("{"));') g.auto_str_funcs.writeln('\tstrings__Builder_write(&sb, _SLIT("{"));')
g.auto_str_funcs.writeln('\tfor (unsigned int i = 0; i < m.key_values.len; ++i) {') g.auto_str_funcs.writeln('\tfor (int i = 0; i < m.key_values.len; ++i) {')
g.auto_str_funcs.writeln('\t\tif (!DenseArray_has_index(&m.key_values, i)) { continue; }') g.auto_str_funcs.writeln('\t\tif (!DenseArray_has_index(&m.key_values, i)) { continue; }')
g.auto_str_funcs.writeln('\t\tstring key = *(string*)DenseArray_key(&m.key_values, i);') g.auto_str_funcs.writeln('\t\tstring key = *(string*)DenseArray_key(&m.key_values, i);')
g.auto_str_funcs.writeln('\t\tstrings__Builder_write(&sb, _STR("\'%.*s\\000\'", 2, key));') g.auto_str_funcs.writeln('\t\tstrings__Builder_write(&sb, _STR("\'%.*s\\000\'", 2, key));')

View File

@ -1233,7 +1233,7 @@ fn (mut g Gen) for_in(it ast.ForInStmt) {
g.write('$atmp_styp $atmp = ') g.write('$atmp_styp $atmp = ')
g.expr(it.cond) g.expr(it.cond)
g.writeln(';') g.writeln(';')
g.writeln('for (int $idx = 0; $idx < (int)${atmp}.key_values.len; ++$idx) {') g.writeln('for (int $idx = 0; $idx < ${atmp}.key_values.len; ++$idx) {')
g.writeln('\tif (!DenseArray_has_index(&${atmp}.key_values, $idx)) {continue;}') g.writeln('\tif (!DenseArray_has_index(&${atmp}.key_values, $idx)) {continue;}')
if it.key_var != '_' { if it.key_var != '_' {
key_styp := g.typ(it.key_type) key_styp := g.typ(it.key_type)