From 841d82421646408ca4cc3a8b13ab48427d6836c6 Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Tue, 1 Oct 2019 02:14:12 +0300 Subject: [PATCH] builtin: a very early version of the hashmap --- compiler/main.v | 1 + vlib/builtin/hashmap.v | 80 +++++++++++++++++++++++++++++++++++++ vlib/builtin/hashmap_test.v | 33 +++++++++++++++ vlib/strings/strings.v | 1 + 4 files changed, 115 insertions(+) create mode 100644 vlib/builtin/hashmap.v create mode 100644 vlib/builtin/hashmap_test.v diff --git a/compiler/main.v b/compiler/main.v index d5a2f4fe62..56e92e176a 100644 --- a/compiler/main.v +++ b/compiler/main.v @@ -875,6 +875,7 @@ fn new_v(args[]string) &V { 'int.v', 'utf8.v', 'map.v', + 'hashmap.v', 'option.v', ] //println(builtins) diff --git a/vlib/builtin/hashmap.v b/vlib/builtin/hashmap.v new file mode 100644 index 0000000000..c0ebb5a1ab --- /dev/null +++ b/vlib/builtin/hashmap.v @@ -0,0 +1,80 @@ +// Copyright (c) 2019 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +module builtin + +/* + This is work in progress. + A very early test version of the hashmap with a fixed size. + Only works with string keys and int values for now. +*/ + +import math + +struct hashmap { + cap int + keys []string + table []hashmapentry + elm_size int +pub: + nr_collisions int +} + +struct hashmapentry { + key string + val int + next &hashmapentry // linked list for collisions +} + +const ( + min_cap = 2 << 10 + max_cap = 2 << 20 +) + +fn new_hashmap(planned_nr_items int) hashmap { + mut cap := planned_nr_items * 3 + if cap < min_cap { + cap = min_cap + } + if cap > max_cap { + cap = max_cap + } + return hashmap{ + cap: cap + elm_size: 4 + table: _make(cap, cap, sizeof(hashmapentry)) + } +} + +fn (m mut hashmap) set(key string, val int) { + hash := int(math.abs(key.hash())) + idx := hash % m.cap + if m.table[idx].key.len != 0 { + //println('\nset() idx=$idx key="$key" hash="$hash" val=$val') + m.nr_collisions++ + //println('collision:' + m.table[idx].key) + mut e := &m.table[idx] + for e.next != 0 { + e = e.next + } + e.next = &hashmapentry{key, val, 0} + } else { + m.table[idx] = hashmapentry{key, val, 0} + } +} + +fn (m mut hashmap) get(key string) int { + hash := int(math.abs(key.hash())) + idx := hash % m.cap + mut e := &m.table[idx] + for e.next != 0 { // todo unsafe { + if e.key == key { + return e.val + } + e = e.next + } + return e.val +} + + diff --git a/vlib/builtin/hashmap_test.v b/vlib/builtin/hashmap_test.v new file mode 100644 index 0000000000..4bb588342d --- /dev/null +++ b/vlib/builtin/hashmap_test.v @@ -0,0 +1,33 @@ +import rand +import strings + +fn test_random_strings() { + mut m := new_hashmap(1000) + for i in 0..1000 { + mut buf := []byte + for j in 0..10 { + buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) + } + s := string(buf) + //println(s) + m.set(s, i) + assert m.get(s) == i + } + m.set('foo', 12) + val := m.get('foo') + assert val == 12 +} + +fn test_large_hashmap() { + N := 300 * 1000 + mut nums := new_hashmap(N) + for i := 0; i < N; i++ { + key := i.str() + nums.set(key, i) + } + println('nr collisions: $nums.nr_collisions') + for i := 0; i < N; i++ { + key := i.str() + assert nums.get(key) == i + } +} diff --git a/vlib/strings/strings.v b/vlib/strings/strings.v index fcb8044f9b..d0507142ea 100644 --- a/vlib/strings/strings.v +++ b/vlib/strings/strings.v @@ -8,3 +8,4 @@ pub fn repeat(c byte, n int) string { arr[n] = `\0` return string(arr, n) } +