v/vlib/v/token/keywords_matcher.v

93 lines
2.4 KiB
V

module token
// bump token.max_keyword_len, if you add a longer keyword
const max_keyword_len = 20
// KeywordsMatcher provides a faster way of determinining whether a given name
// is a reserved word, by doing a comparison with only the keywords that
// have exactly the same length as `name`.
// Benchmarking shows that with -prod, it is 20-25% slower in the worst case
// compared to just using token.keywords[name], but can be 20x faster
// in the case, where there is a length mismatch, and 2x-3x faster in most
// cases, where there is a match.
// Without -prod, with tcc, using KeywordsMatcher is always faster
// (2x to 14x times), compared to using a hash of all the keywords.
pub struct KeywordsMatcher {
mut:
len_min int = 9999
len_max int = -1
words [max_keyword_len][]WIndex
}
struct WIndex {
mut:
word string
index int
}
pub fn new_keywords_matcher<T>(kw_map map[string]T) KeywordsMatcher {
mut km := KeywordsMatcher{}
// TODO: remove this loop. It is currently needed, because a
// fixed array of arrays is not initialised properly automatically
// as of 2021/10/28
for i in 0 .. token.max_keyword_len {
km.words[i] = []WIndex{}
}
for k, v in kw_map {
km.add_word(k, int(v))
}
for i in 0 .. token.max_keyword_len {
if km.words[i].len > 0 {
km.words[i].sort(a.word < b.word)
$if trace_keyword_matcher_initialisation ? {
print('word len: ${i:3} | words: ')
for w in km.words[i] {
print('$w.word, ')
}
println('')
}
}
}
return km
}
fn (mut km KeywordsMatcher) add_word(word string, kind int) {
if word.len >= token.max_keyword_len {
panic('increase max_keyword_len to > $word.len')
}
if km.len_max < word.len {
km.len_max = word.len
}
if word.len < km.len_min {
km.len_min = word.len
}
km.words[word.len] << WIndex{word, kind}
}
// find returns the int index, given a word, by doing a binary search
// on the sorted list of words for each bin
[direct_array_access]
pub fn (km &KeywordsMatcher) find(word string) int {
wlen := word.len
if wlen < km.len_min || wlen > km.len_max {
return -1
}
list_len := km.words[wlen].len
if list_len == 0 {
return -1
}
mut lo := 0
mut hi := list_len - 1
for lo <= hi {
mid := lo + (hi - lo) / 2
cmp := km.words[wlen][mid].word.compare(word)
match cmp {
0 { return km.words[wlen][mid].index }
-1 { lo = mid + 1 }
1 { hi = mid - 1 }
else {}
}
}
return -1
}