v.token,ast,scanner,parser: use a KeywordsMatcher lookup for builtin_type_names searches too

pull/12407/head
Delyan Angelov 2021-11-07 16:02:37 +02:00
parent 80242c8041
commit 1c12186701
No known key found for this signature in database
GPG Key ID: 66886C0F12D595ED
7 changed files with 55 additions and 43 deletions

View File

@ -2103,3 +2103,26 @@ pub fn (expr Expr) is_literal() bool {
} }
} }
} }
// type_can_start_with_token reports whether `tok` may be the first token of a type.
// A `.name` token qualifies when its literal starts with a capital letter,
// or when the literal is found in `builtin_type_names_matcher`.
// Tokens of kind `.amp`, `.key_fn`, `.lsbr` and `.question` always qualify.
// Every other token kind yields false.
pub fn type_can_start_with_token(tok &token.Token) bool {
	match tok.kind {
		.name {
			// `find` returns -1 for an unknown word, and the stored index otherwise.
			// The check must be `>= 0`, not `> 0`, otherwise a builtin name that is
			// stored under index 0 would be wrongly rejected.
			return (tok.lit.len > 0 && tok.lit[0].is_capital())
				|| builtin_type_names_matcher.find(tok.lit) >= 0
		}
		// Note: return type (T1, T2) should be handled elsewhere
		.amp, .key_fn, .lsbr, .question {
			return true
		}
		else {}
	}
	return false
}
// build_builtin_type_names_matcher creates the token.KeywordsMatcher used for
// fast lookups of the names in `builtin_type_names`.
// Each name is stored under its 1-based position in that list, so that a
// successful `find(name)` is always > 0, while a miss is -1.
// NOTE(review): the 1-based value appears to coincide with the matching
// `*_type_idx` const (u8 is the 30th name, and u8_type_idx = 30) - confirm.
fn build_builtin_type_names_matcher() token.KeywordsMatcher {
	mut m := map[string]int{}
	for i, name in builtin_type_names {
		// Storing plain `i` would map the first name ('void') to 0, which
		// callers that test `find(name) > 0` would treat as "not a builtin type".
		m[name] = i + 1
	}
	return token.new_keywords_matcher<int>(m)
}

View File

@ -427,6 +427,13 @@ pub const (
u8_type_idx = 30 u8_type_idx = 30
) )
// builtin_type_names lists the names of the builtin types.
// NB: builtin_type_names must be in the same order as the idx consts above
pub const builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64',
'isize', 'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string',
'rune', 'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
// builtin_type_names_matcher is a token.KeywordsMatcher built from builtin_type_names.
// Its `find(name)` method is used for lookups instead of `name in builtin_type_names`.
pub const builtin_type_names_matcher = build_builtin_type_names_matcher()
pub const ( pub const (
integer_type_idxs = [i8_type_idx, i16_type_idx, int_type_idx, i64_type_idx, integer_type_idxs = [i8_type_idx, i16_type_idx, int_type_idx, i64_type_idx,
byte_type_idx, u8_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, isize_type_idx, byte_type_idx, u8_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, isize_type_idx,
@ -489,13 +496,6 @@ pub fn merge_types(params ...[]Type) []Type {
return res return res
} }
pub const (
// builtin_type_names lists the names of the builtin types.
// It must be in the same order as the idx consts above.
builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64', 'isize',
'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string', 'rune',
'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
)
pub struct MultiReturn { pub struct MultiReturn {
pub mut: pub mut:
types []Type types []Type

View File

@ -180,7 +180,7 @@ fn (mut p Parser) match_expr() ast.MatchExpr {
is_else = true is_else = true
p.next() p.next()
} else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot) } else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot)
&& (((p.tok.lit in ast.builtin_type_names || p.tok.lit[0].is_capital()) && (((ast.builtin_type_names_matcher.find(p.tok.lit) > 0 || p.tok.lit[0].is_capital())
&& p.peek_tok.kind != .lpar) || (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0 && p.peek_tok.kind != .lpar) || (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0
&& p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr { && p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr {
mut types := []ast.Type{} mut types := []ast.Type{}

View File

@ -1961,7 +1961,7 @@ fn (p &Parser) is_generic_call() bool {
const valid_tokens_inside_types = [token.Kind.lsbr, .rsbr, .name, .dot, .comma, .key_fn, .lt] const valid_tokens_inside_types = [token.Kind.lsbr, .rsbr, .name, .dot, .comma, .key_fn, .lt]
fn (mut p Parser) is_generic_cast() bool { fn (mut p Parser) is_generic_cast() bool {
if !p.tok.can_start_type(ast.builtin_type_names) { if !ast.type_can_start_with_token(p.tok) {
return false return false
} }
mut i := 0 mut i := 0
@ -2160,7 +2160,7 @@ pub fn (mut p Parser) name_expr() ast.Expr {
} }
name_w_mod := p.prepend_mod(name) name_w_mod := p.prepend_mod(name)
// type cast. TODO: finish // type cast. TODO: finish
// if name in ast.builtin_type_names { // if name in ast.builtin_type_names_to_idx {
if (!known_var && (name in p.table.type_idxs || name_w_mod in p.table.type_idxs) if (!known_var && (name in p.table.type_idxs || name_w_mod in p.table.type_idxs)
&& name !in ['C.stat', 'C.sigaction']) || is_mod_cast || is_generic_cast && name !in ['C.stat', 'C.sigaction']) || is_mod_cast || is_generic_cast
|| (language == .v && name.len > 0 && name[0].is_capital()) { || (language == .v && name.len > 0 && name[0].is_capital()) {

View File

@ -683,8 +683,8 @@ fn (mut s Scanner) text_scan() token.Token {
// Check if not .eof to prevent panic // Check if not .eof to prevent panic
next_char := s.look_ahead(1) next_char := s.look_ahead(1)
kind := token.matcher.find(name) kind := token.matcher.find(name)
if kind != .unknown { if kind != -1 {
return s.new_token(kind, name, name.len) return s.new_token(token.Kind(kind), name, name.len)
} }
// 'asdf $b' => "b" is the last name in the string, dont start parsing string // 'asdf $b' => "b" is the last name in the string, dont start parsing string
// at the next ', skip it // at the next ', skip it
@ -942,7 +942,8 @@ fn (mut s Scanner) text_scan() token.Token {
// if any typ is neither Type nor builtin, then the case is non-generic // if any typ is neither Type nor builtin, then the case is non-generic
typs.all(it.len > 0 typs.all(it.len > 0
&& ((it[0].is_capital() && it[1..].bytes().all(it.is_alnum() && ((it[0].is_capital() && it[1..].bytes().all(it.is_alnum()
|| it == `_`)) || it in ast.builtin_type_names)) || it == `_`))
|| ast.builtin_type_names_matcher.find(it) > 0))
} else { } else {
false false
} }

View File

@ -1,10 +1,10 @@
module token module token
// bump token.max_keyword_len, if you add a longer keyword // bump token.max_keyword_len, if you add a longer keyword
const max_keyword_len = 11 const max_keyword_len = 20
// KeywordsMatcher provides a faster way of determining whether a given name // KeywordsMatcher provides a faster way of determining whether a given name
// is a reserved keyword, by doing a comparison with only the keywords that // is a reserved word, by doing a comparison with only the keywords that
// have exactly the same length as `name`. // have exactly the same length as `name`.
// Benchmarking shows that with -prod, it is 20-25% slower in the worst case // Benchmarking shows that with -prod, it is 20-25% slower in the worst case
// compared to just using token.keywords[name], but can be 20x faster // compared to just using token.keywords[name], but can be 20x faster
@ -16,25 +16,25 @@ pub struct KeywordsMatcher {
mut: mut:
len_min int = 9999 len_min int = 9999
len_max int = -1 len_max int = -1
words [max_keyword_len][]WKind words [max_keyword_len][]WIndex
} }
struct WKind { struct WIndex {
mut: mut:
word string word string
kind Kind index int
} }
pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher { pub fn new_keywords_matcher<T>(kw_map map[string]T) KeywordsMatcher {
mut km := KeywordsMatcher{} mut km := KeywordsMatcher{}
// TODO: remove this loop. It is currently needed, because a // TODO: remove this loop. It is currently needed, because a
// fixed array of arrays is not initialised properly automatically // fixed array of arrays is not initialised properly automatically
// as of 2021/10/28 // as of 2021/10/28
for i in 0 .. token.max_keyword_len { for i in 0 .. token.max_keyword_len {
km.words[i] = []WKind{} km.words[i] = []WIndex{}
} }
for k, v in kw_map { for k, v in kw_map {
km.add_word(k, v) km.add_word(k, int(v))
} }
for i in 0 .. token.max_keyword_len { for i in 0 .. token.max_keyword_len {
if km.words[i].len > 0 { if km.words[i].len > 0 {
@ -51,7 +51,7 @@ pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
return km return km
} }
fn (mut km KeywordsMatcher) add_word(word string, kind Kind) { fn (mut km KeywordsMatcher) add_word(word string, kind int) {
if word.len >= token.max_keyword_len { if word.len >= token.max_keyword_len {
panic('increase max_keyword_len to > $word.len') panic('increase max_keyword_len to > $word.len')
} }
@ -61,20 +61,20 @@ fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
if word.len < km.len_min { if word.len < km.len_min {
km.len_min = word.len km.len_min = word.len
} }
km.words[word.len] << WKind{word, kind} km.words[word.len] << WIndex{word, kind}
} }
// find returns the Kind given a word, by doing a binary search // find returns the int index, given a word, by doing a binary search
// on the sorted list of words for each bin // on the sorted list of words for each bin
[direct_array_access] [direct_array_access]
pub fn (km &KeywordsMatcher) find(word string) Kind { pub fn (km &KeywordsMatcher) find(word string) int {
wlen := word.len wlen := word.len
if wlen < km.len_min || wlen > km.len_max { if wlen < km.len_min || wlen > km.len_max {
return Kind.unknown return -1
} }
list_len := km.words[wlen].len list_len := km.words[wlen].len
if list_len == 0 { if list_len == 0 {
return Kind.unknown return -1
} }
mut lo := 0 mut lo := 0
mut hi := list_len - 1 mut hi := list_len - 1
@ -82,11 +82,11 @@ pub fn (km &KeywordsMatcher) find(word string) Kind {
mid := lo + (hi - lo) / 2 mid := lo + (hi - lo) / 2
cmp := km.words[wlen][mid].word.compare(word) cmp := km.words[wlen][mid].word.compare(word)
match cmp { match cmp {
0 { return km.words[wlen][mid].kind } 0 { return km.words[wlen][mid].index }
-1 { lo = mid + 1 } -1 { lo = mid + 1 }
1 { hi = mid - 1 } 1 { hi = mid - 1 }
else {} else {}
} }
} }
return Kind.unknown return -1
} }

View File

@ -313,7 +313,7 @@ const token_str = build_token_str()
pub const keywords = build_keys() pub const keywords = build_keys()
pub const matcher = new_keywords_matcher(keywords) pub const matcher = new_keywords_matcher<Kind>(keywords)
[inline] [inline]
pub fn is_key(key string) bool { pub fn is_key(key string) bool {
@ -472,15 +472,3 @@ pub fn (kind Kind) is_infix() bool {
.le, .logical_or, .xor, .not_in, .key_is, .not_is, .and, .dot, .pipe, .amp, .left_shift, .le, .logical_or, .xor, .not_in, .key_is, .not_is, .and, .dot, .pipe, .amp, .left_shift,
.right_shift, .unsigned_right_shift, .arrow] .right_shift, .unsigned_right_shift, .arrow]
} }
// can_start_type reports whether the token may be the first token of a type.
// Callers are expected to pass ast.builtin_type_names as `builtin_types`.
// Note: the list is a parameter, because table can't be imported here,
// due to a circular module dependency.
pub fn (tok &Token) can_start_type(builtin_types []string) bool {
	if tok.kind == .name {
		// a name starts a type, when it is capitalised, or is a builtin type name
		if tok.lit.len > 0 && tok.lit[0].is_capital() {
			return true
		}
		return tok.lit in builtin_types
	}
	// Note: return type (T1, T2) should be handled elsewhere
	return tok.kind in [.amp, .key_fn, .lsbr, .question]
}