From 1c121867018c18984b4688264a5f9f5abbcd60a0 Mon Sep 17 00:00:00 2001
From: Delyan Angelov
Date: Sun, 7 Nov 2021 16:02:37 +0200
Subject: [PATCH] v.token,ast,scanner,parser: use a KeywordsMatcher lookup for
 builtin_type_names searches too

---
 vlib/v/ast/ast.v                | 23 ++++++++++++++++++++++
 vlib/v/ast/types.v              | 14 +++++++-------
 vlib/v/parser/if_match.v        |  2 +-
 vlib/v/parser/parser.v          |  4 ++--
 vlib/v/scanner/scanner.v        |  7 ++++---
 vlib/v/token/keywords_matcher.v | 34 ++++++++++++++++-----------------
 vlib/v/token/token.v            | 14 +-------------
 7 files changed, 55 insertions(+), 43 deletions(-)

diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v
index 33f2a05953..88eed350f1 100644
--- a/vlib/v/ast/ast.v
+++ b/vlib/v/ast/ast.v
@@ -2103,3 +2103,26 @@ pub fn (expr Expr) is_literal() bool {
 		}
 	}
 }
+
+pub fn type_can_start_with_token(tok &token.Token) bool {
+	match tok.kind {
+		.name {
+			return (tok.lit.len > 0 && tok.lit[0].is_capital())
+				|| builtin_type_names_matcher.find(tok.lit) > 0
+		}
+		// Note: return type (T1, T2) should be handled elsewhere
+		.amp, .key_fn, .lsbr, .question {
+			return true
+		}
+		else {}
+	}
+	return false
+}
+
+fn build_builtin_type_names_matcher() token.KeywordsMatcher {
+	mut m := map[string]int{}
+	for i, name in builtin_type_names {
+		m[name] = i
+	}
+	return token.new_keywords_matcher<int>(m)
+}
diff --git a/vlib/v/ast/types.v b/vlib/v/ast/types.v
index ab3ae1ec38..3ff9ed618d 100644
--- a/vlib/v/ast/types.v
+++ b/vlib/v/ast/types.v
@@ -427,6 +427,13 @@ pub const (
 	u8_type_idx = 30
 )
 
+// NB: builtin_type_names must be in the same order as the idx consts above
+pub const builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64',
+	'isize', 'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string',
+	'rune', 'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
+
+pub const builtin_type_names_matcher = build_builtin_type_names_matcher()
+
 pub const (
 	integer_type_idxs = [i8_type_idx, i16_type_idx, int_type_idx, i64_type_idx, byte_type_idx,
 		u8_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, isize_type_idx,
@@ -489,13 +496,6 @@ pub fn merge_types(params ...[]Type) []Type {
 	return res
 }
 
-pub const (
-	// must be in the same order as the idx consts above
-	builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64', 'isize',
-		'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string', 'rune',
-		'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
-)
-
 pub struct MultiReturn {
 pub mut:
 	types []Type
diff --git a/vlib/v/parser/if_match.v b/vlib/v/parser/if_match.v
index 99df1811cf..7325ad44ff 100644
--- a/vlib/v/parser/if_match.v
+++ b/vlib/v/parser/if_match.v
@@ -180,7 +180,7 @@ fn (mut p Parser) match_expr() ast.MatchExpr {
 			is_else = true
 			p.next()
 		} else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot)
-			&& (((p.tok.lit in ast.builtin_type_names || p.tok.lit[0].is_capital())
+			&& (((ast.builtin_type_names_matcher.find(p.tok.lit) > 0 || p.tok.lit[0].is_capital())
 			&& p.peek_tok.kind != .lpar) || (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0
 			&& p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr {
 			mut types := []ast.Type{}
diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v
index 22c7e62df8..788d7b8ca1 100644
--- a/vlib/v/parser/parser.v
+++ b/vlib/v/parser/parser.v
@@ -1961,7 +1961,7 @@ fn (p &Parser) is_generic_call() bool {
 const valid_tokens_inside_types = [token.Kind.lsbr, .rsbr, .name, .dot, .comma, .key_fn, .lt]
 
 fn (mut p Parser) is_generic_cast() bool {
-	if !p.tok.can_start_type(ast.builtin_type_names) {
+	if !ast.type_can_start_with_token(p.tok) {
 		return false
 	}
 	mut i := 0
@@ -2160,7 +2160,7 @@ pub fn (mut p Parser) name_expr() ast.Expr {
 	}
 	name_w_mod := p.prepend_mod(name)
 	// type cast. TODO: finish
-	// if name in ast.builtin_type_names {
+	// if name in ast.builtin_type_names_to_idx {
 	if (!known_var && (name in p.table.type_idxs || name_w_mod in p.table.type_idxs)
 		&& name !in ['C.stat', 'C.sigaction']) || is_mod_cast || is_generic_cast
 		|| (language == .v && name.len > 0 && name[0].is_capital()) {
diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v
index 3116ede07e..3f2fa710d6 100644
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@@ -683,8 +683,8 @@ fn (mut s Scanner) text_scan() token.Token {
 			// Check if not .eof to prevent panic
 			next_char := s.look_ahead(1)
 			kind := token.matcher.find(name)
-			if kind != .unknown {
-				return s.new_token(kind, name, name.len)
+			if kind != -1 {
+				return s.new_token(token.Kind(kind), name, name.len)
 			}
 			// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 			// at the next ', skip it
@@ -942,7 +942,8 @@ fn (mut s Scanner) text_scan() token.Token {
 						// if any typ is neither Type nor builtin, then the case is non-generic
 						typs.all(it.len > 0
 							&& ((it[0].is_capital() && it[1..].bytes().all(it.is_alnum()
-							|| it == `_`)) || it in ast.builtin_type_names))
+							|| it == `_`))
+							|| ast.builtin_type_names_matcher.find(it) > 0))
 					} else {
 						false
 					}
diff --git a/vlib/v/token/keywords_matcher.v b/vlib/v/token/keywords_matcher.v
index 50d737c528..772d0ea85f 100644
--- a/vlib/v/token/keywords_matcher.v
+++ b/vlib/v/token/keywords_matcher.v
@@ -1,10 +1,10 @@
 module token
 
 // bump token.max_keyword_len, if you add a longer keyword
-const max_keyword_len = 11
+const max_keyword_len = 20
 
 // KeywordsMatcher provides a faster way of determinining whether a given name
-// is a reserved keyword, by doing a comparison with only the keywords that
+// is a reserved word, by doing a comparison with only the keywords that
 // have exactly the same length as `name`.
 // Benchmarking shows that with -prod, it is 20-25% slower in the worst case
 // compared to just using token.keywords[name], but can be 20x faster
@@ -16,25 +16,25 @@ pub struct KeywordsMatcher {
 mut:
 	len_min int = 9999
 	len_max int = -1
-	words   [max_keyword_len][]WKind
+	words   [max_keyword_len][]WIndex
 }
 
-struct WKind {
+struct WIndex {
 mut:
-	word string
-	kind Kind
+	word  string
+	index int
 }
 
-pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
+pub fn new_keywords_matcher<T>(kw_map map[string]T) KeywordsMatcher {
 	mut km := KeywordsMatcher{}
 	// TODO: remove this loop. It is currently needed, because a
 	// fixed array of arrays is not initialised properly automatically
 	// as of 2021/10/28
 	for i in 0 .. token.max_keyword_len {
-		km.words[i] = []WKind{}
+		km.words[i] = []WIndex{}
 	}
 	for k, v in kw_map {
-		km.add_word(k, v)
+		km.add_word(k, int(v))
 	}
 	for i in 0 .. token.max_keyword_len {
 		if km.words[i].len > 0 {
@@ -51,7 +51,7 @@ pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
 	return km
 }
 
-fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
+fn (mut km KeywordsMatcher) add_word(word string, kind int) {
 	if word.len >= token.max_keyword_len {
 		panic('increase max_keyword_len to > $word.len')
 	}
@@ -61,20 +61,20 @@ fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
 	if word.len < km.len_min {
 		km.len_min = word.len
 	}
-	km.words[word.len] << WKind{word, kind}
+	km.words[word.len] << WIndex{word, kind}
 }
 
-// find returns the Kind given a word, by doing a binary search
+// find returns the int index, given a word, by doing a binary search
 // on the sorted list of words for each bin
 [direct_array_access]
-pub fn (km &KeywordsMatcher) find(word string) Kind {
+pub fn (km &KeywordsMatcher) find(word string) int {
 	wlen := word.len
 	if wlen < km.len_min || wlen > km.len_max {
-		return Kind.unknown
+		return -1
 	}
 	list_len := km.words[wlen].len
 	if list_len == 0 {
-		return Kind.unknown
+		return -1
 	}
 	mut lo := 0
 	mut hi := list_len - 1
@@ -82,11 +82,11 @@
 		mid := lo + (hi - lo) / 2
 		cmp := km.words[wlen][mid].word.compare(word)
 		match cmp {
-			0 { return km.words[wlen][mid].kind }
+			0 { return km.words[wlen][mid].index }
 			-1 { lo = mid + 1 }
 			1 { hi = mid - 1 }
 			else {}
 		}
 	}
-	return Kind.unknown
+	return -1
 }
diff --git a/vlib/v/token/token.v b/vlib/v/token/token.v
index e164c40101..27a6a874b0 100644
--- a/vlib/v/token/token.v
+++ b/vlib/v/token/token.v
@@ -313,7 +313,7 @@ const token_str = build_token_str()
 
 pub const keywords = build_keys()
 
-pub const matcher = new_keywords_matcher(keywords)
+pub const matcher = new_keywords_matcher<Kind>(keywords)
 
 [inline]
 pub fn is_key(key string) bool {
@@ -472,15 +472,3 @@ pub fn (kind Kind) is_infix() bool {
 		.le, .logical_or, .xor, .not_in, .key_is, .not_is, .and, .dot, .pipe, .amp, .left_shift,
 		.right_shift, .unsigned_right_shift, .arrow]
 }
-
-// Pass ast.builtin_type_names
-// Note: can't import table here due to circular module dependency
-pub fn (tok &Token) can_start_type(builtin_types []string) bool {
-	match tok.kind {
-		.name { return (tok.lit.len > 0 && tok.lit[0].is_capital()) || tok.lit in builtin_types }
-		// Note: return type (T1, T2) should be handled elsewhere
-		.amp, .key_fn, .lsbr, .question { return true }
-		else {}
-	}
-	return false
-}
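
-- 
Usage sketch (not part of the patch): a minimal example of how the matcher API looks after this change, assuming the generic new_keywords_matcher<T> constructor and the find method from the diff above (find returns the stored int value for a known word, or -1 when the word is absent), and using the angle-bracket generic syntax of that era. The word list and its values below are made up for illustration.

import v.token

fn main() {
	// Words and the values to return for them; here the values are arbitrary.
	words := {
		'fn':     1
		'return': 2
		'struct': 3
	}
	// The matcher buckets words by length and keeps each bucket sorted,
	// so find() only binary-searches entries whose length matches the query.
	m := token.new_keywords_matcher<int>(words)
	println(m.find('return')) // 2
	println(m.find('banana')) // -1, 'banana' was never added
}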