v.token,ast,scanner,parser: use a KeywordsMatcher lookup for builtin_type_names searches too

2021-11-07 16:02:37 +02:00 · 2021-11-07 16:02:37 +02:00 · 1c12186701
parent 80242c8041
commit 1c12186701
7 changed files with 55 additions and 43 deletions
--- a/vlib/v/ast/ast.v
+++ b/vlib/v/ast/ast.v
@ -2103,3 +2103,26 @@ pub fn (expr Expr) is_literal() bool {
 		}
 	}
 }
 // type_can_start_with_token reports whether `tok` may begin a type expression.
 // A `.name` token qualifies when its literal starts with a capital letter, or
 // when it is one of the builtin type names. The `.amp`, `.key_fn`, `.lsbr` and
 // `.question` kinds always qualify. Every other token kind yields false.
 pub fn type_can_start_with_token(tok &token.Token) bool {
 	match tok.kind {
 		.name {
 			// find() returns -1 on a miss, and the 0-based position of the name in
 			// builtin_type_names on a hit, so 0 ('void') is a valid match too.
 			return (tok.lit.len > 0 && tok.lit[0].is_capital())
 				|| builtin_type_names_matcher.find(tok.lit) >= 0
 		}
 		// Note: return type (T1, T2) should be handled elsewhere
 		.amp, .key_fn, .lsbr, .question {
 			return true
 		}
 		else {}
 	}
 	return false
 }
 // build_builtin_type_names_matcher creates the lookup structure behind
 // builtin_type_names_matcher. Each name is stored together with its 0-based
 // position in builtin_type_names (so 'void' is stored with 0); callers that
 // only need a membership test should compare the find() result against -1.
 fn build_builtin_type_names_matcher() token.KeywordsMatcher {
 	mut name_to_pos := map[string]int{}
 	for pos in 0 .. builtin_type_names.len {
 		name_to_pos[builtin_type_names[pos]] = pos
 	}
 	return token.new_keywords_matcher<int>(name_to_pos)
 }
--- a/vlib/v/ast/types.v
+++ b/vlib/v/ast/types.v
@ -427,6 +427,13 @@ pub const (
 	u8_type_idx            = 30
 )
 // NB: builtin_type_names must be in the same order as the idx consts above
 pub const builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64',
 	'isize', 'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string',
 	'rune', 'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
 pub const builtin_type_names_matcher = build_builtin_type_names_matcher()
 pub const (
 	integer_type_idxs          = [i8_type_idx, i16_type_idx, int_type_idx, i64_type_idx,
 		byte_type_idx, u8_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, isize_type_idx,
@ -489,13 +496,6 @@ pub fn merge_types(params ...[]Type) []Type {
 	return res
 }
 pub const (
 	// must be in the same order as the idx consts above
 	builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64', 'isize',
 		'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string', 'rune',
 		'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
 )
 pub struct MultiReturn {
 pub mut:
 	types []Type
--- a/vlib/v/parser/if_match.v
+++ b/vlib/v/parser/if_match.v
@ -180,7 +180,7 @@ fn (mut p Parser) match_expr() ast.MatchExpr {
 			is_else = true
 			p.next()
 		} else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot)
-			&& (((p.tok.lit in ast.builtin_type_names || p.tok.lit[0].is_capital())
+			&& (((ast.builtin_type_names_matcher.find(p.tok.lit) > 0 || p.tok.lit[0].is_capital())
 			&& p.peek_tok.kind != .lpar) || (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0
 			&& p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr {
 			mut types := []ast.Type{}
--- a/vlib/v/parser/parser.v
+++ b/vlib/v/parser/parser.v
@ -1961,7 +1961,7 @@ fn (p &Parser) is_generic_call() bool {
 const valid_tokens_inside_types = [token.Kind.lsbr, .rsbr, .name, .dot, .comma, .key_fn, .lt]
 fn (mut p Parser) is_generic_cast() bool {
-	if !p.tok.can_start_type(ast.builtin_type_names) {
+	if !ast.type_can_start_with_token(p.tok) {
 		return false
 	}
 	mut i := 0
@ -2160,7 +2160,7 @@ pub fn (mut p Parser) name_expr() ast.Expr {
 		}
 		name_w_mod := p.prepend_mod(name)
 		// type cast. TODO: finish
-		// if name in ast.builtin_type_names {
+		// if name in ast.builtin_type_names_to_idx {
 		if (!known_var && (name in p.table.type_idxs || name_w_mod in p.table.type_idxs)
 			&& name !in ['C.stat', 'C.sigaction']) || is_mod_cast || is_generic_cast
 			|| (language == .v && name.len > 0 && name[0].is_capital()) {
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@ -683,8 +683,8 @@ fn (mut s Scanner) text_scan() token.Token {
 			// Check if not .eof to prevent panic
 			next_char := s.look_ahead(1)
 			kind := token.matcher.find(name)
-			if kind != .unknown {
+			if kind != -1 {
-				return s.new_token(kind, name, name.len)
+				return s.new_token(token.Kind(kind), name, name.len)
 			}
 			// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 			// at the next ', skip it
@ -942,7 +942,8 @@ fn (mut s Scanner) text_scan() token.Token {
 							// if any typ is neither Type nor builtin, then the case is non-generic
 							typs.all(it.len > 0
 								&& ((it[0].is_capital() && it[1..].bytes().all(it.is_alnum()
-								|| it == `_`)) || it in ast.builtin_type_names))
+								|| it == `_`))
 								|| ast.builtin_type_names_matcher.find(it) > 0))
 						} else {
 							false
 						}
--- a/vlib/v/token/keywords_matcher.v
+++ b/vlib/v/token/keywords_matcher.v
@ -1,10 +1,10 @@
 module token
 // bump token.max_keyword_len, if you add a longer keyword
-const max_keyword_len = 11
+const max_keyword_len = 20
 // KeywordsMatcher provides a faster way of determining whether a given name
-// is a reserved keyword, by doing a comparison with only the keywords that
+// is a reserved word, by doing a comparison with only the keywords that
 // have exactly the same length as `name`.
 // Benchmarking shows that with -prod, it is 20-25% slower in the worst case
 // compared to just using token.keywords[name], but can be 20x faster
@ -16,25 +16,25 @@ pub struct KeywordsMatcher {
 mut:
 	len_min int = 9999
 	len_max int = -1
-	words   [max_keyword_len][]WKind
+	words   [max_keyword_len][]WIndex
 }
-struct WKind {
+struct WIndex {
 mut:
 	word  string
-	kind Kind
+	index int
 }
-pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
+pub fn new_keywords_matcher<T>(kw_map map[string]T) KeywordsMatcher {
 	mut km := KeywordsMatcher{}
 	// TODO: remove this loop. It is currently needed, because a
 	// fixed array of arrays is not initialised properly automatically
 	// as of 2021/10/28
 	for i in 0 .. token.max_keyword_len {
-		km.words[i] = []WKind{}
+		km.words[i] = []WIndex{}
 	}
 	for k, v in kw_map {
-		km.add_word(k, v)
+		km.add_word(k, int(v))
 	}
 	for i in 0 .. token.max_keyword_len {
 		if km.words[i].len > 0 {
@ -51,7 +51,7 @@ pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
 	return km
 }
-fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
+fn (mut km KeywordsMatcher) add_word(word string, kind int) {
 	if word.len >= token.max_keyword_len {
 		panic('increase max_keyword_len to > $word.len')
 	}
@ -61,20 +61,20 @@ fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
 	if word.len < km.len_min {
 		km.len_min = word.len
 	}
-	km.words[word.len] << WKind{word, kind}
+	km.words[word.len] << WIndex{word, kind}
 }
-// find returns the Kind given a word, by doing a binary search
+// find returns the int index, given a word, by doing a binary search
 // on the sorted list of words for each bin
 [direct_array_access]
-pub fn (km &KeywordsMatcher) find(word string) Kind {
+pub fn (km &KeywordsMatcher) find(word string) int {
 	wlen := word.len
 	if wlen < km.len_min || wlen > km.len_max {
-		return Kind.unknown
+		return -1
 	}
 	list_len := km.words[wlen].len
 	if list_len == 0 {
-		return Kind.unknown
+		return -1
 	}
 	mut lo := 0
 	mut hi := list_len - 1
@ -82,11 +82,11 @@ pub fn (km &KeywordsMatcher) find(word string) Kind {
 		mid := lo + (hi - lo) / 2
 		cmp := km.words[wlen][mid].word.compare(word)
 		match cmp {
-			0 { return km.words[wlen][mid].kind }
+			0 { return km.words[wlen][mid].index }
 			-1 { lo = mid + 1 }
 			1 { hi = mid - 1 }
 			else {}
 		}
 	}
-	return Kind.unknown
+	return -1
 }
--- a/vlib/v/token/token.v
+++ b/vlib/v/token/token.v
@ -313,7 +313,7 @@ const token_str = build_token_str()
 pub const keywords = build_keys()
-pub const matcher = new_keywords_matcher(keywords)
+pub const matcher = new_keywords_matcher<Kind>(keywords)
 [inline]
 pub fn is_key(key string) bool {
@ -472,15 +472,3 @@ pub fn (kind Kind) is_infix() bool {
 		.le, .logical_or, .xor, .not_in, .key_is, .not_is, .and, .dot, .pipe, .amp, .left_shift,
 		.right_shift, .unsigned_right_shift, .arrow]
 }
 // can_start_type reports whether `tok` may begin a type expression.
 // `builtin_types` is the list of builtin type names to accept for `.name`
 // tokens; callers are expected to pass ast.builtin_type_names.
 // Note: can't import table here due to circular module dependency
 pub fn (tok &Token) can_start_type(builtin_types []string) bool {
 	match tok.kind {
 		// a name qualifies when it is capitalized, or is listed in builtin_types
 		.name { return (tok.lit.len > 0 && tok.lit[0].is_capital()) || tok.lit in builtin_types }
 		// Note: return type (T1, T2) should be handled elsewhere
 		.amp, .key_fn, .lsbr, .question { return true }
 		else {}
 	}
 	return false
 }