v.token,ast,scanner,parser: use a KeywordsMatcher lookup for builtin_type_names searches too

parent 80242c8041
commit 1c12186701
@@ -2103,3 +2103,26 @@ pub fn (expr Expr) is_literal() bool {
 		}
 	}
 }
+
+pub fn type_can_start_with_token(tok &token.Token) bool {
+	match tok.kind {
+		.name {
+			return (tok.lit.len > 0 && tok.lit[0].is_capital())
+				|| builtin_type_names_matcher.find(tok.lit) > 0
+		}
+		// Note: return type (T1, T2) should be handled elsewhere
+		.amp, .key_fn, .lsbr, .question {
+			return true
+		}
+		else {}
+	}
+	return false
+}
+
+fn build_builtin_type_names_matcher() token.KeywordsMatcher {
+	mut m := map[string]int{}
+	for i, name in builtin_type_names {
+		m[name] = i
+	}
+	return token.new_keywords_matcher<int>(m)
+}
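The new ast.type_can_start_with_token helper takes over from the token.can_start_type method removed in the final hunk below, so v.token no longer needs the builtin names passed in. A minimal standalone sketch of the same decision logic, where type_names and starts_type are hypothetical stand-ins for the matcher and the helper:

const type_names = ['int', 'string', 'bool']

fn starts_type(lit string) bool {
	// a .name token can start a type if it is capitalized (Foo, MyStruct),
	// or if it is one of the builtin type names
	return (lit.len > 0 && lit[0].is_capital()) || lit in type_names
}

fn main() {
	for lit in ['Foo', 'int', 'foo'] {
		println('$lit: ${starts_type(lit)}') // true, true, false
	}
}
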
@@ -427,6 +427,13 @@ pub const (
 	u8_type_idx = 30
 )
 
+// NB: builtin_type_names must be in the same order as the idx consts above
+pub const builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64',
+	'isize', 'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string',
+	'rune', 'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
+
+pub const builtin_type_names_matcher = build_builtin_type_names_matcher()
+
 pub const (
 	integer_type_idxs = [i8_type_idx, i16_type_idx, int_type_idx, i64_type_idx,
 		byte_type_idx, u8_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, isize_type_idx,
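Because the array order mirrors the *_type_idx constants, the index the matcher stores can stand in for the type index. A small illustration of that invariant, with names as a shortened, made-up stand-in for the real array:

const names = ['void', 'voidptr', 'byteptr']

fn main() {
	mut m := map[string]int{}
	for i, name in names {
		m[name] = i // the same loop build_builtin_type_names_matcher uses
	}
	assert m['voidptr'] == 1
	println(m['byteptr']) // 2
}
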
@@ -489,13 +496,6 @@ pub fn merge_types(params ...[]Type) []Type {
 	return res
 }
 
-pub const (
-	// must be in the same order as the idx consts above
-	builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64', 'isize',
-		'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string', 'rune',
-		'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
-)
-
 pub struct MultiReturn {
 pub mut:
 	types []Type
@@ -180,7 +180,7 @@ fn (mut p Parser) match_expr() ast.MatchExpr {
 			is_else = true
 			p.next()
 		} else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot)
-			&& (((p.tok.lit in ast.builtin_type_names || p.tok.lit[0].is_capital())
+			&& (((ast.builtin_type_names_matcher.find(p.tok.lit) > 0 || p.tok.lit[0].is_capital())
			&& p.peek_tok.kind != .lpar) || (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0
			&& p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr {
 			mut types := []ast.Type{}
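This condition decides whether a match arm is a type pattern rather than a value pattern. An illustrative example of the kind of source it must recognize; the Num sum type and describe function are made up for this sketch:

type Num = f64 | int

fn describe(x Num) string {
	return match x {
		int { 'int' } // builtin name, now found via the matcher
		f64 { 'f64' } // likewise a builtin
	}
}

fn main() {
	println(describe(Num(3)))   // int
	println(describe(Num(3.0))) // f64
}
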
@@ -1961,7 +1961,7 @@ fn (p &Parser) is_generic_call() bool {
 const valid_tokens_inside_types = [token.Kind.lsbr, .rsbr, .name, .dot, .comma, .key_fn, .lt]
 
 fn (mut p Parser) is_generic_cast() bool {
-	if !p.tok.can_start_type(ast.builtin_type_names) {
+	if !ast.type_can_start_with_token(p.tok) {
 		return false
 	}
 	mut i := 0
@@ -2160,7 +2160,7 @@ pub fn (mut p Parser) name_expr() ast.Expr {
 	}
 	name_w_mod := p.prepend_mod(name)
 	// type cast. TODO: finish
-	// if name in ast.builtin_type_names {
+	// if name in ast.builtin_type_names_to_idx {
 	if (!known_var && (name in p.table.type_idxs || name_w_mod in p.table.type_idxs)
 		&& name !in ['C.stat', 'C.sigaction']) || is_mod_cast || is_generic_cast
 		|| (language == .v && name.len > 0 && name[0].is_capital()) {
@@ -683,8 +683,8 @@ fn (mut s Scanner) text_scan() token.Token {
 			// Check if not .eof to prevent panic
 			next_char := s.look_ahead(1)
 			kind := token.matcher.find(name)
-			if kind != .unknown {
-				return s.new_token(kind, name, name.len)
+			if kind != -1 {
+				return s.new_token(token.Kind(kind), name, name.len)
 			}
 			// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 			// at the next ', skip it
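find now returns an int index with -1 as the not-found sentinel, so a hit has to be cast back to token.Kind, as the scanner does above. A usage sketch of the same calling convention; classify is a hypothetical helper:

import v.token

fn classify(name string) token.Kind {
	k := token.matcher.find(name)
	if k != -1 {
		return token.Kind(k) // e.g. .key_return for 'return'
	}
	return token.Kind.name // plain identifier
}

fn main() {
	println(classify('return')) // key_return
	println(classify('foobar')) // name
}
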
@@ -942,7 +942,8 @@ fn (mut s Scanner) text_scan() token.Token {
 		// if any typ is neither Type nor builtin, then the case is non-generic
 		typs.all(it.len > 0
 			&& ((it[0].is_capital() && it[1..].bytes().all(it.is_alnum()
-			|| it == `_`)) || it in ast.builtin_type_names))
+			|| it == `_`))
+			|| ast.builtin_type_names_matcher.find(it) > 0))
 	} else {
 		false
 	}
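The predicate treats an arm as generic only when every name in it looks like a type. A standalone sketch of the same .all() test, with builtins as a plain-list stand-in for the matcher lookup:

fn main() {
	builtins := ['int', 'string', 'bool']
	typs := ['T', 'MyType', 'int']
	ok := typs.all(it.len > 0
		&& ((it[0].is_capital() && it[1..].bytes().all(it.is_alnum() || it == `_`))
		|| it in builtins))
	println(ok) // true: every entry is capitalized or a builtin
}
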
@@ -1,10 +1,10 @@
 module token
 
 // bump token.max_keyword_len, if you add a longer keyword
-const max_keyword_len = 11
+const max_keyword_len = 20
 
 // KeywordsMatcher provides a faster way of determining whether a given name
-// is a reserved keyword, by doing a comparison with only the keywords that
+// is a reserved word, by doing a comparison with only the keywords that
 // have exactly the same length as `name`.
 // Benchmarking shows that with -prod, it is 20-25% slower in the worst case
 // compared to just using token.keywords[name], but can be 20x faster
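The cap has to grow because the matcher now also stores builtin type names, and 'float_literal' alone is 13 bytes, past the old limit of 11. A quick illustrative check:

fn main() {
	// the longest builtin type names exceed the old cap of 11
	names := ['float_literal', 'int_literal', 'voidptr']
	mut longest := 0
	for n in names {
		if n.len > longest {
			longest = n.len
		}
	}
	println(longest) // 13, so max_keyword_len = 20 leaves some headroom
}
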
@@ -16,25 +16,25 @@ pub struct KeywordsMatcher {
 mut:
 	len_min int = 9999
 	len_max int = -1
-	words   [max_keyword_len][]WKind
+	words   [max_keyword_len][]WIndex
 }
 
-struct WKind {
+struct WIndex {
 mut:
 	word string
-	kind Kind
+	index int
 }
 
-pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
+pub fn new_keywords_matcher<T>(kw_map map[string]T) KeywordsMatcher {
 	mut km := KeywordsMatcher{}
 	// TODO: remove this loop. It is currently needed, because a
 	// fixed array of arrays is not initialised properly automatically
 	// as of 2021/10/28
 	for i in 0 .. token.max_keyword_len {
-		km.words[i] = []WKind{}
+		km.words[i] = []WIndex{}
 	}
 	for k, v in kw_map {
-		km.add_word(k, v)
+		km.add_word(k, int(v))
 	}
 	for i in 0 .. token.max_keyword_len {
 		if km.words[i].len > 0 {
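Making the constructor generic lets one matcher serve both map[string]Kind (keywords) and map[string]int (builtin-type indices), since a V enum casts cleanly to int. A minimal sketch of that normalization; Color and to_int_map are made up for this example:

enum Color {
	red
	green
	blue
}

fn to_int_map<T>(m map[string]T) map[string]int {
	mut res := map[string]int{}
	for k, v in m {
		res[k] = int(v) // works for enum values and plain ints alike
	}
	return res
}

fn main() {
	mut colors := map[string]Color{}
	colors['g'] = Color.green
	println(to_int_map(colors)) // {'g': 1}
	mut nums := map[string]int{}
	nums['x'] = 42
	println(to_int_map(nums)) // {'x': 42}
}
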
@@ -51,7 +51,7 @@ pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
 	return km
 }
 
-fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
+fn (mut km KeywordsMatcher) add_word(word string, kind int) {
 	if word.len >= token.max_keyword_len {
 		panic('increase max_keyword_len to > $word.len')
 	}
@@ -61,20 +61,20 @@ fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
 	if word.len < km.len_min {
 		km.len_min = word.len
 	}
-	km.words[word.len] << WKind{word, kind}
+	km.words[word.len] << WIndex{word, kind}
 }
 
-// find returns the Kind given a word, by doing a binary search
+// find returns the int index, given a word, by doing a binary search
 // on the sorted list of words for each bin
 [direct_array_access]
-pub fn (km &KeywordsMatcher) find(word string) Kind {
+pub fn (km &KeywordsMatcher) find(word string) int {
 	wlen := word.len
 	if wlen < km.len_min || wlen > km.len_max {
-		return Kind.unknown
+		return -1
 	}
 	list_len := km.words[wlen].len
 	if list_len == 0 {
-		return Kind.unknown
+		return -1
 	}
 	mut lo := 0
 	mut hi := list_len - 1
@@ -82,11 +82,11 @@ pub fn (km &KeywordsMatcher) find(word string) Kind {
 		mid := lo + (hi - lo) / 2
 		cmp := km.words[wlen][mid].word.compare(word)
 		match cmp {
-			0 { return km.words[wlen][mid].kind }
+			0 { return km.words[wlen][mid].index }
 			-1 { lo = mid + 1 }
 			1 { hi = mid - 1 }
 			else {}
 		}
 	}
-	return Kind.unknown
+	return -1
 }
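The length bins are what keep the matcher fast: a lookup only ever compares against words of exactly the same length, then binary-searches that one sorted bin. A self-contained sketch of the scheme; find_in and the word list are made up:

fn find_in(bins map[int][]string, word string) int {
	if word.len !in bins {
		return -1 // no bin of this length at all
	}
	list := bins[word.len]
	mut lo := 0
	mut hi := list.len - 1
	for lo <= hi {
		mid := lo + (hi - lo) / 2
		cmp := list[mid].compare(word)
		match cmp {
			0 { return mid }
			-1 { lo = mid + 1 }
			else { hi = mid - 1 }
		}
	}
	return -1
}

fn main() {
	mut words := ['rune', 'bool', 'int', 'i64', 'string']
	words.sort() // a single global sort keeps each length bin sorted too
	mut bins := map[int][]string{}
	for w in words {
		bins[w.len] << w
	}
	println(find_in(bins, 'bool')) // 0: found in the 4-byte bin
	println(find_in(bins, 'booo')) // -1: right bin, no such word
}
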
@@ -313,7 +313,7 @@ const token_str = build_token_str()
 
 pub const keywords = build_keys()
 
-pub const matcher = new_keywords_matcher(keywords)
+pub const matcher = new_keywords_matcher<Kind>(keywords)
 
 [inline]
 pub fn is_key(key string) bool {
@@ -472,15 +472,3 @@ pub fn (kind Kind) is_infix() bool {
 		.le, .logical_or, .xor, .not_in, .key_is, .not_is, .and, .dot, .pipe, .amp, .left_shift,
 		.right_shift, .unsigned_right_shift, .arrow]
 }
-
-// Pass ast.builtin_type_names
-// Note: can't import table here due to circular module dependency
-pub fn (tok &Token) can_start_type(builtin_types []string) bool {
-	match tok.kind {
-		.name { return (tok.lit.len > 0 && tok.lit[0].is_capital()) || tok.lit in builtin_types }
-		// Note: return type (T1, T2) should be handled elsewhere
-		.amp, .key_fn, .lsbr, .question { return true }
-		else {}
-	}
-	return false
-}