v.token,ast,scanner,parser: use a KeywordsMatcher lookup for builtin_type_names searches too
parent
80242c8041
commit
1c12186701
|
@ -2103,3 +2103,26 @@ pub fn (expr Expr) is_literal() bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// type_can_start_with_token reports whether `tok` can be the first token of a type.
// A `.name` token qualifies when its literal starts with a capital letter, or when
// the literal is found in `builtin_type_names_matcher`. Tokens of kind `.amp`,
// `.key_fn`, `.lsbr` and `.question` always qualify. Everything else returns false.
pub fn type_can_start_with_token(tok &token.Token) bool {
	match tok.kind {
		.name {
			// KeywordsMatcher.find returns the stored index on a hit and -1 on a miss.
			// Index 0 is a valid hit (the first builtin name, 'void'), so the check
			// must be `>= 0`; `> 0` would silently reject that first entry.
			return (tok.lit.len > 0 && tok.lit[0].is_capital())
				|| builtin_type_names_matcher.find(tok.lit) >= 0
		}
		// Note: return type (T1, T2) should be handled elsewhere
		.amp, .key_fn, .lsbr, .question {
			return true
		}
		else {}
	}
	return false
}
|
||||||
|
|
||||||
|
// build_builtin_type_names_matcher creates the matcher assigned to
// `builtin_type_names_matcher`: each builtin type name is registered together
// with its position in the `builtin_type_names` array.
fn build_builtin_type_names_matcher() token.KeywordsMatcher {
	mut name_to_idx := map[string]int{}
	for idx in 0 .. builtin_type_names.len {
		name_to_idx[builtin_type_names[idx]] = idx
	}
	return token.new_keywords_matcher<int>(name_to_idx)
}
|
||||||
|
|
|
@ -427,6 +427,13 @@ pub const (
|
||||||
u8_type_idx = 30
|
u8_type_idx = 30
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// NB: builtin_type_names must be in the same order as the idx consts above
// The position of each name in this array is also the value stored for it in
// builtin_type_names_matcher, so the first entry ('void') is stored as 0.
|
||||||
|
pub const builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64',
|
||||||
|
'isize', 'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string',
|
||||||
|
'rune', 'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
|
||||||
|
|
||||||
|
// builtin_type_names_matcher is the lookup built once from builtin_type_names;
// its find() returns the stored position of a name, or -1 when the name is absent.
pub const builtin_type_names_matcher = build_builtin_type_names_matcher()
|
||||||
|
|
||||||
pub const (
|
pub const (
|
||||||
integer_type_idxs = [i8_type_idx, i16_type_idx, int_type_idx, i64_type_idx,
|
integer_type_idxs = [i8_type_idx, i16_type_idx, int_type_idx, i64_type_idx,
|
||||||
byte_type_idx, u8_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, isize_type_idx,
|
byte_type_idx, u8_type_idx, u16_type_idx, u32_type_idx, u64_type_idx, isize_type_idx,
|
||||||
|
@ -489,13 +496,6 @@ pub fn merge_types(params ...[]Type) []Type {
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const (
|
|
||||||
// must be in the same order as the idx consts above
|
|
||||||
builtin_type_names = ['void', 'voidptr', 'byteptr', 'charptr', 'i8', 'i16', 'int', 'i64', 'isize',
|
|
||||||
'byte', 'u16', 'u32', 'u64', 'usize', 'f32', 'f64', 'char', 'bool', 'none', 'string', 'rune',
|
|
||||||
'array', 'map', 'chan', 'any', 'float_literal', 'int_literal', 'thread', 'Error', 'u8']
|
|
||||||
)
|
|
||||||
|
|
||||||
pub struct MultiReturn {
|
pub struct MultiReturn {
|
||||||
pub mut:
|
pub mut:
|
||||||
types []Type
|
types []Type
|
||||||
|
|
|
@ -180,7 +180,7 @@ fn (mut p Parser) match_expr() ast.MatchExpr {
|
||||||
is_else = true
|
is_else = true
|
||||||
p.next()
|
p.next()
|
||||||
} else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot)
|
} else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot)
|
||||||
&& (((p.tok.lit in ast.builtin_type_names || p.tok.lit[0].is_capital())
|
&& (((ast.builtin_type_names_matcher.find(p.tok.lit) > 0 || p.tok.lit[0].is_capital())
|
||||||
&& p.peek_tok.kind != .lpar) || (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0
|
&& p.peek_tok.kind != .lpar) || (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0
|
||||||
&& p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr {
|
&& p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr {
|
||||||
mut types := []ast.Type{}
|
mut types := []ast.Type{}
|
||||||
|
|
|
@ -1961,7 +1961,7 @@ fn (p &Parser) is_generic_call() bool {
|
||||||
const valid_tokens_inside_types = [token.Kind.lsbr, .rsbr, .name, .dot, .comma, .key_fn, .lt]
|
const valid_tokens_inside_types = [token.Kind.lsbr, .rsbr, .name, .dot, .comma, .key_fn, .lt]
|
||||||
|
|
||||||
fn (mut p Parser) is_generic_cast() bool {
|
fn (mut p Parser) is_generic_cast() bool {
|
||||||
if !p.tok.can_start_type(ast.builtin_type_names) {
|
if !ast.type_can_start_with_token(p.tok) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
mut i := 0
|
mut i := 0
|
||||||
|
@ -2160,7 +2160,7 @@ pub fn (mut p Parser) name_expr() ast.Expr {
|
||||||
}
|
}
|
||||||
name_w_mod := p.prepend_mod(name)
|
name_w_mod := p.prepend_mod(name)
|
||||||
// type cast. TODO: finish
|
// type cast. TODO: finish
|
||||||
// if name in ast.builtin_type_names {
|
// if name in ast.builtin_type_names_to_idx {
|
||||||
if (!known_var && (name in p.table.type_idxs || name_w_mod in p.table.type_idxs)
|
if (!known_var && (name in p.table.type_idxs || name_w_mod in p.table.type_idxs)
|
||||||
&& name !in ['C.stat', 'C.sigaction']) || is_mod_cast || is_generic_cast
|
&& name !in ['C.stat', 'C.sigaction']) || is_mod_cast || is_generic_cast
|
||||||
|| (language == .v && name.len > 0 && name[0].is_capital()) {
|
|| (language == .v && name.len > 0 && name[0].is_capital()) {
|
||||||
|
|
|
@ -683,8 +683,8 @@ fn (mut s Scanner) text_scan() token.Token {
|
||||||
// Check if not .eof to prevent panic
|
// Check if not .eof to prevent panic
|
||||||
next_char := s.look_ahead(1)
|
next_char := s.look_ahead(1)
|
||||||
kind := token.matcher.find(name)
|
kind := token.matcher.find(name)
|
||||||
if kind != .unknown {
|
if kind != -1 {
|
||||||
return s.new_token(kind, name, name.len)
|
return s.new_token(token.Kind(kind), name, name.len)
|
||||||
}
|
}
|
||||||
// 'asdf $b' => "b" is the last name in the string, dont start parsing string
|
// 'asdf $b' => "b" is the last name in the string, dont start parsing string
|
||||||
// at the next ', skip it
|
// at the next ', skip it
|
||||||
|
@ -942,7 +942,8 @@ fn (mut s Scanner) text_scan() token.Token {
|
||||||
// if any typ is neither Type nor builtin, then the case is non-generic
|
// if any typ is neither Type nor builtin, then the case is non-generic
|
||||||
typs.all(it.len > 0
|
typs.all(it.len > 0
|
||||||
&& ((it[0].is_capital() && it[1..].bytes().all(it.is_alnum()
|
&& ((it[0].is_capital() && it[1..].bytes().all(it.is_alnum()
|
||||||
|| it == `_`)) || it in ast.builtin_type_names))
|
|| it == `_`))
|
||||||
|
|| ast.builtin_type_names_matcher.find(it) > 0))
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
module token
|
module token
|
||||||
|
|
||||||
// bump token.max_keyword_len, if you add a longer keyword
|
// bump token.max_keyword_len, if you add a longer keyword
|
||||||
const max_keyword_len = 11
|
const max_keyword_len = 20
|
||||||
|
|
||||||
// KeywordsMatcher provides a faster way of determining whether a given name
|
// KeywordsMatcher provides a faster way of determining whether a given name
|
||||||
// is a reserved keyword, by doing a comparison with only the keywords that
|
// is a reserved word, by doing a comparison with only the keywords that
|
||||||
// have exactly the same length as `name`.
|
// have exactly the same length as `name`.
|
||||||
// Benchmarking shows that with -prod, it is 20-25% slower in the worst case
|
// Benchmarking shows that with -prod, it is 20-25% slower in the worst case
|
||||||
// compared to just using token.keywords[name], but can be 20x faster
|
// compared to just using token.keywords[name], but can be 20x faster
|
||||||
|
@ -16,25 +16,25 @@ pub struct KeywordsMatcher {
|
||||||
mut:
|
mut:
|
||||||
len_min int = 9999
|
len_min int = 9999
|
||||||
len_max int = -1
|
len_max int = -1
|
||||||
words [max_keyword_len][]WKind
|
words [max_keyword_len][]WIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
struct WKind {
|
struct WIndex {
|
||||||
mut:
|
mut:
|
||||||
word string
|
word string
|
||||||
kind Kind
|
index int
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
|
pub fn new_keywords_matcher<T>(kw_map map[string]T) KeywordsMatcher {
|
||||||
mut km := KeywordsMatcher{}
|
mut km := KeywordsMatcher{}
|
||||||
// TODO: remove this loop. It is currently needed, because a
|
// TODO: remove this loop. It is currently needed, because a
|
||||||
// fixed array of arrays is not initialised properly automatically
|
// fixed array of arrays is not initialised properly automatically
|
||||||
// as of 2021/10/28
|
// as of 2021/10/28
|
||||||
for i in 0 .. token.max_keyword_len {
|
for i in 0 .. token.max_keyword_len {
|
||||||
km.words[i] = []WKind{}
|
km.words[i] = []WIndex{}
|
||||||
}
|
}
|
||||||
for k, v in kw_map {
|
for k, v in kw_map {
|
||||||
km.add_word(k, v)
|
km.add_word(k, int(v))
|
||||||
}
|
}
|
||||||
for i in 0 .. token.max_keyword_len {
|
for i in 0 .. token.max_keyword_len {
|
||||||
if km.words[i].len > 0 {
|
if km.words[i].len > 0 {
|
||||||
|
@ -51,7 +51,7 @@ pub fn new_keywords_matcher(kw_map map[string]Kind) KeywordsMatcher {
|
||||||
return km
|
return km
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
|
fn (mut km KeywordsMatcher) add_word(word string, kind int) {
|
||||||
if word.len >= token.max_keyword_len {
|
if word.len >= token.max_keyword_len {
|
||||||
panic('increase max_keyword_len to > $word.len')
|
panic('increase max_keyword_len to > $word.len')
|
||||||
}
|
}
|
||||||
|
@ -61,20 +61,20 @@ fn (mut km KeywordsMatcher) add_word(word string, kind Kind) {
|
||||||
if word.len < km.len_min {
|
if word.len < km.len_min {
|
||||||
km.len_min = word.len
|
km.len_min = word.len
|
||||||
}
|
}
|
||||||
km.words[word.len] << WKind{word, kind}
|
km.words[word.len] << WIndex{word, kind}
|
||||||
}
|
}
|
||||||
|
|
||||||
// find returns the Kind given a word, by doing a binary search
|
// find returns the int index, given a word, by doing a binary search
|
||||||
// on the sorted list of words for each bin
|
// on the sorted list of words for each bin
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
pub fn (km &KeywordsMatcher) find(word string) Kind {
|
pub fn (km &KeywordsMatcher) find(word string) int {
|
||||||
wlen := word.len
|
wlen := word.len
|
||||||
if wlen < km.len_min || wlen > km.len_max {
|
if wlen < km.len_min || wlen > km.len_max {
|
||||||
return Kind.unknown
|
return -1
|
||||||
}
|
}
|
||||||
list_len := km.words[wlen].len
|
list_len := km.words[wlen].len
|
||||||
if list_len == 0 {
|
if list_len == 0 {
|
||||||
return Kind.unknown
|
return -1
|
||||||
}
|
}
|
||||||
mut lo := 0
|
mut lo := 0
|
||||||
mut hi := list_len - 1
|
mut hi := list_len - 1
|
||||||
|
@ -82,11 +82,11 @@ pub fn (km &KeywordsMatcher) find(word string) Kind {
|
||||||
mid := lo + (hi - lo) / 2
|
mid := lo + (hi - lo) / 2
|
||||||
cmp := km.words[wlen][mid].word.compare(word)
|
cmp := km.words[wlen][mid].word.compare(word)
|
||||||
match cmp {
|
match cmp {
|
||||||
0 { return km.words[wlen][mid].kind }
|
0 { return km.words[wlen][mid].index }
|
||||||
-1 { lo = mid + 1 }
|
-1 { lo = mid + 1 }
|
||||||
1 { hi = mid - 1 }
|
1 { hi = mid - 1 }
|
||||||
else {}
|
else {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Kind.unknown
|
return -1
|
||||||
}
|
}
|
||||||
|
|
|
@ -313,7 +313,7 @@ const token_str = build_token_str()
|
||||||
|
|
||||||
pub const keywords = build_keys()
|
pub const keywords = build_keys()
|
||||||
|
|
||||||
pub const matcher = new_keywords_matcher(keywords)
|
pub const matcher = new_keywords_matcher<Kind>(keywords)
|
||||||
|
|
||||||
[inline]
|
[inline]
|
||||||
pub fn is_key(key string) bool {
|
pub fn is_key(key string) bool {
|
||||||
|
@ -472,15 +472,3 @@ pub fn (kind Kind) is_infix() bool {
|
||||||
.le, .logical_or, .xor, .not_in, .key_is, .not_is, .and, .dot, .pipe, .amp, .left_shift,
|
.le, .logical_or, .xor, .not_in, .key_is, .not_is, .and, .dot, .pipe, .amp, .left_shift,
|
||||||
.right_shift, .unsigned_right_shift, .arrow]
|
.right_shift, .unsigned_right_shift, .arrow]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pass ast.builtin_type_names
|
|
||||||
// Note: can't import table here due to circular module dependency
|
|
||||||
pub fn (tok &Token) can_start_type(builtin_types []string) bool {
|
|
||||||
match tok.kind {
|
|
||||||
.name { return (tok.lit.len > 0 && tok.lit[0].is_capital()) || tok.lit in builtin_types }
|
|
||||||
// Note: return type (T1, T2) should be handled elsewhere
|
|
||||||
.amp, .key_fn, .lsbr, .question { return true }
|
|
||||||
else {}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue