parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)

pull/8951/head
Delyan Angelov 2021-02-24 20:03:53 +02:00 committed by GitHub
parent 1dd1be4400
commit 1c0eefae38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 183 additions and 75 deletions

View File

@ -11,7 +11,9 @@ pub fn (mut b Builder) gen_c(v_files []string) string {
util.timing_start('PARSE')
b.parsed_files = parser.parse_files(v_files, b.table, b.pref, b.global_scope)
b.parse_imports()
util.timing_measure('PARSE')
util.get_timers().show('SCAN')
util.get_timers().show('PARSE')
util.get_timers().show_if_exists('PARSE stmt')
if b.pref.only_check_syntax {
return ''
}

View File

@ -11,7 +11,9 @@ pub fn (mut b Builder) gen_js(v_files []string) string {
util.timing_start('PARSE')
b.parsed_files = parser.parse_files(v_files, b.table, b.pref, b.global_scope)
b.parse_imports()
util.timing_measure('PARSE')
util.get_timers().show('SCAN')
util.get_timers().show('PARSE')
util.get_timers().show_if_exists('PARSE stmt')
//
util.timing_start('CHECK')
b.checker.check_files(b.parsed_files)

View File

@ -15,7 +15,9 @@ pub fn (mut b Builder) build_x64(v_files []string, out_file string) {
util.timing_start('PARSE')
b.parsed_files = parser.parse_files(v_files, b.table, b.pref, b.global_scope)
b.parse_imports()
util.timing_measure('PARSE')
util.get_timers().show('SCAN')
util.get_timers().show('PARSE')
util.get_timers().show_if_exists('PARSE stmt')
//
util.timing_start('CHECK')
b.checker.check_files(b.parsed_files)

View File

@ -423,11 +423,13 @@ fn (mut p Parser) fn_receiver(mut params []table.Param, mut rec ReceiverParsingI
if !rec.is_mut {
rec.is_mut = p.tok.kind == .key_mut
if rec.is_mut {
p.warn_with_pos('use `(mut f Foo)` instead of `(f mut Foo)`', lpar_pos.extend(p.peek_tok2.position()))
ptoken2 := p.peek_token(2) // needed to prevent codegen bug, where .position() expects &Token
p.warn_with_pos('use `(mut f Foo)` instead of `(f mut Foo)`', lpar_pos.extend(ptoken2.position()))
}
}
if p.tok.kind == .key_shared {
p.error_with_pos('use `(shared f Foo)` instead of `(f shared Foo)`', lpar_pos.extend(p.peek_tok2.position()))
ptoken2 := p.peek_token(2) // needed to prevent codegen bug, where .position() expects &Token
p.error_with_pos('use `(shared f Foo)` instead of `(f shared Foo)`', lpar_pos.extend(ptoken2.position()))
}
rec.pos = rec_start_pos.extend(p.tok.position())
is_amp := p.tok.kind == .amp

View File

@ -30,8 +30,8 @@ fn (mut p Parser) for_stmt() ast.Stmt {
p.close_scope()
return for_stmt
} else if p.peek_tok.kind in [.decl_assign, .assign, .semicolon]
|| p.tok.kind == .semicolon || (p.peek_tok.kind == .comma && p.peek_tok2.kind != .key_mut
&& p.peek_tok3.kind != .key_in) {
|| p.tok.kind == .semicolon || (p.peek_tok.kind == .comma
&& p.peek_token(2).kind != .key_mut && p.peek_token(3).kind != .key_in) {
// `for i := 0; i < 10; i++ {` or `for a,b := 0,1; a < 10; a++ {`
if p.tok.kind == .key_mut {
p.error('`mut` is not needed in `for ;;` loops: use `for i := 0; i < n; i ++ {`')
@ -43,8 +43,8 @@ fn (mut p Parser) for_stmt() ast.Stmt {
mut has_init := false
mut has_cond := false
mut has_inc := false
mut is_multi := p.peek_tok.kind == .comma && p.peek_tok2.kind != .key_mut
&& p.peek_tok3.kind != .key_in
mut is_multi := p.peek_tok.kind == .comma && p.peek_token(2).kind != .key_mut
&& p.peek_token(3).kind != .key_in
if p.peek_tok.kind in [.assign, .decl_assign] || is_multi {
init = p.assign_stmt()
has_init = true
@ -87,7 +87,7 @@ fn (mut p Parser) for_stmt() ast.Stmt {
p.close_scope()
return for_c_stmt
} else if p.peek_tok.kind in [.key_in, .comma]
|| (p.tok.kind == .key_mut && p.peek_tok2.kind in [.key_in, .comma]) {
|| (p.tok.kind == .key_mut && p.peek_token(2).kind in [.key_in, .comma]) {
// `for i in vals`, `for i in start .. end`, `for mut user in users`, `for i, mut user in users`
mut val_is_mut := p.tok.kind == .key_mut
mut_pos := p.tok.position()

View File

@ -182,8 +182,8 @@ fn (mut p Parser) match_expr() ast.MatchExpr {
p.next()
} else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot)
&& (p.tok.lit in table.builtin_type_names || p.tok.lit[0].is_capital()
|| (p.peek_tok.kind == .dot && p.peek_tok2.lit.len > 0
&& p.peek_tok2.lit[0].is_capital()))) || p.tok.kind == .lsbr {
|| (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0
&& p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr {
mut types := []table.Type{}
for {
// Sum type match

View File

@ -55,7 +55,7 @@ pub fn (mut p Parser) parse_array_type() table.Type {
}
mut nr_dims := 1
// detect attr
not_attr := p.peek_tok.kind != .name && p.peek_tok2.kind !in [.semicolon, .rsbr]
not_attr := p.peek_tok.kind != .name && p.peek_token(2).kind !in [.semicolon, .rsbr]
for p.tok.kind == .lsbr && not_attr {
p.next()
p.check(.rsbr)

View File

@ -30,8 +30,6 @@ mut:
tok token.Token
prev_tok token.Token
peek_tok token.Token
peek_tok2 token.Token
peek_tok3 token.Token
table &table.Table
language table.Language
inside_if bool
@ -85,6 +83,10 @@ pub fn parse_stmt(text string, table &table.Table, scope &ast.Scope) ast.Stmt {
}
}
p.init_parse_fns()
util.timing_start('PARSE stmt')
defer {
util.timing_measure_cumulative('PARSE stmt')
}
p.read_first_token()
return p.stmt(false)
}
@ -120,6 +122,13 @@ pub fn parse_text(text string, path string, table &table.Table, comments_mode sc
return p.parse()
}
// free delegates to p.scanner.free(), releasing whatever the scanner frees.
// Both this method and the scanner method it calls are marked [unsafe],
// which is why the call is wrapped in an `unsafe` block.
[unsafe]
pub fn (mut p Parser) free() {
unsafe {
p.scanner.free()
}
}
pub fn (mut p Parser) set_path(path string) {
p.file_name = path
p.file_base = os.base(path)
@ -166,7 +175,7 @@ pub fn parse_vet_file(path string, table_ &table.Table, pref &pref.Preferences)
parent: 0
}
mut p := Parser{
scanner: scanner.new_vet_scanner_file(path, .parse_comments, pref)
scanner: scanner.new_scanner_file(path, .parse_comments, pref)
comments_mode: .parse_comments
table: table_
pref: pref
@ -194,6 +203,10 @@ pub fn parse_vet_file(path string, table_ &table.Table, pref &pref.Preferences)
}
pub fn (mut p Parser) parse() ast.File {
util.timing_start('PARSE')
defer {
util.timing_measure_cumulative('PARSE')
}
// comments_mode: comments_mode
p.init_parse_fns()
p.read_first_token()
@ -323,9 +336,6 @@ pub fn parse_files(paths []string, table &table.Table, pref &pref.Preferences, g
}
pub fn (mut p Parser) init_parse_fns() {
if p.comments_mode == .toplevel_comments {
p.scanner.scan_all_tokens_in_buffer()
}
// p.prefix_parse_fns = make(100, 100, sizeof(PrefixParseFn))
// p.prefix_parse_fns[token.Kind.name] = parse_name
}
@ -334,8 +344,11 @@ pub fn (mut p Parser) read_first_token() {
// need to call next() 4 times to get peek token 1,2,3 and current token
p.next()
p.next()
p.next()
p.next()
}
// peek_token returns a lookahead token, read from the scanner's token buffer
// through p.scanner.peek_token, shifted by 2 relative to `n`.
// NOTE(review): the shift presumably accounts for p.tok and p.peek_tok having
// already been fetched from the scanner, so that peek_token(2) is the token
// right after p.peek_tok - confirm against Scanner.buffer_scan.
[inline]
pub fn (p &Parser) peek_token(n int) token.Token {
	scanner_offset := n - 2
	return p.scanner.peek_token(scanner_offset)
}
pub fn (mut p Parser) open_scope() {
@ -399,9 +412,7 @@ fn (mut p Parser) next_with_comment() {
fn (mut p Parser) next() {
p.prev_tok = p.tok
p.tok = p.peek_tok
p.peek_tok = p.peek_tok2
p.peek_tok2 = p.peek_tok3
p.peek_tok3 = p.scanner.scan()
p.peek_tok = p.scanner.scan()
/*
if p.tok.kind==.comment {
p.comments << ast.Comment{text:p.tok.lit, line_nr:p.tok.line_nr}
@ -1111,14 +1122,14 @@ fn (p &Parser) is_generic_call() bool {
false
}
// use heuristics to detect `func<T>()` from `var < expr`
return !lit0_is_capital && p.peek_tok.kind == .lt && (match p.peek_tok2.kind {
return !lit0_is_capital && p.peek_tok.kind == .lt && (match p.peek_token(2).kind {
.name {
// maybe `f<int>`, `f<map[`, f<string,
(p.peek_tok2.kind == .name && p.peek_tok3.kind in [.gt, .comma]) || (p.peek_tok2.lit == 'map' && p.peek_tok3.kind == .lsbr)
(p.peek_token(2).kind == .name && p.peek_token(3).kind in [.gt, .comma]) || (p.peek_token(2).lit == 'map' && p.peek_token(3).kind == .lsbr)
}
.lsbr {
// maybe `f<[]T>`, assume `var < []` is invalid
p.peek_tok3.kind == .rsbr
p.peek_token(3).kind == .rsbr
}
else {
false
@ -1205,7 +1216,7 @@ pub fn (mut p Parser) name_expr() ast.Expr {
}
}
// Raw string (`s := r'hello \n ')
if p.peek_tok.kind == .string && !p.inside_str_interp && p.peek_tok2.kind != .colon {
if p.peek_tok.kind == .string && !p.inside_str_interp && p.peek_token(2).kind != .colon {
if p.tok.lit in ['r', 'c', 'js'] && p.tok.kind == .name {
return p.string_expr()
} else {
@ -1233,11 +1244,11 @@ pub fn (mut p Parser) name_expr() ast.Expr {
if p.tok.lit in p.imports {
// mark the imported module as used
p.register_used_import(p.tok.lit)
if p.peek_tok.kind == .dot && p.peek_tok2.kind != .eof && p.peek_tok2.lit.len > 0
&& p.peek_tok2.lit[0].is_capital() {
if p.peek_tok.kind == .dot && p.peek_token(2).kind != .eof
&& p.peek_token(2).lit.len > 0 && p.peek_token(2).lit[0].is_capital() {
is_mod_cast = true
} else if p.peek_tok.kind == .dot && p.peek_tok2.kind != .eof
&& p.peek_tok2.lit.len == 0 {
} else if p.peek_tok.kind == .dot && p.peek_token(2).kind != .eof
&& p.peek_token(2).lit.len == 0 {
// incomplete module selector must be handled by dot_expr instead
node = p.parse_ident(language)
return node
@ -1362,7 +1373,7 @@ pub fn (mut p Parser) name_expr() ast.Expr {
pos: p.tok.position()
mod: mod
}
} else if language == .js && p.peek_tok.kind == .dot && p.peek_tok2.kind == .name {
} else if language == .js && p.peek_tok.kind == .dot && p.peek_token(2).kind == .name {
// JS. function call with more than 1 dot
node = p.call_expr(language, mod)
} else {
@ -2396,7 +2407,7 @@ fn (mut p Parser) top_level_statement_start() {
p.scanner.set_is_inside_toplevel_statement(true)
p.rewind_scanner_to_current_token_in_new_mode()
$if debugscanner ? {
eprintln('>> p.top_level_statement_start | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...')
eprintln('>> p.top_level_statement_start | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit ${p.peek_token(2).lit} ${p.peek_token(3).lit} ...')
}
}
}
@ -2406,14 +2417,14 @@ fn (mut p Parser) top_level_statement_end() {
p.scanner.set_is_inside_toplevel_statement(false)
p.rewind_scanner_to_current_token_in_new_mode()
$if debugscanner ? {
eprintln('>> p.top_level_statement_end | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...')
eprintln('>> p.top_level_statement_end | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit ${p.peek_token(2).lit} ${p.peek_token(3).lit} ...')
}
}
}
fn (mut p Parser) rewind_scanner_to_current_token_in_new_mode() {
// Go back and rescan some tokens, ensuring that the parser's
// lookahead buffer p.peek_tok .. p.peek_tok3, will now contain
// lookahead buffer p.peek_tok .. p.peek_token(3), will now contain
// the correct tokens (possible comments), for the new mode
// This refilling of the lookahead buffer is needed for the
// .toplevel_comments parsing mode.
@ -2423,8 +2434,6 @@ fn (mut p Parser) rewind_scanner_to_current_token_in_new_mode() {
p.prev_tok = no_token
p.tok = no_token
p.peek_tok = no_token
p.peek_tok2 = no_token
p.peek_tok3 = no_token
for {
p.next()
// eprintln('rewinding to ${p.tok.tidx:5} | goal: ${tidx:5}')

View File

@ -149,7 +149,7 @@ pub fn (mut p Parser) expr(precedence int) ast.Expr {
if p.expecting_type {
// parse json.decode type (`json.decode([]User, s)`)
node = p.name_expr()
} else if p.is_amp && p.peek_tok.kind == .rsbr && p.peek_tok3.kind != .lcbr {
} else if p.is_amp && p.peek_tok.kind == .rsbr && p.peek_token(3).kind != .lcbr {
pos := p.tok.position()
typ := p.parse_type().to_ptr()
p.check(.lpar)
@ -369,7 +369,7 @@ pub fn (mut p Parser) expr_with_left(left ast.Expr, precedence int, is_stmt_iden
} else if p.tok.kind.is_infix() {
if p.tok.kind.is_prefix() && p.tok.line_nr != p.prev_tok.line_nr {
// return early for deref assign `*x = 2` goes to prefix expr
if p.tok.kind == .mul && p.peek_tok2.kind == .assign {
if p.tok.kind == .mul && p.peek_token(2).kind == .assign {
return node
}
// added 10/2020: LATER this will be parsed as PrefixExpr instead

View File

@ -67,8 +67,7 @@ no reason to complain about them.
When the parser determines, that it is outside of a top level statement,
it tells the scanner to backtrack s.tidx to the current p.tok index,
then it changes .is_inside_toplvl_statement to false , and refills its
lookahead buffer (i.e. p.peek_tok, p.peek_tok2, p.peek_tok3) from the
scanner.
lookahead buffer (i.e. p.peek_tok) from the scanner.
In effect, from the parser's point of view, the next tokens, that it will
receive with p.next(), will be the same, as if comments are not ignored
@ -98,10 +97,6 @@ pub enum CommentsMode {
// new scanner from file.
pub fn new_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
return new_vet_scanner_file(file_path, comments_mode, pref)
}
pub fn new_vet_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
if !os.exists(file_path) {
verror("$file_path doesn't exist")
}
@ -109,19 +104,24 @@ pub fn new_vet_scanner_file(file_path string, comments_mode CommentsMode, pref &
verror(err)
return voidptr(0)
}
mut s := new_vet_scanner(raw_text, comments_mode, pref)
s.file_path = file_path
s.file_base = os.base(file_path)
mut s := &Scanner{
pref: pref
text: raw_text
is_print_line_on_error: true
is_print_colored_error: true
is_print_rel_paths_on_error: true
is_fmt: pref.is_fmt
comments_mode: comments_mode
file_path: file_path
file_base: os.base(file_path)
}
s.init_scanner()
return s
}
// new scanner from string.
pub fn new_scanner(text string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
return new_vet_scanner(text, comments_mode, pref)
}
pub fn new_vet_scanner(text string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
return &Scanner{
mut s := &Scanner{
pref: pref
text: text
is_print_line_on_error: true
@ -132,6 +132,21 @@ pub fn new_vet_scanner(text string, comments_mode CommentsMode, pref &pref.Prefe
file_path: 'internal_memory'
file_base: 'internal_memory'
}
s.init_scanner()
return s
}
// init_scanner fills the scanner's token buffer up front, by calling
// s.scan_all_tokens_in_buffer with the scanner's own comments mode.
// The 'PARSE' timer is paused before the scan and resumed after it;
// both timer calls do nothing when no 'PARSE' timer exists yet.
// NOTE(review): presumably this keeps the scanning time out of the 'PARSE'
// measurement, so that it is only accounted for under 'SCAN' - confirm.
fn (mut s Scanner) init_scanner() {
util.get_timers().measure_pause('PARSE')
s.scan_all_tokens_in_buffer(s.comments_mode)
util.get_timers().measure_resume('PARSE')
}
// free releases the scanner's source text, by calling s.text.free().
// NOTE(review): only s.text is freed here; other scanner fields (for
// example s.all_tokens) are left untouched - confirm that this is intended.
[unsafe]
pub fn (mut s Scanner) free() {
unsafe {
s.text.free()
}
}
[inline]
@ -166,6 +181,18 @@ fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Tok
}
}
// new_eof_token builds an `.eof` token from the scanner's current state:
// an empty literal, a length of 1, the current s.pos and s.tidx, and a
// line number of s.line_nr + 1. The receiver is not mutable, so the
// scanner itself is left unchanged.
[inline]
fn (s &Scanner) new_eof_token() token.Token {
	eof_tok := token.Token{
		kind: .eof
		lit: ''
		line_nr: s.line_nr + 1
		pos: s.pos
		len: 1
		tidx: s.tidx
	}
	return eof_tok
}
[inline]
fn (mut s Scanner) new_multiline_token(tok_kind token.Kind, lit string, len int, start_line int) token.Token {
cidx := s.tidx
@ -487,22 +514,20 @@ fn (mut s Scanner) end_of_file() token.Token {
s.inc_line_number()
}
s.pos = s.text.len
return s.new_token(.eof, '', 1)
return s.new_eof_token()
}
pub fn (mut s Scanner) scan_all_tokens_in_buffer() {
pub fn (mut s Scanner) scan_all_tokens_in_buffer(mode CommentsMode) {
// s.scan_all_tokens_in_buffer is used mainly by vdoc,
// in order to implement the .toplevel_comments mode.
cmode := s.comments_mode
s.comments_mode = .parse_comments
for {
t := s.text_scan()
s.all_tokens << t
if t.kind == .eof {
break
}
util.timing_start('SCAN')
defer {
util.timing_measure_cumulative('SCAN')
}
s.comments_mode = cmode
oldmode := s.comments_mode
s.comments_mode = mode
s.scan_remaining_text()
s.comments_mode = oldmode
s.tidx = 0
$if debugscanner ? {
for t in s.all_tokens {
@ -511,11 +536,21 @@ pub fn (mut s Scanner) scan_all_tokens_in_buffer() {
}
}
pub fn (mut s Scanner) scan() token.Token {
if s.comments_mode == .toplevel_comments {
return s.buffer_scan()
pub fn (mut s Scanner) scan_remaining_text() {
for {
t := s.text_scan()
if s.comments_mode == .skip_comments && t.kind == .comment {
continue
}
s.all_tokens << t
if t.kind == .eof {
break
}
}
return s.text_scan()
}
// scan returns the result of s.buffer_scan(); it does no scanning of its own.
// NOTE(review): presumably buffer_scan hands out the next token from the
// pre-filled s.all_tokens buffer - confirm against its definition.
pub fn (mut s Scanner) scan() token.Token {
return s.buffer_scan()
}
pub fn (mut s Scanner) buffer_scan() token.Token {
@ -536,7 +571,17 @@ pub fn (mut s Scanner) buffer_scan() token.Token {
}
[inline]
fn (s Scanner) look_ahead(n int) byte {
pub fn (s &Scanner) peek_token(n int) token.Token {
	// Returns the token stored at index s.tidx + n in s.all_tokens, without
	// changing the scanner state. When that index is at or past the end of
	// the buffer, a freshly built .eof token is returned instead.
	// NOTE(review): a negative index (n < -s.tidx) is not guarded against.
	target := s.tidx + n
	if target < s.all_tokens.len {
		return s.all_tokens[target]
	}
	return s.new_eof_token()
}
[inline]
fn (s &Scanner) look_ahead(n int) byte {
if s.pos + n < s.text.len {
return s.text[s.pos + n]
} else {
@ -1292,14 +1337,19 @@ pub fn verror(s string) {
}
pub fn (mut s Scanner) codegen(newtext string) {
$if debug_codegen ? {
eprintln('scanner.codegen:\n $newtext')
}
// codegen makes sense only during normal compilation
// feeding code generated V code to vfmt or vdoc will
// cause them to output/document ephemeral stuff.
if s.comments_mode == .skip_comments {
s.all_tokens.delete_last() // remove .eof from end of .all_tokens
s.text += newtext
$if debug_codegen ? {
eprintln('scanner.codegen:\n $newtext')
}
old_tidx := s.tidx
s.tidx = s.all_tokens.len
s.scan_remaining_text()
s.tidx = old_tidx
}
}

View File

@ -33,13 +33,18 @@ pub fn timing_measure(label string) {
get_timers().show(label)
}
// timing_measure_cumulative calls measure_cumulative(label) on the timers
// returned by get_timers(). The value returned by that call is discarded.
pub fn timing_measure_cumulative(label string) {
get_timers().measure_cumulative(label)
}
// timing_set_should_print sets the should_print field, reached through
// util.timers, to the given value.
// NOTE(review): presumably util.timers is a reference to the shared timers
// instance, so the change is visible to all users of it - confirm.
pub fn timing_set_should_print(should_print bool) {
mut t := util.timers
t.should_print = should_print
}
pub fn (mut t Timers) start(name string) {
sw := time.new_stopwatch({})
mut sw := t.swatches[name] or { time.new_stopwatch({}) }
sw.start()
t.swatches[name] = sw
}
@ -54,6 +59,35 @@ pub fn (mut t Timers) measure(name string) i64 {
return ms
}
// measure_cumulative returns the value of t.measure(name), and then pauses
// the stopwatch registered under `name`, when there is one. The measurement
// is always taken first; a missing stopwatch only skips the pausing step.
pub fn (mut t Timers) measure_cumulative(name string) i64 {
	elapsed := t.measure(name)
	if name in t.swatches {
		// the stopwatch is copied out of the map, paused, and stored back
		mut watch := t.swatches[name]
		watch.pause()
		t.swatches[name] = watch
	}
	return elapsed
}
// measure_pause pauses the stopwatch registered under `name`.
// It does nothing when no stopwatch with that name exists.
pub fn (mut t Timers) measure_pause(name string) {
	if name in t.swatches {
		// the stopwatch is copied out of the map, paused, and stored back
		mut watch := t.swatches[name]
		watch.pause()
		t.swatches[name] = watch
	}
}
// measure_resume calls start() on the stopwatch registered under `name`,
// and stores it back. It does nothing when no stopwatch with that name exists.
pub fn (mut t Timers) measure_resume(name string) {
	if name in t.swatches {
		// the stopwatch is copied out of the map, started, and stored back
		mut watch := t.swatches[name]
		watch.start()
		t.swatches[name] = watch
	}
}
pub fn (mut t Timers) message(name string) string {
ms := f64(t.measure(name)) / 1000.0
value := bold('${ms:-8.3f}')
@ -68,6 +102,13 @@ pub fn (mut t Timers) show(label string) {
}
}
// show_if_exists calls t.show(label), but only when a stopwatch named
// `label` is registered in t.swatches; otherwise it does nothing.
pub fn (mut t Timers) show_if_exists(label string) {
	if label in t.swatches {
		t.show(label)
	}
}
pub fn (mut t Timers) dump_all() {
for k, _ in t.swatches {
elapsed := t.message(k)

View File

@ -81,7 +81,7 @@ fn new_parser(srce string, convert_type bool) Parser {
}
}
return Parser{
scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{})
scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{output_mode: .silent})
convert_type: convert_type
}
}