diff --git a/vlib/v/builder/c.v b/vlib/v/builder/c.v index c498d06212..b0da21f188 100644 --- a/vlib/v/builder/c.v +++ b/vlib/v/builder/c.v @@ -11,7 +11,9 @@ pub fn (mut b Builder) gen_c(v_files []string) string { util.timing_start('PARSE') b.parsed_files = parser.parse_files(v_files, b.table, b.pref, b.global_scope) b.parse_imports() - util.timing_measure('PARSE') + util.get_timers().show('SCAN') + util.get_timers().show('PARSE') + util.get_timers().show_if_exists('PARSE stmt') if b.pref.only_check_syntax { return '' } diff --git a/vlib/v/builder/js.v b/vlib/v/builder/js.v index fa06eefc31..6a5296b66a 100644 --- a/vlib/v/builder/js.v +++ b/vlib/v/builder/js.v @@ -11,7 +11,9 @@ pub fn (mut b Builder) gen_js(v_files []string) string { util.timing_start('PARSE') b.parsed_files = parser.parse_files(v_files, b.table, b.pref, b.global_scope) b.parse_imports() - util.timing_measure('PARSE') + util.get_timers().show('SCAN') + util.get_timers().show('PARSE') + util.get_timers().show_if_exists('PARSE stmt') // util.timing_start('CHECK') b.checker.check_files(b.parsed_files) diff --git a/vlib/v/builder/x64.v b/vlib/v/builder/x64.v index 5f1022966f..4e8accf50a 100644 --- a/vlib/v/builder/x64.v +++ b/vlib/v/builder/x64.v @@ -15,7 +15,9 @@ pub fn (mut b Builder) build_x64(v_files []string, out_file string) { util.timing_start('PARSE') b.parsed_files = parser.parse_files(v_files, b.table, b.pref, b.global_scope) b.parse_imports() - util.timing_measure('PARSE') + util.get_timers().show('SCAN') + util.get_timers().show('PARSE') + util.get_timers().show_if_exists('PARSE stmt') // util.timing_start('CHECK') b.checker.check_files(b.parsed_files) diff --git a/vlib/v/parser/fn.v b/vlib/v/parser/fn.v index 1ef666e6ae..2152f415b5 100644 --- a/vlib/v/parser/fn.v +++ b/vlib/v/parser/fn.v @@ -423,11 +423,13 @@ fn (mut p Parser) fn_receiver(mut params []table.Param, mut rec ReceiverParsingI if !rec.is_mut { rec.is_mut = p.tok.kind == .key_mut if rec.is_mut { - p.warn_with_pos('use `(mut f Foo)` instead of `(f mut Foo)`', lpar_pos.extend(p.peek_tok2.position())) + ptoken2 := p.peek_token(2) // needed to prevent codegen bug, where .position() expects &Token + p.warn_with_pos('use `(mut f Foo)` instead of `(f mut Foo)`', lpar_pos.extend(ptoken2.position())) } } if p.tok.kind == .key_shared { - p.error_with_pos('use `(shared f Foo)` instead of `(f shared Foo)`', lpar_pos.extend(p.peek_tok2.position())) + ptoken2 := p.peek_token(2) // needed to prevent codegen bug, where .position() expects &Token + p.error_with_pos('use `(shared f Foo)` instead of `(f shared Foo)`', lpar_pos.extend(ptoken2.position())) } rec.pos = rec_start_pos.extend(p.tok.position()) is_amp := p.tok.kind == .amp diff --git a/vlib/v/parser/for.v b/vlib/v/parser/for.v index be1e942684..1dcff1f17b 100644 --- a/vlib/v/parser/for.v +++ b/vlib/v/parser/for.v @@ -30,8 +30,8 @@ fn (mut p Parser) for_stmt() ast.Stmt { p.close_scope() return for_stmt } else if p.peek_tok.kind in [.decl_assign, .assign, .semicolon] - || p.tok.kind == .semicolon || (p.peek_tok.kind == .comma && p.peek_tok2.kind != .key_mut - && p.peek_tok3.kind != .key_in) { + || p.tok.kind == .semicolon || (p.peek_tok.kind == .comma + && p.peek_token(2).kind != .key_mut && p.peek_token(3).kind != .key_in) { // `for i := 0; i < 10; i++ {` or `for a,b := 0,1; a < 10; a++ {` if p.tok.kind == .key_mut { p.error('`mut` is not needed in `for ;;` loops: use `for i := 0; i < n; i ++ {`') @@ -43,8 +43,8 @@ fn (mut p Parser) for_stmt() ast.Stmt { mut has_init := false mut has_cond := false mut has_inc := false - mut is_multi := p.peek_tok.kind == .comma && p.peek_tok2.kind != .key_mut - && p.peek_tok3.kind != .key_in + mut is_multi := p.peek_tok.kind == .comma && p.peek_token(2).kind != .key_mut + && p.peek_token(3).kind != .key_in if p.peek_tok.kind in [.assign, .decl_assign] || is_multi { init = p.assign_stmt() has_init = true @@ -87,7 +87,7 @@ fn (mut p Parser) for_stmt() ast.Stmt { p.close_scope() return for_c_stmt } else if p.peek_tok.kind in [.key_in, .comma] - || (p.tok.kind == .key_mut && p.peek_tok2.kind in [.key_in, .comma]) { + || (p.tok.kind == .key_mut && p.peek_token(2).kind in [.key_in, .comma]) { // `for i in vals`, `for i in start .. end`, `for mut user in users`, `for i, mut user in users` mut val_is_mut := p.tok.kind == .key_mut mut_pos := p.tok.position() diff --git a/vlib/v/parser/if_match.v b/vlib/v/parser/if_match.v index c376eb5b8f..b0c7b2f8bd 100644 --- a/vlib/v/parser/if_match.v +++ b/vlib/v/parser/if_match.v @@ -182,8 +182,8 @@ fn (mut p Parser) match_expr() ast.MatchExpr { p.next() } else if (p.tok.kind == .name && !(p.tok.lit == 'C' && p.peek_tok.kind == .dot) && (p.tok.lit in table.builtin_type_names || p.tok.lit[0].is_capital() - || (p.peek_tok.kind == .dot && p.peek_tok2.lit.len > 0 - && p.peek_tok2.lit[0].is_capital()))) || p.tok.kind == .lsbr { + || (p.peek_tok.kind == .dot && p.peek_token(2).lit.len > 0 + && p.peek_token(2).lit[0].is_capital()))) || p.tok.kind == .lsbr { mut types := []table.Type{} for { // Sum type match diff --git a/vlib/v/parser/parse_type.v b/vlib/v/parser/parse_type.v index 47fe9357fd..08138cbdc5 100644 --- a/vlib/v/parser/parse_type.v +++ b/vlib/v/parser/parse_type.v @@ -55,7 +55,7 @@ pub fn (mut p Parser) parse_array_type() table.Type { } mut nr_dims := 1 // detect attr - not_attr := p.peek_tok.kind != .name && p.peek_tok2.kind !in [.semicolon, .rsbr] + not_attr := p.peek_tok.kind != .name && p.peek_token(2).kind !in [.semicolon, .rsbr] for p.tok.kind == .lsbr && not_attr { p.next() p.check(.rsbr) diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v index efc324bd1f..cde33d4978 100644 --- a/vlib/v/parser/parser.v +++ b/vlib/v/parser/parser.v @@ -30,8 +30,6 @@ mut: tok token.Token prev_tok token.Token peek_tok token.Token - peek_tok2 token.Token - peek_tok3 token.Token table &table.Table language table.Language inside_if bool @@ -85,6 +83,10 @@ pub fn parse_stmt(text string, table &table.Table, scope &ast.Scope) ast.Stmt { } } p.init_parse_fns() + util.timing_start('PARSE stmt') + defer { + util.timing_measure_cumulative('PARSE stmt') + } p.read_first_token() return p.stmt(false) } @@ -120,6 +122,13 @@ pub fn parse_text(text string, path string, table &table.Table, comments_mode sc return p.parse() } +[unsafe] +pub fn (mut p Parser) free() { + unsafe { + p.scanner.free() + } +} + pub fn (mut p Parser) set_path(path string) { p.file_name = path p.file_base = os.base(path) @@ -166,7 +175,7 @@ pub fn parse_vet_file(path string, table_ &table.Table, pref &pref.Preferences) parent: 0 } mut p := Parser{ - scanner: scanner.new_vet_scanner_file(path, .parse_comments, pref) + scanner: scanner.new_scanner_file(path, .parse_comments, pref) comments_mode: .parse_comments table: table_ pref: pref @@ -194,6 +203,10 @@ pub fn parse_vet_file(path string, table_ &table.Table, pref &pref.Preferences) } pub fn (mut p Parser) parse() ast.File { + util.timing_start('PARSE') + defer { + util.timing_measure_cumulative('PARSE') + } // comments_mode: comments_mode p.init_parse_fns() p.read_first_token() @@ -323,9 +336,6 @@ pub fn parse_files(paths []string, table &table.Table, pref &pref.Preferences, g } pub fn (mut p Parser) init_parse_fns() { - if p.comments_mode == .toplevel_comments { - p.scanner.scan_all_tokens_in_buffer() - } // p.prefix_parse_fns = make(100, 100, sizeof(PrefixParseFn)) // p.prefix_parse_fns[token.Kind.name] = parse_name } @@ -334,8 +344,11 @@ pub fn (mut p Parser) read_first_token() { // need to call next() 4 times to get peek token 1,2,3 and current token p.next() p.next() - p.next() - p.next() +} + +[inline] +pub fn (p &Parser) peek_token(n int) token.Token { + return p.scanner.peek_token(n - 2) } pub fn (mut p Parser) open_scope() { @@ -399,9 +412,7 @@ fn (mut p Parser) next_with_comment() { fn (mut p Parser) next() { p.prev_tok = p.tok p.tok = p.peek_tok - p.peek_tok = p.peek_tok2 - p.peek_tok2 = p.peek_tok3 - p.peek_tok3 = p.scanner.scan() + p.peek_tok = p.scanner.scan() /* if p.tok.kind==.comment { p.comments << ast.Comment{text:p.tok.lit, line_nr:p.tok.line_nr} @@ -1111,14 +1122,14 @@ fn (p &Parser) is_generic_call() bool { false } // use heuristics to detect `func()` from `var < expr` - return !lit0_is_capital && p.peek_tok.kind == .lt && (match p.peek_tok2.kind { + return !lit0_is_capital && p.peek_tok.kind == .lt && (match p.peek_token(2).kind { .name { // maybe `f`, `f`, assume `var < []` is invalid - p.peek_tok3.kind == .rsbr + p.peek_token(3).kind == .rsbr } else { false @@ -1205,7 +1216,7 @@ pub fn (mut p Parser) name_expr() ast.Expr { } } // Raw string (`s := r'hello \n ') - if p.peek_tok.kind == .string && !p.inside_str_interp && p.peek_tok2.kind != .colon { + if p.peek_tok.kind == .string && !p.inside_str_interp && p.peek_token(2).kind != .colon { if p.tok.lit in ['r', 'c', 'js'] && p.tok.kind == .name { return p.string_expr() } else { @@ -1233,11 +1244,11 @@ pub fn (mut p Parser) name_expr() ast.Expr { if p.tok.lit in p.imports { // mark the imported module as used p.register_used_import(p.tok.lit) - if p.peek_tok.kind == .dot && p.peek_tok2.kind != .eof && p.peek_tok2.lit.len > 0 - && p.peek_tok2.lit[0].is_capital() { + if p.peek_tok.kind == .dot && p.peek_token(2).kind != .eof + && p.peek_token(2).lit.len > 0 && p.peek_token(2).lit[0].is_capital() { is_mod_cast = true - } else if p.peek_tok.kind == .dot && p.peek_tok2.kind != .eof - && p.peek_tok2.lit.len == 0 { + } else if p.peek_tok.kind == .dot && p.peek_token(2).kind != .eof + && p.peek_token(2).lit.len == 0 { // incomplete module selector must be handled by dot_expr instead node = p.parse_ident(language) return node @@ -1362,7 +1373,7 @@ pub fn (mut p Parser) name_expr() ast.Expr { pos: p.tok.position() mod: mod } - } else if language == .js && p.peek_tok.kind == .dot && p.peek_tok2.kind == .name { + } else if language == .js && p.peek_tok.kind == .dot && p.peek_token(2).kind == .name { // JS. function call with more than 1 dot node = p.call_expr(language, mod) } else { @@ -2396,7 +2407,7 @@ fn (mut p Parser) top_level_statement_start() { p.scanner.set_is_inside_toplevel_statement(true) p.rewind_scanner_to_current_token_in_new_mode() $if debugscanner ? { - eprintln('>> p.top_level_statement_start | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...') + eprintln('>> p.top_level_statement_start | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit ${p.peek_token(2).lit} ${p.peek_token(3).lit} ...') } } } @@ -2406,14 +2417,14 @@ fn (mut p Parser) top_level_statement_end() { p.scanner.set_is_inside_toplevel_statement(false) p.rewind_scanner_to_current_token_in_new_mode() $if debugscanner ? { - eprintln('>> p.top_level_statement_end | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit $p.peek_tok2.lit $p.peek_tok3.lit ...') + eprintln('>> p.top_level_statement_end | tidx:${p.tok.tidx:-5} | p.tok.kind: ${p.tok.kind:-10} | p.tok.lit: $p.tok.lit $p.peek_tok.lit ${p.peek_token(2).lit} ${p.peek_token(3).lit} ...') } } } fn (mut p Parser) rewind_scanner_to_current_token_in_new_mode() { // Go back and rescan some tokens, ensuring that the parser's - // lookahead buffer p.peek_tok .. p.peek_tok3, will now contain + // lookahead buffer p.peek_tok .. p.peek_token(3), will now contain // the correct tokens (possible comments), for the new mode // This refilling of the lookahead buffer is needed for the // .toplevel_comments parsing mode. @@ -2423,8 +2434,6 @@ fn (mut p Parser) rewind_scanner_to_current_token_in_new_mode() { p.prev_tok = no_token p.tok = no_token p.peek_tok = no_token - p.peek_tok2 = no_token - p.peek_tok3 = no_token for { p.next() // eprintln('rewinding to ${p.tok.tidx:5} | goal: ${tidx:5}') diff --git a/vlib/v/parser/pratt.v b/vlib/v/parser/pratt.v index 0f6769b0ad..166e9659a3 100644 --- a/vlib/v/parser/pratt.v +++ b/vlib/v/parser/pratt.v @@ -149,7 +149,7 @@ pub fn (mut p Parser) expr(precedence int) ast.Expr { if p.expecting_type { // parse json.decode type (`json.decode([]User, s)`) node = p.name_expr() - } else if p.is_amp && p.peek_tok.kind == .rsbr && p.peek_tok3.kind != .lcbr { + } else if p.is_amp && p.peek_tok.kind == .rsbr && p.peek_token(3).kind != .lcbr { pos := p.tok.position() typ := p.parse_type().to_ptr() p.check(.lpar) @@ -369,7 +369,7 @@ pub fn (mut p Parser) expr_with_left(left ast.Expr, precedence int, is_stmt_iden } else if p.tok.kind.is_infix() { if p.tok.kind.is_prefix() && p.tok.line_nr != p.prev_tok.line_nr { // return early for deref assign `*x = 2` goes to prefix expr - if p.tok.kind == .mul && p.peek_tok2.kind == .assign { + if p.tok.kind == .mul && p.peek_token(2).kind == .assign { return node } // added 10/2020: LATER this will be parsed as PrefixExpr instead diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index feaf1eab05..5ac872be07 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -67,8 +67,7 @@ no reason to complain about them. When the parser determines, that it is outside of a top level statement, it tells the scanner to backtrack s.tidx to the current p.tok index, then it changes .is_inside_toplvl_statement to false , and refills its -lookahead buffer (i.e. p.peek_tok, p.peek_tok2, p.peek_tok3) from the -scanner. +lookahead buffer (i.e. p.peek_tok), from the scanner. In effect, from the parser's point of view, the next tokens, that it will receive with p.next(), will be the same, as if comments are not ignored @@ -98,10 +97,6 @@ pub enum CommentsMode { // new scanner from file. pub fn new_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner { - return new_vet_scanner_file(file_path, comments_mode, pref) -} - -pub fn new_vet_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner { if !os.exists(file_path) { verror("$file_path doesn't exist") } @@ -109,19 +104,24 @@ pub fn new_vet_scanner_file(file_path string, comments_mode CommentsMode, pref & verror(err) return voidptr(0) } - mut s := new_vet_scanner(raw_text, comments_mode, pref) - s.file_path = file_path - s.file_base = os.base(file_path) + mut s := &Scanner{ + pref: pref + text: raw_text + is_print_line_on_error: true + is_print_colored_error: true + is_print_rel_paths_on_error: true + is_fmt: pref.is_fmt + comments_mode: comments_mode + file_path: file_path + file_base: os.base(file_path) + } + s.init_scanner() return s } // new scanner from string. pub fn new_scanner(text string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner { - return new_vet_scanner(text, comments_mode, pref) -} - -pub fn new_vet_scanner(text string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner { - return &Scanner{ + mut s := &Scanner{ pref: pref text: text is_print_line_on_error: true @@ -132,6 +132,21 @@ pub fn new_vet_scanner(text string, comments_mode CommentsMode, pref &pref.Prefe file_path: 'internal_memory' file_base: 'internal_memory' } + s.init_scanner() + return s +} + +fn (mut s Scanner) init_scanner() { + util.get_timers().measure_pause('PARSE') + s.scan_all_tokens_in_buffer(s.comments_mode) + util.get_timers().measure_resume('PARSE') +} + +[unsafe] +pub fn (mut s Scanner) free() { + unsafe { + s.text.free() + } } [inline] @@ -166,6 +181,18 @@ fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Tok } } +[inline] +fn (s &Scanner) new_eof_token() token.Token { + return token.Token{ + kind: .eof + lit: '' + line_nr: s.line_nr + 1 + pos: s.pos + len: 1 + tidx: s.tidx + } +} + [inline] fn (mut s Scanner) new_multiline_token(tok_kind token.Kind, lit string, len int, start_line int) token.Token { cidx := s.tidx @@ -487,22 +514,20 @@ fn (mut s Scanner) end_of_file() token.Token { s.inc_line_number() } s.pos = s.text.len - return s.new_token(.eof, '', 1) + return s.new_eof_token() } -pub fn (mut s Scanner) scan_all_tokens_in_buffer() { +pub fn (mut s Scanner) scan_all_tokens_in_buffer(mode CommentsMode) { // s.scan_all_tokens_in_buffer is used mainly by vdoc, // in order to implement the .toplevel_comments mode. - cmode := s.comments_mode - s.comments_mode = .parse_comments - for { - t := s.text_scan() - s.all_tokens << t - if t.kind == .eof { - break - } + util.timing_start('SCAN') + defer { + util.timing_measure_cumulative('SCAN') } - s.comments_mode = cmode + oldmode := s.comments_mode + s.comments_mode = mode + s.scan_remaining_text() + s.comments_mode = oldmode s.tidx = 0 $if debugscanner ? { for t in s.all_tokens { @@ -511,11 +536,21 @@ pub fn (mut s Scanner) scan_all_tokens_in_buffer() { } } -pub fn (mut s Scanner) scan() token.Token { - if s.comments_mode == .toplevel_comments { - return s.buffer_scan() +pub fn (mut s Scanner) scan_remaining_text() { + for { + t := s.text_scan() + if s.comments_mode == .skip_comments && t.kind == .comment { + continue + } + s.all_tokens << t + if t.kind == .eof { + break + } } - return s.text_scan() +} + +pub fn (mut s Scanner) scan() token.Token { + return s.buffer_scan() } pub fn (mut s Scanner) buffer_scan() token.Token { @@ -536,7 +571,17 @@ pub fn (mut s Scanner) buffer_scan() token.Token { } [inline] -fn (s Scanner) look_ahead(n int) byte { +pub fn (s &Scanner) peek_token(n int) token.Token { + idx := s.tidx + n + if idx >= s.all_tokens.len { + return s.new_eof_token() + } + t := s.all_tokens[idx] + return t +} + +[inline] +fn (s &Scanner) look_ahead(n int) byte { if s.pos + n < s.text.len { return s.text[s.pos + n] } else { @@ -1292,14 +1337,19 @@ pub fn verror(s string) { } pub fn (mut s Scanner) codegen(newtext string) { + $if debug_codegen ? { + eprintln('scanner.codegen:\n $newtext') + } // codegen makes sense only during normal compilation // feeding code generated V code to vfmt or vdoc will // cause them to output/document ephemeral stuff. if s.comments_mode == .skip_comments { + s.all_tokens.delete_last() // remove .eof from end of .all_tokens s.text += newtext - $if debug_codegen ? { - eprintln('scanner.codegen:\n $newtext') - } + old_tidx := s.tidx + s.tidx = s.all_tokens.len + s.scan_remaining_text() + s.tidx = old_tidx } } diff --git a/vlib/v/util/timers.v b/vlib/v/util/timers.v index 64f3e43cec..60ae92b60d 100644 --- a/vlib/v/util/timers.v +++ b/vlib/v/util/timers.v @@ -33,13 +33,18 @@ pub fn timing_measure(label string) { get_timers().show(label) } +pub fn timing_measure_cumulative(label string) { + get_timers().measure_cumulative(label) +} + pub fn timing_set_should_print(should_print bool) { mut t := util.timers t.should_print = should_print } pub fn (mut t Timers) start(name string) { - sw := time.new_stopwatch({}) + mut sw := t.swatches[name] or { time.new_stopwatch({}) } + sw.start() t.swatches[name] = sw } @@ -54,6 +59,35 @@ pub fn (mut t Timers) measure(name string) i64 { return ms } +pub fn (mut t Timers) measure_cumulative(name string) i64 { + ms := t.measure(name) + if name !in t.swatches { + return ms + } + mut sw := t.swatches[name] + sw.pause() + t.swatches[name] = sw + return ms +} + +pub fn (mut t Timers) measure_pause(name string) { + if name !in t.swatches { + return + } + mut sw := t.swatches[name] + sw.pause() + t.swatches[name] = sw +} + +pub fn (mut t Timers) measure_resume(name string) { + if name !in t.swatches { + return + } + mut sw := t.swatches[name] + sw.start() + t.swatches[name] = sw +} + pub fn (mut t Timers) message(name string) string { ms := f64(t.measure(name)) / 1000.0 value := bold('${ms:-8.3f}') @@ -68,6 +102,13 @@ pub fn (mut t Timers) show(label string) { } } +pub fn (mut t Timers) show_if_exists(label string) { + if label !in t.swatches { + return + } + t.show(label) +} + pub fn (mut t Timers) dump_all() { for k, _ in t.swatches { elapsed := t.message(k) diff --git a/vlib/x/json2/decoder.v b/vlib/x/json2/decoder.v index 1c749f06b0..b88d4be3d2 100644 --- a/vlib/x/json2/decoder.v +++ b/vlib/x/json2/decoder.v @@ -81,7 +81,7 @@ fn new_parser(srce string, convert_type bool) Parser { } } return Parser{ - scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{}) + scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{output_mode: .silent}) convert_type: convert_type } }