From 8b8cd139294b57c4331264e0d72a6cbedf45c061 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Sun, 29 Sep 2019 04:33:23 +0300 Subject: [PATCH] parser: add some infrastructure for more specific errors * compiler: change s.line_nr in just one place, so that s.last_nl_pos will be updated in tandem too. * Cleanup spurious spaces. * Store ScannerPos info inside the cached tokens. Use the stored information when errors are encountered. * Fix #2079 ( cannot use type...in assignment ). * do not store scannerpos per each token, instead rescan the source once on error to get the position. * compiler: implement highlighting for errors. Use only line/col info stored in the cached tokens. * fixing building on windows * Split can_show_color to _nix and _win files. --- compiler/fn.v | 26 +++---- compiler/parser.v | 111 +++++++++++++++----------- compiler/scanner.v | 137 +++++++++++++++++++-------------- compiler/table.v | 2 +- vlib/term/can_show_color_nix.v | 11 +++ vlib/term/can_show_color_win.v | 10 +++ 6 files changed, 181 insertions(+), 116 deletions(-) create mode 100644 vlib/term/can_show_color_nix.v create mode 100644 vlib/term/can_show_color_win.v diff --git a/compiler/fn.v b/compiler/fn.v index 219e02020f..8ba5779a89 100644 --- a/compiler/fn.v +++ b/compiler/fn.v @@ -33,7 +33,7 @@ mut: is_decl bool // type myfn fn(int, int) defer_text []string //gen_types []string - fn_name_sp ScannerPos + fn_name_token Tok } fn (p &Parser) find_var(name string) ?Var { @@ -110,9 +110,8 @@ fn (p mut Parser) known_var(name string) bool { fn (p mut Parser) register_var(v Var) { mut new_var := {v | idx: p.var_idx, scope_level: p.cur_fn.scope_level} if v.line_nr == 0 { - spos := p.scanner.get_scanner_pos() - new_var.scanner_pos = spos - new_var.line_nr = spos.line_nr + new_var.token = p.cur_tok() + new_var.line_nr = new_var.token.line_nr } // Expand the array if p.var_idx >= p.local_vars.len { @@ -214,7 +213,7 @@ fn (p mut Parser) fn_decl() { ref: is_amp ptr: is_mut line_nr: p.scanner.line_nr - scanner_pos: p.scanner.get_scanner_pos() + token: p.cur_tok() } f.args << receiver p.register_var(receiver) @@ -227,7 +226,7 @@ fn (p mut Parser) fn_decl() { else { f.name = p.check_name() } - f.fn_name_sp = p.scanner.get_scanner_pos() + f.fn_name_token = p.cur_tok() // C function header def? (fn C.NSMakeRect(int,int,int,int)) is_c := f.name == 'C' && p.tok == .dot // Just fn signature? only builtin.v + default build mode @@ -335,7 +334,7 @@ fn (p mut Parser) fn_decl() { // Special case for main() args if f.name == 'main__main' && !has_receiver { if str_args != '' || typ != 'void' { - p.error_with_position('fn main must have no arguments and no return values', f.fn_name_sp) + p.error_with_tok('fn main must have no arguments and no return values', f.fn_name_token) } } dll_export_linkage := if p.os == .msvc && p.attr == 'live' && p.pref.is_so { @@ -440,7 +439,7 @@ fn (p mut Parser) fn_decl() { if f.name == 'main__main' || f.name == 'main' || f.name == 'WinMain' { if p.pref.is_test && !p.scanner.file_path.contains('/volt') { - p.error_with_position('tests cannot have function `main`', f.fn_name_sp) + p.error_with_tok('tests cannot have function `main`', f.fn_name_token) } } // println('is_c=$is_c name=$f.name') @@ -473,7 +472,7 @@ fn (p mut Parser) fn_decl() { } } if typ != 'void' && !p.returns { - p.error_with_position('$f.name must return "$typ"', f.fn_name_sp) + p.error_with_tok('$f.name must return "$typ"', f.fn_name_token) } if p.attr == 'live' && p.pref.is_so { //p.genln('// live_function body end') @@ -536,10 +535,10 @@ fn (p mut Parser) check_unused_variables() { break } if !var.is_used && !p.pref.is_repl && !var.is_arg && !p.pref.translated { - p.production_error('`$var.name` declared and not used', var.scanner_pos ) + p.production_error_with_token('`$var.name` declared and not used', var.token ) } if !var.is_changed && var.is_mut && !p.pref.is_repl && !p.pref.translated { - p.error_with_position( '`$var.name` is declared as mutable, but it was never changed', var.scanner_pos ) + p.error_with_tok( '`$var.name` is declared as mutable, but it was never changed', var.token ) } } } @@ -699,6 +698,7 @@ fn (p mut Parser) fn_args(f mut Fn) { if f.is_interface { int_arg := Var { typ: f.receiver_typ + token: p.cur_tok() } f.args << int_arg } @@ -714,7 +714,7 @@ fn (p mut Parser) fn_args(f mut Fn) { is_arg: true // is_mut: is_mut line_nr: p.scanner.line_nr - scanner_pos: p.scanner.get_scanner_pos() + token: p.cur_tok() } // f.register_var(v) f.args << v @@ -757,7 +757,7 @@ fn (p mut Parser) fn_args(f mut Fn) { is_mut: is_mut ptr: is_mut line_nr: p.scanner.line_nr - scanner_pos: p.scanner.get_scanner_pos() + token: p.cur_tok() } p.register_var(v) f.args << v diff --git a/compiler/parser.v b/compiler/parser.v index 6b9a8e4785..982a7a241e 100644 --- a/compiler/parser.v +++ b/compiler/parser.v @@ -12,11 +12,11 @@ import ( // TODO rename to Token // TODO rename enum Token to TokenType struct Tok { - tok Token - lit string - line_nr int + tok Token // the token number/enum; for quick comparisons + lit string // literal representation of the token + line_nr int // the line number in the source where the token occured name_idx int // name table index for O(1) lookup - col int + col int // the column where the token ends } struct Parser { @@ -154,6 +154,7 @@ fn (v mut V) new_parser(scanner &Scanner, id string) Parser { } if p.pref.is_repl { p.scanner.should_print_line_on_error = false + p.scanner.should_print_errors_in_color = false } v.cgen.line_directives = v.pref.is_debuggable // v.cgen.file = path @@ -163,11 +164,11 @@ fn (v mut V) new_parser(scanner &Scanner, id string) Parser { fn (p mut Parser) scan_tokens() { for { res := p.scanner.scan() - p.tokens << Tok { + p.tokens << Tok{ tok: res.tok lit: res.lit line_nr: p.scanner.line_nr - col: p.scanner.pos - p.scanner.last_nl_pos + col: p.scanner.pos - p.scanner.last_nl_pos } if res.tok == .eof { break @@ -216,7 +217,7 @@ fn (p &Parser) cur_tok() Tok { fn (p &Parser) peek_token() Tok { if p.token_idx >= p.tokens.len - 2 { - return Tok{tok:Token.eof} + return Tok{ tok:Token.eof } } tok := p.tokens[p.token_idx] return tok @@ -886,30 +887,13 @@ if p.scanner.line_comment != '' { } } +///////////////////////////////////////////////////////////////// fn (p &Parser) warn(s string) { - println('warning: $p.scanner.file_path:${p.scanner.line_nr+1}: $s') + e := normalized_error( s ) + println('warning: $p.scanner.file_path:${p.scanner.line_nr+1}: $e') } - -fn (p mut Parser) error_with_position(e string, sp ScannerPos) { - p.scanner.goto_scanner_position( sp ) - p.error( e ) -} - -fn (p mut Parser) production_error(e string, sp ScannerPos) { - if p.pref.is_prod { - p.scanner.goto_scanner_position( sp ) - p.error( e ) - }else { - // on a warning, restore the scanner state after printing the warning: - cpos := p.scanner.get_scanner_pos() - p.scanner.goto_scanner_position( sp ) - p.warn(e) - p.scanner.goto_scanner_position( cpos ) - } -} - -fn (p mut Parser) error(s string) { +fn (p mut Parser) print_error_context(){ // Dump all vars and types for debugging if p.pref.is_debug { // os.write_to_file('/var/tmp/lang.types', '')//pes(p.table.types)) @@ -935,14 +919,53 @@ fn (p mut Parser) error(s string) { print_backtrace() } // p.scanner.debug_tokens() - // Print `[]int` instead of `array_int` in errors - e := s.replace('array_', '[]') - .replace('__', '.') - .replace('Option_', '?') - .replace('main.', '') - p.scanner.error_with_col(e, p.tokens[p.token_idx-1].col) } +fn normalized_error( s string ) string { + // Print `[]int` instead of `array_int` in errors + return s.replace('array_', '[]') + .replace('__', '.') + .replace('Option_', '?') + .replace('main.', '') +} + +fn (p mut Parser) error_with_position(s string, sp ScannerPos) { + p.print_error_context() + e := normalized_error( s ) + p.scanner.goto_scanner_position( sp ) + p.scanner.error_with_col(e, sp.pos - sp.last_nl_pos) +} + +fn (p mut Parser) warn_with_position(e string, sp ScannerPos) { + // on a warning, restore the scanner state after printing the warning: + cpos := p.scanner.get_scanner_pos() + p.scanner.goto_scanner_position( sp ) + p.warn(e) + p.scanner.goto_scanner_position( cpos ) +} + +fn (p mut Parser) production_error_with_token(e string, tok Tok) { + if p.pref.is_prod { + p.error_with_tok( e, tok ) + }else { + p.warn_with_token( e, tok ) + } +} + +fn (p &Parser) warn_with_token(s string, tok Tok) { + e := normalized_error( s ) + println('warning: $p.scanner.file_path:${tok.line_nr+1}:${tok.col}: $e') +} +fn (p mut Parser) error_with_tok(s string, tok Tok) { + p.error_with_position(s, p.scanner.get_scanner_pos_of_token(tok) ) +} + +fn (p mut Parser) error(s string) { + // no positioning info, so just assume that the last token was the culprit: + p.error_with_tok(s, p.tokens[p.token_idx-1] ) +} +///////////////////////////////////////////////////////////////// + fn (p &Parser) first_pass() bool { return p.pass == .decl } @@ -1382,6 +1405,7 @@ fn (p mut Parser) statement(add_semi bool) string { // is_map: are we in map assignment? (m[key] = val) if yes, dont generate '=' // this can be `user = ...` or `user.field = ...`, in both cases `v` is `user` fn (p mut Parser) assign_statement(v Var, ph int, is_map bool) { + errtok := p.cur_tok() //p.log('assign_statement() name=$v.name tok=') is_vid := p.fileis('vid') // TODO remove tok := p.tok @@ -1436,8 +1460,7 @@ fn ($v.name mut $v.typ) $p.cur_fn.name (...) { p.cgen.resetln(left + 'opt_ok($expr, sizeof($typ))') } else if !p.builtin_mod && !p.check_types_no_throw(expr_type, p.assigned_type) { - p.scanner.line_nr-- - p.error('cannot use type `$expr_type` as type `$p.assigned_type` in assignment') + p.error_with_tok( 'cannot use type `$expr_type` as type `$p.assigned_type` in assignment', errtok) } if (is_str || is_ustr) && tok == .plus_assign && !p.is_js { p.gen(')') @@ -1488,7 +1511,7 @@ fn (p mut Parser) var_decl() { } typ := types[i] // println('var decl tok=${p.strtok()} ismut=$is_mut') - var_scanner_pos := p.scanner.get_scanner_pos() + var_token := p.cur_tok() // name := p.check_name() // p.var_decl_name = name // Don't allow declaring a variable with the same name. Even in a child scope @@ -1515,8 +1538,8 @@ fn (p mut Parser) var_decl() { typ: typ is_mut: is_mut is_alloc: p.is_alloc || typ.starts_with('array_') - scanner_pos: var_scanner_pos - line_nr: var_scanner_pos.line_nr + line_nr: var_token.line_nr + token: var_token }) //if p.fileis('str.v') { //if p.is_alloc { println('REG VAR IS ALLOC $name') } @@ -3783,7 +3806,7 @@ fn (p &Parser) prepend_mod(name string) string { fn (p mut Parser) go_statement() { p.check(.key_go) - mut gopos := p.scanner.get_scanner_pos() + mut gotoken := p.cur_tok() // TODO copypasta of name_expr() ? if p.peek() == .dot { // Method @@ -3792,12 +3815,12 @@ fn (p mut Parser) go_statement() { return } p.mark_var_used(v) - gopos = p.scanner.get_scanner_pos() + gotoken = p.cur_tok() p.next() p.check(.dot) typ := p.table.find_type(v.typ) method := p.table.find_method(typ, p.lit) or { - p.error_with_position('go method missing $var_name', gopos) + p.error_with_tok('go method missing $var_name', gotoken) return } p.async_fn_call(method, 0, var_name, v.typ) @@ -3807,11 +3830,11 @@ fn (p mut Parser) go_statement() { // Normal function f := p.table.find_fn(p.prepend_mod(f_name)) or { println( p.table.debug_fns() ) - p.error_with_position('can not find function $f_name', gopos) + p.error_with_tok('can not find function $f_name', gotoken) return } if f.name == 'println' || f.name == 'print' { - p.error_with_position('`go` cannot be used with `println`', gopos) + p.error_with_tok('`go` cannot be used with `println`', gotoken) } p.async_fn_call(f, 0, '', '') } diff --git a/compiler/scanner.v b/compiler/scanner.v index f9404e5843..b8bc2f1708 100644 --- a/compiler/scanner.v +++ b/compiler/scanner.v @@ -7,11 +7,14 @@ module main import ( os strings + term ) const ( single_quote = `\'` double_quote = `"` + error_context_before = 2 // how many lines of source context to print before the pointer line + error_context_after = 2 // ^^^ same, but after ) struct Scanner { @@ -34,7 +37,9 @@ mut: prev_tok Token fn_name string // needed for @FN should_print_line_on_error bool + should_print_errors_in_color bool quote byte // which quote is used to denote current string: ' or " + file_lines []string // filled *only on error* by rescanning the source till the error (and several lines more) } // new scanner from file. @@ -71,6 +76,7 @@ fn new_scanner(text string) &Scanner { text: text fmt_out: strings.new_builder(1000) should_print_line_on_error: true + should_print_errors_in_color: true } } @@ -98,6 +104,48 @@ fn (s mut Scanner) goto_scanner_position(scp ScannerPos) { s.last_nl_pos = scp.last_nl_pos } +// get_scanner_pos_of_token rescans *the whole source* till it reaches {t.line_nr, t.col} . +fn (s mut Scanner) get_scanner_pos_of_token(t Tok) ScannerPos { + // This rescanning is done just once on error, so it is fine for now. + // Be careful for the performance implications, if you want to + // do it more frequently. The alternative would be to store + // the scanpos (12 bytes) for each token, and there are potentially many tokens. + tline := t.line_nr + tcol := if t.line_nr == 0 { t.col + 1 } else { t.col - 1 } + // save the current scanner position, it will be restored later + cpos := s.get_scanner_pos() + mut sptoken := ScannerPos{} + // Starting from the start, scan the source lines + // till the desired tline is reached, then + // s.pos + tcol would be the proper position + // of the token. Continue scanning for some more lines of context too. + s.goto_scanner_position(ScannerPos{}) + s.file_lines = []string + mut prevlinepos := 0 + for { + prevlinepos = s.pos + if s.pos >= s.text.len { break } + if s.line_nr > tline + 10 { break } + //////////////////////////////////////// + if tline == s.line_nr { + sptoken = s.get_scanner_pos() + sptoken.pos += tcol + } + s.ignore_line() s.eat_single_newline() + sline := s.text.substr( prevlinepos, s.pos ).trim_right('\r\n') + s.file_lines << sline + } + ////////////////////////////////////////////////// + s.goto_scanner_position(cpos) + return sptoken +} +fn (s mut Scanner) eat_single_newline(){ + if s.pos >= s.text.len { return } + if s.expect('\r\n', s.pos) { s.pos += 2 return } + if s.text[ s.pos ] == `\n` { s.pos ++ return } + if s.text[ s.pos ] == `\r` { s.pos ++ return } +} + // TODO remove once multiple return values are implemented struct ScanRes { @@ -600,83 +648,56 @@ fn (s mut Scanner) scan() ScanRes { return scan_res(.eof, '') } -fn (s &Scanner) find_current_line_start_position() int { - if s.pos >= s.text.len { return s.pos } - mut linestart := s.pos - for { - if linestart <= 0 { - linestart = 1 - break - } - if s.text[linestart] == 10 || s.text[linestart] == 13 { - linestart++ - break - } - linestart-- - } - return linestart -} - -fn (s &Scanner) find_current_line_end_position() int { - if s.pos >= s.text.len { return s.pos } - mut lineend := s.pos - for { - if lineend >= s.text.len { - lineend = s.text.len - break - } - if s.text[lineend] == 10 || s.text[lineend] == 13 { - break - } - lineend++ - } - return lineend -} - fn (s &Scanner) current_column() int { - return s.pos - s.find_current_line_start_position() + return s.pos - s.last_nl_pos } +fn imax(a,b int) int { return if a > b { a } else { b } } +fn imin(a,b int) int { return if a < b { a } else { b } } fn (s &Scanner) error(msg string) { s.error_with_col(msg, 0) } fn (s &Scanner) error_with_col(msg string, col int) { - column := col-1 - linestart := s.find_current_line_start_position() - lineend := s.find_current_line_end_position() - - fullpath := os.realpath( s.file_path ) + fullpath := os.realpath( s.file_path ) + color_on := s.should_print_errors_in_color && term.can_show_color() + final_message := if color_on { term.red( term.bold( msg ) ) } else { msg } // The filepath:line:col: format is the default C compiler // error output format. It allows editors and IDE's like // emacs to quickly find the errors in the output // and jump to their source with a keyboard shortcut. // Using only the filename leads to inability of IDE/editors // to find the source file, when it is in another folder. - //println('${s.file_path}:${s.line_nr + 1}:${column+1}: $msg') - println('${fullpath}:${s.line_nr + 1}:${column+1}: $msg') - - if s.should_print_line_on_error && lineend > linestart { - line := s.text.substr( linestart, lineend ) - // The pointerline should have the same spaces/tabs as the offending - // line, so that it prints the ^ character exactly on the *same spot* - // where it is needed. That is the reason we can not just - // use strings.repeat(` `, column) to form it. - pointerline := line.clone() - mut pl := pointerline.str - for i,c in line { - pl[i] = ` ` - if i == column { pl[i] = `^` } - else if c.is_space() { pl[i] = c } + eprintln('${fullpath}:${s.line_nr + 1}:${col}: $final_message') + + if s.should_print_line_on_error && s.file_lines.len > 0 { + context_start_line := imax(0, (s.line_nr - error_context_before + 1 )) + context_end_line := imin(s.file_lines.len, (s.line_nr + error_context_after + 1 )) + for cline := context_start_line; cline < context_end_line; cline++ { + line := '${(cline+1):5d}| ' + s.file_lines[ cline ] + coloredline := if cline == s.line_nr && color_on { term.red(line) } else { line } + println( coloredline ) + if cline != s.line_nr { continue } + // The pointerline should have the same spaces/tabs as the offending + // line, so that it prints the ^ character exactly on the *same spot* + // where it is needed. That is the reason we can not just + // use strings.repeat(` `, col) to form it. + mut pointerline := []string + for i , c in line { + if i < col { + x := if c.is_space() { c } else { ` ` } + pointerline << x.str() + continue + } + pointerline << if color_on { term.bold( term.blue('^') ) } else { '^' } + break + } + println( ' ' + pointerline.join('') ) } - println(line) - println(pointerline) } - exit(1) } - fn (s Scanner) count_symbol_before(p int, sym byte) int { mut count := 0 for i:=p; i>=0; i-- { diff --git a/compiler/table.v b/compiler/table.v index 4bf4b4ca82..a9a0cabdaf 100644 --- a/compiler/table.v +++ b/compiler/table.v @@ -96,8 +96,8 @@ mut: scope_level int is_c bool // todo remove once `typ` is `Type`, not string is_moved bool - scanner_pos ScannerPos // TODO: use only scanner_pos, remove line_nr line_nr int + token Tok // TODO: use only var.token.line_nr, remove var.line_nr } struct Type { diff --git a/vlib/term/can_show_color_nix.v b/vlib/term/can_show_color_nix.v new file mode 100644 index 0000000000..f9f7a50ec8 --- /dev/null +++ b/vlib/term/can_show_color_nix.v @@ -0,0 +1,11 @@ +module term + +import os + +fn C.isatty(int) int + +pub fn can_show_color() bool { + if os.getenv('TERM') == 'dumb' { return false } + if C.isatty(1) != 0 { return true } + return false +} diff --git a/vlib/term/can_show_color_win.v b/vlib/term/can_show_color_win.v new file mode 100644 index 0000000000..b30730552e --- /dev/null +++ b/vlib/term/can_show_color_win.v @@ -0,0 +1,10 @@ +module term + +import os + +// TODO: implement proper checking on windows too. +// For now, just return false by default +pub fn can_show_color() bool { + if os.getenv('TERM') == 'dumb' { return false } + return false +}