From 32550c6d6956d53de6f06c1269e42a83c0b50ef7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20D=C3=A4schle?=
Date: Fri, 10 Apr 2020 00:09:34 +0200
Subject: [PATCH] scanner: even better error messages

---
 vlib/v/ast/ast.v         |   1 +
 vlib/v/checker/checker.v |  44 ++++++++++-
 vlib/v/parser/parser.v   |  18 ++++-
 vlib/v/scanner/scanner.v | 161 ++++++++++++++++++++-------------------
 vlib/v/token/position.v  |   2 +
 vlib/v/token/token.v     |   1 +
 vlib/v/util/errors.v     |   5 +-
 7 files changed, 146 insertions(+), 86 deletions(-)

diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v
index 1645c65e12..5737e70490 100644
--- a/vlib/v/ast/ast.v
+++ b/vlib/v/ast/ast.v
@@ -43,6 +43,7 @@ pub:
 pub struct IntegerLiteral {
 pub:
 	val string
+	pos token.Position
 }
 
 pub struct FloatLiteral {
diff --git a/vlib/v/checker/checker.v b/vlib/v/checker/checker.v
index 26a32708b2..da48e1e244 100644
--- a/vlib/v/checker/checker.v
+++ b/vlib/v/checker/checker.v
@@ -567,7 +567,11 @@ pub fn (c mut Checker) enum_decl(decl ast.EnumDecl) {
 				ast.IntegerLiteral {}
 				ast.PrefixExpr {}
 				else {
-					c.error('default value for enum has to be an integer', field.pos)
+					mut pos := expr_pos(expr)
+					if pos.pos == 0 {
+						pos = field.pos
+					}
+					c.error('default value for enum has to be an integer', pos)
 				}
 			}
 		}
@@ -1036,6 +1040,42 @@ pub fn (c mut Checker) expr(node ast.Expr) table.Type {
 	return table.void_type
 }
 
+fn expr_pos(node ast.Expr) token.Position {
+	// the commented-out nodes still have to be implemented
+	match mut node {
+		ast.ArrayInit { return it.pos }
+		ast.AsCast { return it.pos }
+		ast.AssignExpr { return it.pos }
+		ast.Assoc { return it.pos }
+		// ast.BoolLiteral { }
+		// ast.CastExpr { }
+		ast.CallExpr { return it.pos }
+		// ast.CharLiteral { }
+		ast.EnumVal { return it.pos }
+		// ast.FloatLiteral { }
+		// ast.Ident { }
+		ast.IfExpr { return it.pos }
+		// ast.IfGuardExpr { }
+		ast.IndexExpr { return it.pos }
+		ast.InfixExpr { return it.pos }
+		ast.IntegerLiteral { return it.pos }
+		ast.MapInit { return it.pos }
+		ast.MatchExpr { return it.pos }
+		ast.PostfixExpr { return it.pos }
+		ast.PrefixExpr { return it.pos }
+		// ast.None { }
+		// ast.ParExpr { }
+		ast.SelectorExpr { return it.pos }
+		// ast.SizeOf { }
+		// ast.StringLiteral { }
+		// ast.StringInterLiteral { }
+		ast.StructInit { return it.pos }
+		// ast.Type { }
+		// ast.TypeOf { }
+		else { return token.Position{} }
+	}
+}
+
 pub fn (c mut Checker) ident(ident mut ast.Ident) table.Type {
 	if ident.name == c.var_decl_name { // c.checked_ident {
 		c.error('unresolved: `$ident.name`', ident.pos)
@@ -1384,7 +1424,7 @@ fn (c mut Checker) warn_or_error(s string, pos token.Position, warn bool) {
 	}
 	typ := if warn { 'warning' } else { 'error' }
 	kind := if c.pref.is_verbose { 'checker $typ #$c.nr_errors:' } else { '$typ:' }
-	ferror := util.formated_error(kind, s, c.file.path, pos)
+	ferror := util.formatted_error(kind, s, c.file.path, pos)
 	c.errors << ferror
 	if !(pos.line_nr in c.error_lines) {
 		if warn {
diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v
index 88e96f5266..819085e2ec 100644
--- a/vlib/v/parser/parser.v
+++ b/vlib/v/parser/parser.v
@@ -516,13 +516,13 @@ pub fn (p &Parser) error(s string) {
 		print_backtrace()
 		kind = 'parser error:'
 	}
-	ferror := util.formated_error(kind, s, p.file_name, p.tok.position())
+	ferror := util.formatted_error(kind, s, p.file_name, p.tok.position())
 	eprintln(ferror)
 	exit(1)
 }
 
 pub fn (p &Parser) warn(s string) {
-	ferror := util.formated_error('warning:', s, p.file_name, p.tok.position())
+	ferror := util.formatted_error('warning:', s, p.file_name, p.tok.position())
 	eprintln(ferror)
 }
 
@@ -1278,6 +1278,7 @@ fn (p mut Parser) string_expr() ast.Expr {
 }
 
 fn (p mut Parser) array_init() ast.ArrayInit {
+	first_pos := p.tok.position()
 	p.check(.lsbr)
 	// p.warn('array_init() exp=$p.expected_type')
 	mut array_type := table.void_type
@@ -1320,11 +1321,18 @@ fn (p mut Parser) array_init() ast.ArrayInit {
 	if p.tok.kind == .not {
 		p.next()
 	}
+	last_pos := p.tok.position()
+	len := last_pos.pos - first_pos.pos
+	pos := token.Position{
+		line_nr: first_pos.line_nr
+		pos: first_pos.pos
+		len: len
+	}
 	return ast.ArrayInit{
 		elem_type: elem_type
 		typ: array_type
 		exprs: exprs
-		pos: p.tok.position()
+		pos: pos
 	}
 }
 
@@ -1352,6 +1360,7 @@ fn (p mut Parser) map_init() ast.MapInit {
 
 fn (p mut Parser) parse_number_literal() ast.Expr {
 	lit := p.tok.lit
+	pos := p.tok.position()
 	mut node := ast.Expr{}
 	if lit.index_any('.eE') >= 0 {
 		node = ast.FloatLiteral{
@@ -1360,6 +1369,7 @@ fn (p mut Parser) parse_number_literal() ast.Expr {
 	} else {
 		node = ast.IntegerLiteral{
 			val: lit
+			pos: pos
 		}
 	}
 	p.next()
@@ -1851,8 +1861,8 @@ fn (p mut Parser) enum_decl() ast.EnumDecl {
 	// mut default_exprs := []ast.Expr
 	mut fields := []ast.EnumField
 	for p.tok.kind != .eof && p.tok.kind != .rcbr {
-		val := p.check_name()
 		pos := p.tok.position()
+		val := p.check_name()
 		vals << val
 		mut exprs := []ast.Expr
 		// p.warn('enum val $val')
diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v
index 12a1b57e62..a8985c223d 100644
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@@ -77,12 +77,13 @@ pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
 	}
 }
 
-fn (s &Scanner) new_token(tok_kind token.Kind, lit string) token.Token {
+fn (s &Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
 	return token.Token{
 		kind: tok_kind
 		lit: lit
 		line_nr: s.line_nr + 1
-		pos: s.pos
+		pos: s.pos - len + 1
+		len: len
 	}
 }
 
@@ -333,7 +334,7 @@ fn (s mut Scanner) skip_whitespace() {
 fn (s mut Scanner) end_of_file() token.Token {
 	s.pos = s.text.len
 	s.inc_line_number()
-	return s.new_token(.eof, '')
+	return s.new_token(.eof, '', 1)
 }
 
 pub fn (s mut Scanner) scan() token.Token {
@@ -358,10 +359,11 @@ pub fn (s mut Scanner) scan() token.Token {
 	if s.is_inter_end {
 		if s.text[s.pos] == s.quote {
 			s.is_inter_end = false
-			return s.new_token(.string, '')
+			return s.new_token(.string, '', 1)
 		}
 		s.is_inter_end = false
-		return s.new_token(.string, s.ident_string())
+		ident_string := s.ident_string()
+		return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 	}
 	s.skip_whitespace()
 	// end of file
@@ -379,7 +381,7 @@ pub fn (s mut Scanner) scan() token.Token {
 		// Check if not .eof to prevent panic
 		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
 		if token.is_key(name) {
-			return s.new_token(token.key_to_token(name), name)
+			return s.new_token(token.key_to_token(name), name, name.len)
 		}
 		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 		// at the next ', skip it
@@ -401,7 +403,7 @@ pub fn (s mut Scanner) scan() token.Token {
 			// Otherwise the scanner would be stuck at s.pos = 0
 			s.pos++
 		}
-		return s.new_token(.name, name)
+		return s.new_token(.name, name, name.len)
 	}
 	// `123`, `.123`
 	else if c.is_digit() || (c == `.` && nextc.is_digit()) {
@@ -419,7 +421,7 @@ pub fn (s mut Scanner) scan() token.Token {
 			s.pos += prefix_zero_num // jump these zeros
 		}
 		num := s.ident_number()
-		return s.new_token(.number, num)
+		return s.new_token(.number, num, num.len)
 	}
 	// Handle `'$fn()'`
 	if c == `)` && s.is_inter_start {
@@ -429,88 +431,90 @@ pub fn (s mut Scanner) scan() token.Token {
 		if next_char == s.quote {
 			s.is_inside_string = false
 		}
-		return s.new_token(.rpar, '')
+		return s.new_token(.rpar, '', 1)
 	}
 	// all other tokens
 	match c {
 		`+` {
 			if nextc == `+` {
 				s.pos++
-				return s.new_token(.inc, '')
+				return s.new_token(.inc, '', 2)
 			} else if nextc == `=` {
 				s.pos++
-				return s.new_token(.plus_assign, '')
+				return s.new_token(.plus_assign, '', 2)
 			}
-			return s.new_token(.plus, '')
+			return s.new_token(.plus, '', 1)
 		}
 		`-` {
 			if nextc == `-` {
 				s.pos++
-				return s.new_token(.dec, '')
+				return s.new_token(.dec, '', 2)
 			} else if nextc == `=` {
 				s.pos++
-				return s.new_token(.minus_assign, '')
+				return s.new_token(.minus_assign, '', 2)
 			}
-			return s.new_token(.minus, '')
+			return s.new_token(.minus, '', 1)
 		}
 		`*` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.mult_assign, '')
+				return s.new_token(.mult_assign, '', 2)
 			}
-			return s.new_token(.mul, '')
+			return s.new_token(.mul, '', 1)
 		}
 		`^` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.xor_assign, '')
+				return s.new_token(.xor_assign, '', 2)
 			}
-			return s.new_token(.xor, '')
+			return s.new_token(.xor, '', 1)
 		}
 		`%` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.mod_assign, '')
+				return s.new_token(.mod_assign, '', 2)
 			}
-			return s.new_token(.mod, '')
+			return s.new_token(.mod, '', 1)
 		}
 		`?` {
-			return s.new_token(.question, '')
+			return s.new_token(.question, '', 1)
 		}
 		single_quote, double_quote {
-			return s.new_token(.string, s.ident_string())
+			ident_string := s.ident_string()
+			return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 		}
 		`\`` { // `
 			// apostrophe balance comment. do not remove
-			return s.new_token(.chartoken, s.ident_char())
+			ident_char := s.ident_char()
+			return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
 		}
 		`(` {
-			return s.new_token(.lpar, '')
+			return s.new_token(.lpar, '', 1)
 		}
 		`)` {
-			return s.new_token(.rpar, '')
+			return s.new_token(.rpar, '', 1)
 		}
 		`[` {
-			return s.new_token(.lsbr, '')
+			return s.new_token(.lsbr, '', 1)
 		}
 		`]` {
-			return s.new_token(.rsbr, '')
+			return s.new_token(.rsbr, '', 1)
 		}
 		`{` {
 			// Skip { in `${` in strings
 			if s.is_inside_string {
 				return s.scan()
 			}
-			return s.new_token(.lcbr, '')
+			return s.new_token(.lcbr, '', 1)
 		}
 		`$` {
 			if s.is_inside_string {
-				return s.new_token(.str_dollar, '')
+				return s.new_token(.str_dollar, '', 1)
 			} else {
-				return s.new_token(.dollar, '')
+				return s.new_token(.dollar, '', 1)
 			}
 		}
 		`}` {
@@ -520,38 +524,39 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.text[s.pos] == s.quote {
 					s.is_inside_string = false
-					return s.new_token(.string, '')
+					return s.new_token(.string, '', 1)
 				}
-				return s.new_token(.string, s.ident_string())
+				ident_string := s.ident_string()
+				return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 			} else {
-				return s.new_token(.rcbr, '')
+				return s.new_token(.rcbr, '', 1)
 			}
 		}
 		`&` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.and_assign, '')
+				return s.new_token(.and_assign, '', 2)
 			}
 			if nextc == `&` {
 				s.pos++
-				return s.new_token(.and, '')
+				return s.new_token(.and, '', 2)
 			}
-			return s.new_token(.amp, '')
+			return s.new_token(.amp, '', 1)
 		}
 		`|` {
 			if nextc == `|` {
 				s.pos++
-				return s.new_token(.logical_or, '')
+				return s.new_token(.logical_or, '', 2)
 			}
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.or_assign, '')
+				return s.new_token(.or_assign, '', 2)
 			}
-			return s.new_token(.pipe, '')
+			return s.new_token(.pipe, '', 1)
 		}
 		`,` {
-			return s.new_token(.comma, '')
+			return s.new_token(.comma, '', 1)
 		}
 		`@` {
 			s.pos++
@@ -566,28 +571,28 @@ pub fn (s mut Scanner) scan() token.Token {
 			// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
 			// ... which is useful while debugging/tracing
 			if name == 'FN' {
-				return s.new_token(.string, s.fn_name)
+				return s.new_token(.string, s.fn_name, 3)
 			}
 			if name == 'VEXE' {
 				vexe := pref.vexe_path()
-				return s.new_token(.string, cescaped_path(vexe))
+				return s.new_token(.string, cescaped_path(vexe), 5)
 			}
 			if name == 'FILE' {
-				return s.new_token(.string, cescaped_path(os.real_path(s.file_path)))
+				return s.new_token(.string, cescaped_path(os.real_path(s.file_path)), 5)
 			}
 			if name == 'LINE' {
-				return s.new_token(.string, (s.line_nr + 1).str())
+				return s.new_token(.string, (s.line_nr + 1).str(), 5)
 			}
 			if name == 'COLUMN' {
-				return s.new_token(.string, (s.current_column()).str())
+				return s.new_token(.string, s.current_column().str(), 7)
 			}
 			if name == 'VHASH' {
-				return s.new_token(.string, util.vhash())
+				return s.new_token(.string, util.vhash(), 6)
 			}
 			if !token.is_key(name) {
 				s.error('@ must be used before keywords (e.g. `@type string`)')
 			}
-			return s.new_token(.name, name)
+			return s.new_token(.name, name, name.len)
 		}
 		/*
 		case `\r`:
@@ -608,11 +613,11 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.text[s.pos + 1] == `.` {
 					s.pos++
-					return s.new_token(.ellipsis, '')
+					return s.new_token(.ellipsis, '', 3)
 				}
-				return s.new_token(.dotdot, '')
+				return s.new_token(.dotdot, '', 2)
 			}
-			return s.new_token(.dot, '')
+			return s.new_token(.dot, '', 1)
 		}
 		`#` {
 			start := s.pos + 1
@@ -623,101 +628,101 @@ pub fn (s mut Scanner) scan() token.Token {
 				// s.fgenln('// shebang line "$s.line_comment"')
 				return s.scan()
 			}
-			hash := s.text[start..s.pos]
-			return s.new_token(.hash, hash.trim_space())
+			hash := s.text[start..s.pos].trim_space()
+			return s.new_token(.hash, hash, hash.len)
 		}
 		`>` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.ge, '')
+				return s.new_token(.ge, '', 2)
 			} else if nextc == `>` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
-					return s.new_token(.right_shift_assign, '')
+					return s.new_token(.right_shift_assign, '', 3)
 				}
 				s.pos++
-				return s.new_token(.right_shift, '')
+				return s.new_token(.right_shift, '', 2)
 			} else {
-				return s.new_token(.gt, '')
+				return s.new_token(.gt, '', 1)
 			}
 		}
 		0xE2 {
 			// case `≠`:
 			if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
 				s.pos += 2
-				return s.new_token(.ne, '')
+				return s.new_token(.ne, '', 3)
 			}
 			// ⩽
 			else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
 				s.pos += 2
-				return s.new_token(.le, '')
+				return s.new_token(.le, '', 3)
 			}
 			// ⩾
 			else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
 				s.pos += 2
-				return s.new_token(.ge, '')
+				return s.new_token(.ge, '', 3)
 			}
 		}
 		`<` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.le, '')
+				return s.new_token(.le, '', 2)
 			} else if nextc == `<` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
-					return s.new_token(.left_shift_assign, '')
+					return s.new_token(.left_shift_assign, '', 3)
 				}
 				s.pos++
-				return s.new_token(.left_shift, '')
+				return s.new_token(.left_shift, '', 2)
 			} else {
-				return s.new_token(.lt, '')
+				return s.new_token(.lt, '', 1)
 			}
 		}
 		`=` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.eq, '')
+				return s.new_token(.eq, '', 2)
 			} else if nextc == `>` {
 				s.pos++
-				return s.new_token(.arrow, '')
+				return s.new_token(.arrow, '', 2)
 			} else {
-				return s.new_token(.assign, '')
+				return s.new_token(.assign, '', 1)
 			}
 		}
 		`:` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.decl_assign, '')
+				return s.new_token(.decl_assign, '', 2)
 			} else {
-				return s.new_token(.colon, '')
+				return s.new_token(.colon, '', 1)
 			}
 		}
 		`;` {
-			return s.new_token(.semicolon, '')
+			return s.new_token(.semicolon, '', 1)
 		}
 		`!` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.ne, '')
+				return s.new_token(.ne, '', 2)
 			} else {
-				return s.new_token(.not, '')
+				return s.new_token(.not, '', 1)
 			}
 		}
 		`~` {
-			return s.new_token(.bit_not, '')
+			return s.new_token(.bit_not, '', 1)
 		}
 		`/` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.div_assign, '')
+				return s.new_token(.div_assign, '', 2)
 			}
 			if nextc == `/` {
 				start := s.pos + 1
@@ -739,7 +744,7 @@ pub fn (s mut Scanner) scan() token.Token {
 				if is_separate_line_comment {
 					comment = '|' + comment
 				}
-				return s.new_token(.comment, comment)
+				return s.new_token(.comment, comment, comment.len + 2)
 			}
 			// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 			// Skip the comment (return the next token)
@@ -771,12 +776,12 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.comments_mode == .parse_comments {
 					comment := s.text[start..(s.pos - 1)].trim_space()
-					return s.new_token(.comment, comment)
+					return s.new_token(.comment, comment, comment.len + 4)
 				}
 				// Skip if not in fmt mode
 				return s.scan()
 			}
-			return s.new_token(.div, '')
+			return s.new_token(.div, '', 1)
 		}
 		else {}
 	}
@@ -1020,7 +1025,7 @@ pub fn (s &Scanner) error(msg string) {
 		line_nr: s.line_nr
 		pos: s.pos
 	}
-	eprintln(util.formated_error('error', msg, s.file_path, pos))
+	eprintln(util.formatted_error('error', msg, s.file_path, pos))
 	exit(1)
 }
diff --git a/vlib/v/token/position.v b/vlib/v/token/position.v
index 77897debdc..5747183666 100644
--- a/vlib/v/token/position.v
+++ b/vlib/v/token/position.v
@@ -7,6 +7,7 @@ pub struct Position {
 pub:
 	line_nr int // the line number in the source where the token occured
 	pos     int // the position of the token in scanner text
+	len     int // length of the literal in the source
 }
 
 [inline]
@@ -14,5 +15,6 @@ pub fn (tok &Token) position() Position {
 	return Position{
 		line_nr: tok.line_nr - 1
 		pos: tok.pos
+		len: tok.len
 	}
 }
diff --git a/vlib/v/token/token.v b/vlib/v/token/token.v
index a960c0c655..32777f0344 100644
--- a/vlib/v/token/token.v
+++ b/vlib/v/token/token.v
@@ -10,6 +10,7 @@ pub:
 	line_nr int // the line number in the source where the token occured
 	// name_idx int // name table index for O(1) lookup
 	pos     int // the position of the token in scanner text
+	len     int // length of the literal
 }
 
 pub enum Kind {
diff --git a/vlib/v/util/errors.v b/vlib/v/util/errors.v
index 7d3abcc088..177d755dfe 100644
--- a/vlib/v/util/errors.v
+++ b/vlib/v/util/errors.v
@@ -41,7 +41,7 @@ pub fn new_error_manager() &EManager {
 	return &EManager{ support_color: term.can_show_color_on_stderr() }
 }
 
-pub fn formated_error(kind string /*error or warn*/, emsg string, filepath string, pos token.Position) string {
+pub fn formatted_error(kind string /*error or warn*/, emsg string, filepath string, pos token.Position) string {
 	mut path := filepath
 	verror_paths_override := os.getenv('VERROR_PATHS')
 	if verror_paths_override == 'absolute' {
@@ -95,7 +95,8 @@ pub fn formated_error(kind string /*error or warn*/, emsg string, filepath strin
 			}
 			continue
 		}
-		pointerline << if emanager.support_color { term.bold(term.blue('^')) } else { '^' }
+		underline := '~'.repeat(pos.len)
+		pointerline << if emanager.support_color { term.bold(term.blue(underline)) } else { underline }
 		break
 	}
 	clines << '	' + pointerline.join('')
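-- 
For illustration only (a hedged sketch, not part of the diff itself): because
token.Position now carries the literal's length, formatted_error() can underline
the whole offending token with '~'.repeat(pos.len) instead of printing a single
`^` at one column. Given a hypothetical source file such as

    enum Color {
        red = 'x'
    }

the checker error above would be rendered roughly as

    error: default value for enum has to be an integer
        red = 'x'
              ~~~

where the exact path/line prefix and coloring depend on util.formatted_error()
and terminal support.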