scanner: even better error messages

pull/4322/head
Daniel Däschle 2020-04-10 00:09:34 +02:00 committed by GitHub
parent 320fad49c5
commit 32550c6d69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 148 additions and 86 deletions

View File

@ -43,6 +43,7 @@ pub:
pub struct IntegerLiteral {
pub:
val string
pos token.Position
}
pub struct FloatLiteral {

View File

@ -567,7 +567,11 @@ pub fn (c mut Checker) enum_decl(decl ast.EnumDecl) {
ast.IntegerLiteral {}
ast.PrefixExpr {}
else {
c.error("default value for enum has to be an integer", field.pos)
mut pos := expr_pos(expr)
if pos.pos == 0 {
pos = field.pos
}
c.error("default value for enum has to be an integer", pos)
}
}
}
@ -1036,6 +1040,42 @@ pub fn (c mut Checker) expr(node ast.Expr) table.Type {
return table.void_type
}
fn expr_pos(node ast.Expr) token.Position {
// all uncommented have to be implemented
match mut node {
ast.ArrayInit { return it.pos }
ast.AsCast { return it.pos }
ast.AssignExpr { return it.pos }
ast.Assoc { return it.pos }
// ast.BoolLiteral { }
// ast.CastExpr { }
ast.CallExpr { return it.pos }
// ast.CharLiteral { }
ast.EnumVal { return it.pos }
// ast.FloatLiteral { }
// ast.Ident { }
ast.IfExpr { return it.pos }
// ast.IfGuardExpr { }
ast.IndexExpr { return it.pos }
ast.InfixExpr { return it.pos }
ast.IntegerLiteral { return it.pos }
ast.MapInit { return it.pos }
ast.MatchExpr { return it.pos }
ast.PostfixExpr { return it.pos }
ast.PrefixExpr { return it.pos }
// ast.None { }
// ast.ParExpr { }
ast.SelectorExpr { return it.pos }
// ast.SizeOf { }
// ast.StringLiteral { }
// ast.StringInterLiteral { }
ast.StructInit { return it.pos }
// ast.Type { }
// ast.TypeOf { }
else { return token.Position{} }
}
}
pub fn (c mut Checker) ident(ident mut ast.Ident) table.Type {
if ident.name == c.var_decl_name { // c.checked_ident {
c.error('unresolved: `$ident.name`', ident.pos)
@ -1384,7 +1424,7 @@ fn (c mut Checker) warn_or_error(s string, pos token.Position, warn bool) {
}
typ := if warn { 'warning' } else { 'error' }
kind := if c.pref.is_verbose { 'checker $typ #$c.nr_errors:' } else { '$typ:' }
ferror := util.formated_error(kind, s, c.file.path, pos)
ferror := util.formatted_error(kind, s, c.file.path, pos)
c.errors << ferror
if !(pos.line_nr in c.error_lines) {
if warn {

View File

@ -516,13 +516,13 @@ pub fn (p &Parser) error(s string) {
print_backtrace()
kind = 'parser error:'
}
ferror := util.formated_error(kind, s, p.file_name, p.tok.position())
ferror := util.formatted_error(kind, s, p.file_name, p.tok.position())
eprintln(ferror)
exit(1)
}
pub fn (p &Parser) warn(s string) {
ferror := util.formated_error('warning:', s, p.file_name, p.tok.position())
ferror := util.formatted_error('warning:', s, p.file_name, p.tok.position())
eprintln(ferror)
}
@ -1278,6 +1278,7 @@ fn (p mut Parser) string_expr() ast.Expr {
}
fn (p mut Parser) array_init() ast.ArrayInit {
first_pos := p.tok.position()
p.check(.lsbr)
// p.warn('array_init() exp=$p.expected_type')
mut array_type := table.void_type
@ -1320,11 +1321,18 @@ fn (p mut Parser) array_init() ast.ArrayInit {
if p.tok.kind == .not {
p.next()
}
last_pos := p.tok.position()
len := last_pos.pos - first_pos.pos
pos := token.Position{
line_nr: first_pos.line_nr,
pos: first_pos.pos
len: len
}
return ast.ArrayInit{
elem_type: elem_type
typ: array_type
exprs: exprs
pos: p.tok.position()
pos: pos
}
}
@ -1352,6 +1360,7 @@ fn (p mut Parser) map_init() ast.MapInit {
fn (p mut Parser) parse_number_literal() ast.Expr {
lit := p.tok.lit
pos := p.tok.position()
mut node := ast.Expr{}
if lit.index_any('.eE') >= 0 {
node = ast.FloatLiteral{
@ -1360,6 +1369,7 @@ fn (p mut Parser) parse_number_literal() ast.Expr {
} else {
node = ast.IntegerLiteral{
val: lit
pos: pos
}
}
p.next()
@ -1851,8 +1861,8 @@ fn (p mut Parser) enum_decl() ast.EnumDecl {
// mut default_exprs := []ast.Expr
mut fields := []ast.EnumField
for p.tok.kind != .eof && p.tok.kind != .rcbr {
val := p.check_name()
pos := p.tok.position()
val := p.check_name()
vals << val
mut exprs := []ast.Expr
// p.warn('enum val $val')

View File

@ -77,12 +77,13 @@ pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
}
}
fn (s &Scanner) new_token(tok_kind token.Kind, lit string) token.Token {
fn (s &Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
return token.Token{
kind: tok_kind
lit: lit
line_nr: s.line_nr + 1
pos: s.pos
pos: s.pos - len + 1
len: len
}
}
@ -333,7 +334,7 @@ fn (s mut Scanner) skip_whitespace() {
fn (s mut Scanner) end_of_file() token.Token {
s.pos = s.text.len
s.inc_line_number()
return s.new_token(.eof, '')
return s.new_token(.eof, '', 1)
}
pub fn (s mut Scanner) scan() token.Token {
@ -358,10 +359,11 @@ pub fn (s mut Scanner) scan() token.Token {
if s.is_inter_end {
if s.text[s.pos] == s.quote {
s.is_inter_end = false
return s.new_token(.string, '')
return s.new_token(.string, '', 1)
}
s.is_inter_end = false
return s.new_token(.string, s.ident_string())
ident_string := s.ident_string()
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
}
s.skip_whitespace()
// end of file
@ -379,7 +381,7 @@ pub fn (s mut Scanner) scan() token.Token {
// Check if not .eof to prevent panic
next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
if token.is_key(name) {
return s.new_token(token.key_to_token(name), name)
return s.new_token(token.key_to_token(name), name, name.len)
}
// 'asdf $b' => "b" is the last name in the string, dont start parsing string
// at the next ', skip it
@ -401,7 +403,7 @@ pub fn (s mut Scanner) scan() token.Token {
// Otherwise the scanner would be stuck at s.pos = 0
s.pos++
}
return s.new_token(.name, name)
return s.new_token(.name, name, name.len)
}
// `123`, `.123`
else if c.is_digit() || (c == `.` && nextc.is_digit()) {
@ -419,7 +421,7 @@ pub fn (s mut Scanner) scan() token.Token {
s.pos += prefix_zero_num // jump these zeros
}
num := s.ident_number()
return s.new_token(.number, num)
return s.new_token(.number, num, num.len)
}
// Handle `'$fn()'`
if c == `)` && s.is_inter_start {
@ -429,88 +431,90 @@ pub fn (s mut Scanner) scan() token.Token {
if next_char == s.quote {
s.is_inside_string = false
}
return s.new_token(.rpar, '')
return s.new_token(.rpar, '', 1)
}
// all other tokens
match c {
`+` {
if nextc == `+` {
s.pos++
return s.new_token(.inc, '')
return s.new_token(.inc, '', 2)
}
else if nextc == `=` {
s.pos++
return s.new_token(.plus_assign, '')
return s.new_token(.plus_assign, '', 2)
}
return s.new_token(.plus, '')
return s.new_token(.plus, '', 1)
}
`-` {
if nextc == `-` {
s.pos++
return s.new_token(.dec, '')
return s.new_token(.dec, '', 2)
}
else if nextc == `=` {
s.pos++
return s.new_token(.minus_assign, '')
return s.new_token(.minus_assign, '', 2)
}
return s.new_token(.minus, '')
return s.new_token(.minus, '', 1)
}
`*` {
if nextc == `=` {
s.pos++
return s.new_token(.mult_assign, '')
return s.new_token(.mult_assign, '', 2)
}
return s.new_token(.mul, '')
return s.new_token(.mul, '', 1)
}
`^` {
if nextc == `=` {
s.pos++
return s.new_token(.xor_assign, '')
return s.new_token(.xor_assign, '', 2)
}
return s.new_token(.xor, '')
return s.new_token(.xor, '', 1)
}
`%` {
if nextc == `=` {
s.pos++
return s.new_token(.mod_assign, '')
return s.new_token(.mod_assign, '', 2)
}
return s.new_token(.mod, '')
return s.new_token(.mod, '', 1)
}
`?` {
return s.new_token(.question, '')
return s.new_token(.question, '', 1)
}
single_quote, double_quote {
return s.new_token(.string, s.ident_string())
ident_string := s.ident_string()
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
}
`\`` {
// ` // apostrophe balance comment. do not remove
return s.new_token(.chartoken, s.ident_char())
ident_char := s.ident_char()
return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
}
`(` {
return s.new_token(.lpar, '')
return s.new_token(.lpar, '', 1)
}
`)` {
return s.new_token(.rpar, '')
return s.new_token(.rpar, '', 1)
}
`[` {
return s.new_token(.lsbr, '')
return s.new_token(.lsbr, '', 1)
}
`]` {
return s.new_token(.rsbr, '')
return s.new_token(.rsbr, '', 1)
}
`{` {
// Skip { in `${` in strings
if s.is_inside_string {
return s.scan()
}
return s.new_token(.lcbr, '')
return s.new_token(.lcbr, '', 1)
}
`$` {
if s.is_inside_string {
return s.new_token(.str_dollar, '')
return s.new_token(.str_dollar, '', 1)
}
else {
return s.new_token(.dollar, '')
return s.new_token(.dollar, '', 1)
}
}
`}` {
@ -520,38 +524,39 @@ pub fn (s mut Scanner) scan() token.Token {
s.pos++
if s.text[s.pos] == s.quote {
s.is_inside_string = false
return s.new_token(.string, '')
return s.new_token(.string, '', 1)
}
return s.new_token(.string, s.ident_string())
ident_string := s.ident_string()
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
}
else {
return s.new_token(.rcbr, '')
return s.new_token(.rcbr, '', 1)
}
}
`&` {
if nextc == `=` {
s.pos++
return s.new_token(.and_assign, '')
return s.new_token(.and_assign, '', 2)
}
if nextc == `&` {
s.pos++
return s.new_token(.and, '')
return s.new_token(.and, '', 2)
}
return s.new_token(.amp, '')
return s.new_token(.amp, '', 1)
}
`|` {
if nextc == `|` {
s.pos++
return s.new_token(.logical_or, '')
return s.new_token(.logical_or, '', 2)
}
if nextc == `=` {
s.pos++
return s.new_token(.or_assign, '')
return s.new_token(.or_assign, '', 2)
}
return s.new_token(.pipe, '')
return s.new_token(.pipe, '', 1)
}
`,` {
return s.new_token(.comma, '')
return s.new_token(.comma, '', 1)
}
`@` {
s.pos++
@ -566,28 +571,28 @@ pub fn (s mut Scanner) scan() token.Token {
// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
// ... which is useful while debugging/tracing
if name == 'FN' {
return s.new_token(.string, s.fn_name)
return s.new_token(.string, s.fn_name, 3)
}
if name == 'VEXE' {
vexe := pref.vexe_path()
return s.new_token(.string, cescaped_path(vexe))
return s.new_token(.string, cescaped_path(vexe), 5)
}
if name == 'FILE' {
return s.new_token(.string, cescaped_path(os.real_path(s.file_path)))
return s.new_token(.string, cescaped_path(os.real_path(s.file_path)), 5)
}
if name == 'LINE' {
return s.new_token(.string, (s.line_nr + 1).str())
return s.new_token(.string, (s.line_nr + 1).str(), 5)
}
if name == 'COLUMN' {
return s.new_token(.string, (s.current_column()).str())
return s.new_token(.string, s.current_column().str(), 7)
}
if name == 'VHASH' {
return s.new_token(.string, util.vhash())
return s.new_token(.string, util.vhash(), 6)
}
if !token.is_key(name) {
s.error('@ must be used before keywords (e.g. `@type string`)')
}
return s.new_token(.name, name)
return s.new_token(.name, name, name.len)
}
/*
case `\r`:
@ -608,11 +613,11 @@ pub fn (s mut Scanner) scan() token.Token {
s.pos++
if s.text[s.pos + 1] == `.` {
s.pos++
return s.new_token(.ellipsis, '')
return s.new_token(.ellipsis, '', 3)
}
return s.new_token(.dotdot, '')
return s.new_token(.dotdot, '', 2)
}
return s.new_token(.dot, '')
return s.new_token(.dot, '', 1)
}
`#` {
start := s.pos + 1
@ -623,101 +628,101 @@ pub fn (s mut Scanner) scan() token.Token {
// s.fgenln('// shebang line "$s.line_comment"')
return s.scan()
}
hash := s.text[start..s.pos]
return s.new_token(.hash, hash.trim_space())
hash := s.text[start..s.pos].trim_space()
return s.new_token(.hash, hash, hash.len)
}
`>` {
if nextc == `=` {
s.pos++
return s.new_token(.ge, '')
return s.new_token(.ge, '', 2)
}
else if nextc == `>` {
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
s.pos += 2
return s.new_token(.right_shift_assign, '')
return s.new_token(.right_shift_assign, '', 3)
}
s.pos++
return s.new_token(.right_shift, '')
return s.new_token(.right_shift, '', 2)
}
else {
return s.new_token(.gt, '')
return s.new_token(.gt, '', 1)
}
}
0xE2 {
// case `≠`:
if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
s.pos += 2
return s.new_token(.ne, '')
return s.new_token(.ne, '', 3)
}
// ⩽
else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
s.pos += 2
return s.new_token(.le, '')
return s.new_token(.le, '', 3)
}
// ⩾
else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
s.pos += 2
return s.new_token(.ge, '')
return s.new_token(.ge, '', 3)
}
}
`<` {
if nextc == `=` {
s.pos++
return s.new_token(.le, '')
return s.new_token(.le, '', 2)
}
else if nextc == `<` {
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
s.pos += 2
return s.new_token(.left_shift_assign, '')
return s.new_token(.left_shift_assign, '', 3)
}
s.pos++
return s.new_token(.left_shift, '')
return s.new_token(.left_shift, '', 2)
}
else {
return s.new_token(.lt, '')
return s.new_token(.lt, '', 1)
}
}
`=` {
if nextc == `=` {
s.pos++
return s.new_token(.eq, '')
return s.new_token(.eq, '', 2)
}
else if nextc == `>` {
s.pos++
return s.new_token(.arrow, '')
return s.new_token(.arrow, '', 2)
}
else {
return s.new_token(.assign, '')
return s.new_token(.assign, '', 1)
}
}
`:` {
if nextc == `=` {
s.pos++
return s.new_token(.decl_assign, '')
return s.new_token(.decl_assign, '', 2)
}
else {
return s.new_token(.colon, '')
return s.new_token(.colon, '', 1)
}
}
`;` {
return s.new_token(.semicolon, '')
return s.new_token(.semicolon, '', 1)
}
`!` {
if nextc == `=` {
s.pos++
return s.new_token(.ne, '')
return s.new_token(.ne, '', 2)
}
else {
return s.new_token(.not, '')
return s.new_token(.not, '', 1)
}
}
`~` {
return s.new_token(.bit_not, '')
return s.new_token(.bit_not, '', 1)
}
`/` {
if nextc == `=` {
s.pos++
return s.new_token(.div_assign, '')
return s.new_token(.div_assign, '', 2)
}
if nextc == `/` {
start := s.pos + 1
@ -739,7 +744,7 @@ pub fn (s mut Scanner) scan() token.Token {
if is_separate_line_comment {
comment = '|' + comment
}
return s.new_token(.comment, comment)
return s.new_token(.comment, comment, comment.len + 2)
}
// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
// Skip the comment (return the next token)
@ -771,12 +776,12 @@ pub fn (s mut Scanner) scan() token.Token {
s.pos++
if s.comments_mode == .parse_comments {
comment := s.text[start..(s.pos - 1)].trim_space()
return s.new_token(.comment, comment)
return s.new_token(.comment, comment, comment.len + 4)
}
// Skip if not in fmt mode
return s.scan()
}
return s.new_token(.div, '')
return s.new_token(.div, '', 1)
}
else {}
}
@ -1020,7 +1025,7 @@ pub fn (s &Scanner) error(msg string) {
line_nr: s.line_nr
pos: s.pos
}
eprintln(util.formated_error('error', msg, s.file_path, pos))
eprintln(util.formatted_error('error', msg, s.file_path, pos))
exit(1)
}

View File

@ -7,6 +7,7 @@ pub struct Position {
pub:
line_nr int // the line number in the source where the token occured
pos int // the position of the token in scanner text
len int // length of the literal in the source
}
[inline]
@ -14,5 +15,6 @@ pub fn (tok &Token) position() Position {
return Position{
line_nr: tok.line_nr - 1
pos: tok.pos
len: tok.len
}
}

View File

@ -10,6 +10,7 @@ pub:
line_nr int // the line number in the source where the token occured
// name_idx int // name table index for O(1) lookup
pos int // the position of the token in scanner text
len int // length of the literal
}
pub enum Kind {

View File

@ -41,7 +41,7 @@ pub fn new_error_manager() &EManager {
return &EManager{ support_color: term.can_show_color_on_stderr() }
}
pub fn formated_error(kind string /*error or warn*/, emsg string, filepath string, pos token.Position) string {
pub fn formatted_error(kind string /*error or warn*/, emsg string, filepath string, pos token.Position) string {
mut path := filepath
verror_paths_override := os.getenv('VERROR_PATHS')
if verror_paths_override == 'absolute' {
@ -95,7 +95,10 @@ pub fn formated_error(kind string /*error or warn*/, emsg string, filepath strin
}
continue
}
pointerline << if emanager.support_color { term.bold(term.blue('^')) } else { '^' }
for i in 0..pos.len {
underline := '~'.repeat(pos.len)
pointerline << if emanager.support_color { term.bold(term.blue(underline)) } else { underline }
}
break
}
clines << ' ' + pointerline.join('')