parser: add some infrastructure for more specific errors

* compiler: change s.line_nr in just one place, so that s.last_nl_pos will be updated in tandem too.

* Cleanup spurious spaces.

* Store ScannerPos info inside the cached tokens.

Use the stored information when errors are encountered.

* Fix #2079 ( cannot use type...in assignment ).

* do not store scannerpos per each token, instead rescan the source once on error to get the position.

* compiler: implement highlighting for errors. Use only line/col info stored in the cached tokens.

* fixing building on windows

* Split can_show_color to _nix and _win files.
pull/2159/head
Delyan Angelov 2019-09-29 04:33:23 +03:00 committed by Alexander Medvednikov
parent cbf5de7c8f
commit 8b8cd13929
6 changed files with 181 additions and 116 deletions

View File

@ -33,7 +33,7 @@ mut:
is_decl bool // type myfn fn(int, int)
defer_text []string
//gen_types []string
fn_name_sp ScannerPos
fn_name_token Tok
}
fn (p &Parser) find_var(name string) ?Var {
@ -110,9 +110,8 @@ fn (p mut Parser) known_var(name string) bool {
fn (p mut Parser) register_var(v Var) {
mut new_var := {v | idx: p.var_idx, scope_level: p.cur_fn.scope_level}
if v.line_nr == 0 {
spos := p.scanner.get_scanner_pos()
new_var.scanner_pos = spos
new_var.line_nr = spos.line_nr
new_var.token = p.cur_tok()
new_var.line_nr = new_var.token.line_nr
}
// Expand the array
if p.var_idx >= p.local_vars.len {
@ -214,7 +213,7 @@ fn (p mut Parser) fn_decl() {
ref: is_amp
ptr: is_mut
line_nr: p.scanner.line_nr
scanner_pos: p.scanner.get_scanner_pos()
token: p.cur_tok()
}
f.args << receiver
p.register_var(receiver)
@ -227,7 +226,7 @@ fn (p mut Parser) fn_decl() {
else {
f.name = p.check_name()
}
f.fn_name_sp = p.scanner.get_scanner_pos()
f.fn_name_token = p.cur_tok()
// C function header def? (fn C.NSMakeRect(int,int,int,int))
is_c := f.name == 'C' && p.tok == .dot
// Just fn signature? only builtin.v + default build mode
@ -335,7 +334,7 @@ fn (p mut Parser) fn_decl() {
// Special case for main() args
if f.name == 'main__main' && !has_receiver {
if str_args != '' || typ != 'void' {
p.error_with_position('fn main must have no arguments and no return values', f.fn_name_sp)
p.error_with_tok('fn main must have no arguments and no return values', f.fn_name_token)
}
}
dll_export_linkage := if p.os == .msvc && p.attr == 'live' && p.pref.is_so {
@ -440,7 +439,7 @@ fn (p mut Parser) fn_decl() {
if f.name == 'main__main' || f.name == 'main' || f.name == 'WinMain' {
if p.pref.is_test && !p.scanner.file_path.contains('/volt') {
p.error_with_position('tests cannot have function `main`', f.fn_name_sp)
p.error_with_tok('tests cannot have function `main`', f.fn_name_token)
}
}
// println('is_c=$is_c name=$f.name')
@ -473,7 +472,7 @@ fn (p mut Parser) fn_decl() {
}
}
if typ != 'void' && !p.returns {
p.error_with_position('$f.name must return "$typ"', f.fn_name_sp)
p.error_with_tok('$f.name must return "$typ"', f.fn_name_token)
}
if p.attr == 'live' && p.pref.is_so {
//p.genln('// live_function body end')
@ -536,10 +535,10 @@ fn (p mut Parser) check_unused_variables() {
break
}
if !var.is_used && !p.pref.is_repl && !var.is_arg && !p.pref.translated {
p.production_error('`$var.name` declared and not used', var.scanner_pos )
p.production_error_with_token('`$var.name` declared and not used', var.token )
}
if !var.is_changed && var.is_mut && !p.pref.is_repl && !p.pref.translated {
p.error_with_position( '`$var.name` is declared as mutable, but it was never changed', var.scanner_pos )
p.error_with_tok( '`$var.name` is declared as mutable, but it was never changed', var.token )
}
}
}
@ -699,6 +698,7 @@ fn (p mut Parser) fn_args(f mut Fn) {
if f.is_interface {
int_arg := Var {
typ: f.receiver_typ
token: p.cur_tok()
}
f.args << int_arg
}
@ -714,7 +714,7 @@ fn (p mut Parser) fn_args(f mut Fn) {
is_arg: true
// is_mut: is_mut
line_nr: p.scanner.line_nr
scanner_pos: p.scanner.get_scanner_pos()
token: p.cur_tok()
}
// f.register_var(v)
f.args << v
@ -757,7 +757,7 @@ fn (p mut Parser) fn_args(f mut Fn) {
is_mut: is_mut
ptr: is_mut
line_nr: p.scanner.line_nr
scanner_pos: p.scanner.get_scanner_pos()
token: p.cur_tok()
}
p.register_var(v)
f.args << v

View File

@ -12,11 +12,11 @@ import (
// TODO rename to Token
// TODO rename enum Token to TokenType
struct Tok {
tok Token
lit string
line_nr int
tok Token // the token number/enum; for quick comparisons
lit string // literal representation of the token
line_nr int // the line number in the source where the token occured
name_idx int // name table index for O(1) lookup
col int
col int // the column where the token ends
}
struct Parser {
@ -154,6 +154,7 @@ fn (v mut V) new_parser(scanner &Scanner, id string) Parser {
}
if p.pref.is_repl {
p.scanner.should_print_line_on_error = false
p.scanner.should_print_errors_in_color = false
}
v.cgen.line_directives = v.pref.is_debuggable
// v.cgen.file = path
@ -163,7 +164,7 @@ fn (v mut V) new_parser(scanner &Scanner, id string) Parser {
fn (p mut Parser) scan_tokens() {
for {
res := p.scanner.scan()
p.tokens << Tok {
p.tokens << Tok{
tok: res.tok
lit: res.lit
line_nr: p.scanner.line_nr
@ -216,7 +217,7 @@ fn (p &Parser) cur_tok() Tok {
fn (p &Parser) peek_token() Tok {
if p.token_idx >= p.tokens.len - 2 {
return Tok{tok:Token.eof}
return Tok{ tok:Token.eof }
}
tok := p.tokens[p.token_idx]
return tok
@ -886,30 +887,13 @@ if p.scanner.line_comment != '' {
}
}
/////////////////////////////////////////////////////////////////
fn (p &Parser) warn(s string) {
println('warning: $p.scanner.file_path:${p.scanner.line_nr+1}: $s')
e := normalized_error( s )
println('warning: $p.scanner.file_path:${p.scanner.line_nr+1}: $e')
}
fn (p mut Parser) error_with_position(e string, sp ScannerPos) {
p.scanner.goto_scanner_position( sp )
p.error( e )
}
fn (p mut Parser) production_error(e string, sp ScannerPos) {
if p.pref.is_prod {
p.scanner.goto_scanner_position( sp )
p.error( e )
}else {
// on a warning, restore the scanner state after printing the warning:
cpos := p.scanner.get_scanner_pos()
p.scanner.goto_scanner_position( sp )
p.warn(e)
p.scanner.goto_scanner_position( cpos )
}
}
fn (p mut Parser) error(s string) {
fn (p mut Parser) print_error_context(){
// Dump all vars and types for debugging
if p.pref.is_debug {
// os.write_to_file('/var/tmp/lang.types', '')//pes(p.table.types))
@ -935,14 +919,53 @@ fn (p mut Parser) error(s string) {
print_backtrace()
}
// p.scanner.debug_tokens()
// Print `[]int` instead of `array_int` in errors
e := s.replace('array_', '[]')
.replace('__', '.')
.replace('Option_', '?')
.replace('main.', '')
p.scanner.error_with_col(e, p.tokens[p.token_idx-1].col)
}
fn normalized_error( s string ) string {
// Print `[]int` instead of `array_int` in errors
return s.replace('array_', '[]')
.replace('__', '.')
.replace('Option_', '?')
.replace('main.', '')
}
fn (p mut Parser) error_with_position(s string, sp ScannerPos) {
p.print_error_context()
e := normalized_error( s )
p.scanner.goto_scanner_position( sp )
p.scanner.error_with_col(e, sp.pos - sp.last_nl_pos)
}
fn (p mut Parser) warn_with_position(e string, sp ScannerPos) {
// on a warning, restore the scanner state after printing the warning:
cpos := p.scanner.get_scanner_pos()
p.scanner.goto_scanner_position( sp )
p.warn(e)
p.scanner.goto_scanner_position( cpos )
}
fn (p mut Parser) production_error_with_token(e string, tok Tok) {
if p.pref.is_prod {
p.error_with_tok( e, tok )
}else {
p.warn_with_token( e, tok )
}
}
fn (p &Parser) warn_with_token(s string, tok Tok) {
e := normalized_error( s )
println('warning: $p.scanner.file_path:${tok.line_nr+1}:${tok.col}: $e')
}
fn (p mut Parser) error_with_tok(s string, tok Tok) {
p.error_with_position(s, p.scanner.get_scanner_pos_of_token(tok) )
}
fn (p mut Parser) error(s string) {
// no positioning info, so just assume that the last token was the culprit:
p.error_with_tok(s, p.tokens[p.token_idx-1] )
}
/////////////////////////////////////////////////////////////////
fn (p &Parser) first_pass() bool {
return p.pass == .decl
}
@ -1382,6 +1405,7 @@ fn (p mut Parser) statement(add_semi bool) string {
// is_map: are we in map assignment? (m[key] = val) if yes, dont generate '='
// this can be `user = ...` or `user.field = ...`, in both cases `v` is `user`
fn (p mut Parser) assign_statement(v Var, ph int, is_map bool) {
errtok := p.cur_tok()
//p.log('assign_statement() name=$v.name tok=')
is_vid := p.fileis('vid') // TODO remove
tok := p.tok
@ -1436,8 +1460,7 @@ fn ($v.name mut $v.typ) $p.cur_fn.name (...) {
p.cgen.resetln(left + 'opt_ok($expr, sizeof($typ))')
}
else if !p.builtin_mod && !p.check_types_no_throw(expr_type, p.assigned_type) {
p.scanner.line_nr--
p.error('cannot use type `$expr_type` as type `$p.assigned_type` in assignment')
p.error_with_tok( 'cannot use type `$expr_type` as type `$p.assigned_type` in assignment', errtok)
}
if (is_str || is_ustr) && tok == .plus_assign && !p.is_js {
p.gen(')')
@ -1488,7 +1511,7 @@ fn (p mut Parser) var_decl() {
}
typ := types[i]
// println('var decl tok=${p.strtok()} ismut=$is_mut')
var_scanner_pos := p.scanner.get_scanner_pos()
var_token := p.cur_tok()
// name := p.check_name()
// p.var_decl_name = name
// Don't allow declaring a variable with the same name. Even in a child scope
@ -1515,8 +1538,8 @@ fn (p mut Parser) var_decl() {
typ: typ
is_mut: is_mut
is_alloc: p.is_alloc || typ.starts_with('array_')
scanner_pos: var_scanner_pos
line_nr: var_scanner_pos.line_nr
line_nr: var_token.line_nr
token: var_token
})
//if p.fileis('str.v') {
//if p.is_alloc { println('REG VAR IS ALLOC $name') }
@ -3783,7 +3806,7 @@ fn (p &Parser) prepend_mod(name string) string {
fn (p mut Parser) go_statement() {
p.check(.key_go)
mut gopos := p.scanner.get_scanner_pos()
mut gotoken := p.cur_tok()
// TODO copypasta of name_expr() ?
if p.peek() == .dot {
// Method
@ -3792,12 +3815,12 @@ fn (p mut Parser) go_statement() {
return
}
p.mark_var_used(v)
gopos = p.scanner.get_scanner_pos()
gotoken = p.cur_tok()
p.next()
p.check(.dot)
typ := p.table.find_type(v.typ)
method := p.table.find_method(typ, p.lit) or {
p.error_with_position('go method missing $var_name', gopos)
p.error_with_tok('go method missing $var_name', gotoken)
return
}
p.async_fn_call(method, 0, var_name, v.typ)
@ -3807,11 +3830,11 @@ fn (p mut Parser) go_statement() {
// Normal function
f := p.table.find_fn(p.prepend_mod(f_name)) or {
println( p.table.debug_fns() )
p.error_with_position('can not find function $f_name', gopos)
p.error_with_tok('can not find function $f_name', gotoken)
return
}
if f.name == 'println' || f.name == 'print' {
p.error_with_position('`go` cannot be used with `println`', gopos)
p.error_with_tok('`go` cannot be used with `println`', gotoken)
}
p.async_fn_call(f, 0, '', '')
}

View File

@ -7,11 +7,14 @@ module main
import (
os
strings
term
)
const (
single_quote = `\'`
double_quote = `"`
error_context_before = 2 // how many lines of source context to print before the pointer line
error_context_after = 2 // ^^^ same, but after
)
struct Scanner {
@ -34,7 +37,9 @@ mut:
prev_tok Token
fn_name string // needed for @FN
should_print_line_on_error bool
should_print_errors_in_color bool
quote byte // which quote is used to denote current string: ' or "
file_lines []string // filled *only on error* by rescanning the source till the error (and several lines more)
}
// new scanner from file.
@ -71,6 +76,7 @@ fn new_scanner(text string) &Scanner {
text: text
fmt_out: strings.new_builder(1000)
should_print_line_on_error: true
should_print_errors_in_color: true
}
}
@ -98,6 +104,48 @@ fn (s mut Scanner) goto_scanner_position(scp ScannerPos) {
s.last_nl_pos = scp.last_nl_pos
}
// get_scanner_pos_of_token rescans *the whole source* till it reaches {t.line_nr, t.col} .
fn (s mut Scanner) get_scanner_pos_of_token(t Tok) ScannerPos {
// This rescanning is done just once on error, so it is fine for now.
// Be careful for the performance implications, if you want to
// do it more frequently. The alternative would be to store
// the scanpos (12 bytes) for each token, and there are potentially many tokens.
tline := t.line_nr
tcol := if t.line_nr == 0 { t.col + 1 } else { t.col - 1 }
// save the current scanner position, it will be restored later
cpos := s.get_scanner_pos()
mut sptoken := ScannerPos{}
// Starting from the start, scan the source lines
// till the desired tline is reached, then
// s.pos + tcol would be the proper position
// of the token. Continue scanning for some more lines of context too.
s.goto_scanner_position(ScannerPos{})
s.file_lines = []string
mut prevlinepos := 0
for {
prevlinepos = s.pos
if s.pos >= s.text.len { break }
if s.line_nr > tline + 10 { break }
////////////////////////////////////////
if tline == s.line_nr {
sptoken = s.get_scanner_pos()
sptoken.pos += tcol
}
s.ignore_line() s.eat_single_newline()
sline := s.text.substr( prevlinepos, s.pos ).trim_right('\r\n')
s.file_lines << sline
}
//////////////////////////////////////////////////
s.goto_scanner_position(cpos)
return sptoken
}
fn (s mut Scanner) eat_single_newline(){
if s.pos >= s.text.len { return }
if s.expect('\r\n', s.pos) { s.pos += 2 return }
if s.text[ s.pos ] == `\n` { s.pos ++ return }
if s.text[ s.pos ] == `\r` { s.pos ++ return }
}
// TODO remove once multiple return values are implemented
struct ScanRes {
@ -600,83 +648,56 @@ fn (s mut Scanner) scan() ScanRes {
return scan_res(.eof, '')
}
fn (s &Scanner) find_current_line_start_position() int {
if s.pos >= s.text.len { return s.pos }
mut linestart := s.pos
for {
if linestart <= 0 {
linestart = 1
break
}
if s.text[linestart] == 10 || s.text[linestart] == 13 {
linestart++
break
}
linestart--
}
return linestart
}
fn (s &Scanner) find_current_line_end_position() int {
if s.pos >= s.text.len { return s.pos }
mut lineend := s.pos
for {
if lineend >= s.text.len {
lineend = s.text.len
break
}
if s.text[lineend] == 10 || s.text[lineend] == 13 {
break
}
lineend++
}
return lineend
}
fn (s &Scanner) current_column() int {
return s.pos - s.find_current_line_start_position()
return s.pos - s.last_nl_pos
}
fn imax(a,b int) int { return if a > b { a } else { b } }
fn imin(a,b int) int { return if a < b { a } else { b } }
fn (s &Scanner) error(msg string) {
s.error_with_col(msg, 0)
}
fn (s &Scanner) error_with_col(msg string, col int) {
column := col-1
linestart := s.find_current_line_start_position()
lineend := s.find_current_line_end_position()
fullpath := os.realpath( s.file_path )
color_on := s.should_print_errors_in_color && term.can_show_color()
final_message := if color_on { term.red( term.bold( msg ) ) } else { msg }
// The filepath:line:col: format is the default C compiler
// error output format. It allows editors and IDE's like
// emacs to quickly find the errors in the output
// and jump to their source with a keyboard shortcut.
// Using only the filename leads to inability of IDE/editors
// to find the source file, when it is in another folder.
//println('${s.file_path}:${s.line_nr + 1}:${column+1}: $msg')
println('${fullpath}:${s.line_nr + 1}:${column+1}: $msg')
eprintln('${fullpath}:${s.line_nr + 1}:${col}: $final_message')
if s.should_print_line_on_error && lineend > linestart {
line := s.text.substr( linestart, lineend )
// The pointerline should have the same spaces/tabs as the offending
// line, so that it prints the ^ character exactly on the *same spot*
// where it is needed. That is the reason we can not just
// use strings.repeat(` `, column) to form it.
pointerline := line.clone()
mut pl := pointerline.str
for i,c in line {
pl[i] = ` `
if i == column { pl[i] = `^` }
else if c.is_space() { pl[i] = c }
if s.should_print_line_on_error && s.file_lines.len > 0 {
context_start_line := imax(0, (s.line_nr - error_context_before + 1 ))
context_end_line := imin(s.file_lines.len, (s.line_nr + error_context_after + 1 ))
for cline := context_start_line; cline < context_end_line; cline++ {
line := '${(cline+1):5d}| ' + s.file_lines[ cline ]
coloredline := if cline == s.line_nr && color_on { term.red(line) } else { line }
println( coloredline )
if cline != s.line_nr { continue }
// The pointerline should have the same spaces/tabs as the offending
// line, so that it prints the ^ character exactly on the *same spot*
// where it is needed. That is the reason we can not just
// use strings.repeat(` `, col) to form it.
mut pointerline := []string
for i , c in line {
if i < col {
x := if c.is_space() { c } else { ` ` }
pointerline << x.str()
continue
}
pointerline << if color_on { term.bold( term.blue('^') ) } else { '^' }
break
}
println( ' ' + pointerline.join('') )
}
println(line)
println(pointerline)
}
exit(1)
}
fn (s Scanner) count_symbol_before(p int, sym byte) int {
mut count := 0
for i:=p; i>=0; i-- {

View File

@ -96,8 +96,8 @@ mut:
scope_level int
is_c bool // todo remove once `typ` is `Type`, not string
is_moved bool
scanner_pos ScannerPos // TODO: use only scanner_pos, remove line_nr
line_nr int
token Tok // TODO: use only var.token.line_nr, remove var.line_nr
}
struct Type {

View File

@ -0,0 +1,11 @@
module term
import os
fn C.isatty(int) int
pub fn can_show_color() bool {
if os.getenv('TERM') == 'dumb' { return false }
if C.isatty(1) != 0 { return true }
return false
}

View File

@ -0,0 +1,10 @@
module term
import os
// TODO: implement proper checking on windows too.
// For now, just return false by default
pub fn can_show_color() bool {
if os.getenv('TERM') == 'dumb' { return false }
return false
}