From 32550c6d6956d53de6f06c1269e42a83c0b50ef7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20D=C3=A4schle?=
Date: Fri, 10 Apr 2020 00:09:34 +0200
Subject: [PATCH] scanner: even better error messages

---
 vlib/v/ast/ast.v         |   1 +
 vlib/v/checker/checker.v |  44 ++++++++++-
 vlib/v/parser/parser.v   |  18 ++++-
 vlib/v/scanner/scanner.v | 161 ++++++++++++++++++++-------------------
 vlib/v/token/position.v  |   2 +
 vlib/v/token/token.v     |   1 +
 vlib/v/util/errors.v     |   5 +-
 7 files changed, 146 insertions(+), 86 deletions(-)

diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v
index 1645c65e12..5737e70490 100644
--- a/vlib/v/ast/ast.v
+++ b/vlib/v/ast/ast.v
@@ -43,6 +43,7 @@ pub:
 pub struct IntegerLiteral {
 pub:
 	val string
+	pos token.Position
 }
 
 pub struct FloatLiteral {
diff --git a/vlib/v/checker/checker.v b/vlib/v/checker/checker.v
index 26a32708b2..da48e1e244 100644
--- a/vlib/v/checker/checker.v
+++ b/vlib/v/checker/checker.v
@@ -567,7 +567,11 @@ pub fn (c mut Checker) enum_decl(decl ast.EnumDecl) {
 				ast.IntegerLiteral {}
 				ast.PrefixExpr {}
 				else {
-					c.error('default value for enum has to be an integer', field.pos)
+					mut pos := expr_pos(expr)
+					if pos.pos == 0 {
+						pos = field.pos
+					}
+					c.error('default value for enum has to be an integer', pos)
 				}
 			}
 		}
@@ -1036,6 +1040,42 @@ pub fn (c mut Checker) expr(node ast.Expr) table.Type {
 	return table.void_type
 }
 
+fn expr_pos(node ast.Expr) token.Position {
+	// the commented-out nodes still have to be implemented
+	match mut node {
+		ast.ArrayInit { return it.pos }
+		ast.AsCast { return it.pos }
+		ast.AssignExpr { return it.pos }
+		ast.Assoc { return it.pos }
+		// ast.BoolLiteral { }
+		// ast.CastExpr { }
+		ast.CallExpr { return it.pos }
+		// ast.CharLiteral { }
+		ast.EnumVal { return it.pos }
+		// ast.FloatLiteral { }
+		// ast.Ident { }
+		ast.IfExpr { return it.pos }
+		// ast.IfGuardExpr { }
+		ast.IndexExpr { return it.pos }
+		ast.InfixExpr { return it.pos }
+		ast.IntegerLiteral { return it.pos }
+		ast.MapInit { return it.pos }
+		ast.MatchExpr { return it.pos }
+		ast.PostfixExpr { return it.pos }
+		ast.PrefixExpr { return it.pos }
+		// ast.None { }
+		// ast.ParExpr { }
+		ast.SelectorExpr { return it.pos }
+		// ast.SizeOf { }
+		// ast.StringLiteral { }
+		// ast.StringInterLiteral { }
+		ast.StructInit { return it.pos }
+		// ast.Type { }
+		// ast.TypeOf { }
+		else { return token.Position{} }
+	}
+}
+
 pub fn (c mut Checker) ident(ident mut ast.Ident) table.Type {
 	if ident.name == c.var_decl_name { // c.checked_ident {
 		c.error('unresolved: `$ident.name`', ident.pos)
@@ -1384,7 +1424,7 @@ fn (c mut Checker) warn_or_error(s string, pos token.Position, warn bool) {
 	}
 	typ := if warn { 'warning' } else { 'error' }
 	kind := if c.pref.is_verbose { 'checker $typ #$c.nr_errors:' } else { '$typ:' }
-	ferror := util.formated_error(kind, s, c.file.path, pos)
+	ferror := util.formatted_error(kind, s, c.file.path, pos)
 	c.errors << ferror
 	if !(pos.line_nr in c.error_lines) {
 		if warn {
diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v
index 88e96f5266..819085e2ec 100644
--- a/vlib/v/parser/parser.v
+++ b/vlib/v/parser/parser.v
@@ -516,13 +516,13 @@ pub fn (p &Parser) error(s string) {
 		print_backtrace()
 		kind = 'parser error:'
 	}
-	ferror := util.formated_error(kind, s, p.file_name, p.tok.position())
+	ferror := util.formatted_error(kind, s, p.file_name, p.tok.position())
 	eprintln(ferror)
 	exit(1)
 }
 
 pub fn (p &Parser) warn(s string) {
-	ferror := util.formated_error('warning:', s, p.file_name, p.tok.position())
+	ferror := util.formatted_error('warning:', s, p.file_name, p.tok.position())
 	eprintln(ferror)
 }
 
@@ -1278,6 +1278,7 @@ fn (p mut Parser) string_expr() ast.Expr {
 }
 
 fn (p mut Parser) array_init() ast.ArrayInit {
+	first_pos := p.tok.position()
 	p.check(.lsbr)
 	// p.warn('array_init() exp=$p.expected_type')
 	mut array_type := table.void_type
@@ -1320,11 +1321,18 @@ fn (p mut Parser) array_init() ast.ArrayInit {
 	if p.tok.kind == .not {
 		p.next()
 	}
+	last_pos := p.tok.position()
+	len := last_pos.pos - first_pos.pos
+	pos := token.Position{
+		line_nr: first_pos.line_nr
+		pos: first_pos.pos
+		len: len
+	}
 	return ast.ArrayInit{
 		elem_type: elem_type
 		typ: array_type
 		exprs: exprs
-		pos: p.tok.position()
+		pos: pos
 	}
 }
 
@@ -1352,6 +1360,7 @@ fn (p mut Parser) map_init() ast.MapInit {
 
 fn (p mut Parser) parse_number_literal() ast.Expr {
 	lit := p.tok.lit
+	pos := p.tok.position()
 	mut node := ast.Expr{}
 	if lit.index_any('.eE') >= 0 {
 		node = ast.FloatLiteral{
@@ -1360,6 +1369,7 @@ fn (p mut Parser) parse_number_literal() ast.Expr {
 	} else {
 		node = ast.IntegerLiteral{
 			val: lit
+			pos: pos
 		}
 	}
 	p.next()
@@ -1851,8 +1861,8 @@ fn (p mut Parser) enum_decl() ast.EnumDecl {
 	// mut default_exprs := []ast.Expr
 	mut fields := []ast.EnumField
 	for p.tok.kind != .eof && p.tok.kind != .rcbr {
-		val := p.check_name()
 		pos := p.tok.position()
+		val := p.check_name()
 		vals << val
 		mut exprs := []ast.Expr
 		// p.warn('enum val $val')
diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v
index 12a1b57e62..a8985c223d 100644
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@@ -77,12 +77,13 @@ pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
 	}
 }
 
-fn (s &Scanner) new_token(tok_kind token.Kind, lit string) token.Token {
+fn (s &Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
 	return token.Token{
 		kind: tok_kind
 		lit: lit
 		line_nr: s.line_nr + 1
-		pos: s.pos
+		pos: s.pos - len + 1
+		len: len
 	}
 }
 
@@ -333,7 +334,7 @@ fn (s mut Scanner) skip_whitespace() {
 fn (s mut Scanner) end_of_file() token.Token {
 	s.pos = s.text.len
 	s.inc_line_number()
-	return s.new_token(.eof, '')
+	return s.new_token(.eof, '', 1)
 }
 
 pub fn (s mut Scanner) scan() token.Token {
@@ -358,10 +359,11 @@ pub fn (s mut Scanner) scan() token.Token {
 	if s.is_inter_end {
 		if s.text[s.pos] == s.quote {
 			s.is_inter_end = false
-			return s.new_token(.string, '')
+			return s.new_token(.string, '', 1)
 		}
 		s.is_inter_end = false
-		return s.new_token(.string, s.ident_string())
+		ident_string := s.ident_string()
+		return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 	}
 	s.skip_whitespace()
 	// end of file
@@ -379,7 +381,7 @@ pub fn (s mut Scanner) scan() token.Token {
 		// Check if not .eof to prevent panic
 		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
 		if token.is_key(name) {
-			return s.new_token(token.key_to_token(name), name)
+			return s.new_token(token.key_to_token(name), name, name.len)
 		}
 		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 		// at the next ', skip it
@@ -401,7 +403,7 @@ pub fn (s mut Scanner) scan() token.Token {
 			// Otherwise the scanner would be stuck at s.pos = 0
 			s.pos++
 		}
-		return s.new_token(.name, name)
+		return s.new_token(.name, name, name.len)
 	}
 	// `123`, `.123`
 	else if c.is_digit() || (c == `.` && nextc.is_digit()) {
@@ -419,7 +421,7 @@ pub fn (s mut Scanner) scan() token.Token {
 			s.pos += prefix_zero_num // jump these zeros
 		}
 		num := s.ident_number()
-		return s.new_token(.number, num)
+		return s.new_token(.number, num, num.len)
 	}
 	// Handle `'$fn()'`
 	if c == `)` && s.is_inter_start {
@@ -429,88 +431,90 @@ pub fn (s mut Scanner) scan() token.Token {
 		if next_char == s.quote {
 			s.is_inside_string = false
 		}
-		return s.new_token(.rpar, '')
+		return s.new_token(.rpar, '', 1)
 	}
 	// all other tokens
 	match c {
 		`+` {
 			if nextc == `+` {
 				s.pos++
-				return s.new_token(.inc, '')
+				return s.new_token(.inc, '', 2)
 			} else if nextc == `=` {
 				s.pos++
-				return s.new_token(.plus_assign, '')
+				return s.new_token(.plus_assign, '', 2)
 			}
-			return s.new_token(.plus, '')
+			return s.new_token(.plus, '', 1)
 		}
 		`-` {
 			if nextc == `-` {
 				s.pos++
-				return s.new_token(.dec, '')
+				return s.new_token(.dec, '', 2)
 			} else if nextc == `=` {
 				s.pos++
-				return s.new_token(.minus_assign, '')
+				return s.new_token(.minus_assign, '', 2)
 			}
-			return s.new_token(.minus, '')
+			return s.new_token(.minus, '', 1)
 		}
 		`*` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.mult_assign, '')
+				return s.new_token(.mult_assign, '', 2)
 			}
-			return s.new_token(.mul, '')
+			return s.new_token(.mul, '', 1)
 		}
 		`^` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.xor_assign, '')
+				return s.new_token(.xor_assign, '', 2)
 			}
-			return s.new_token(.xor, '')
+			return s.new_token(.xor, '', 1)
 		}
 		`%` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.mod_assign, '')
+				return s.new_token(.mod_assign, '', 2)
 			}
-			return s.new_token(.mod, '')
+			return s.new_token(.mod, '', 1)
 		}
 		`?` {
-			return s.new_token(.question, '')
+			return s.new_token(.question, '', 1)
 		}
 		single_quote, double_quote {
-			return s.new_token(.string, s.ident_string())
+			ident_string := s.ident_string()
+			return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 		}
 		`\`` { // `
 			// apostrophe balance comment. do not remove
-			return s.new_token(.chartoken, s.ident_char())
+			ident_char := s.ident_char()
+			return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
 		}
 		`(` {
-			return s.new_token(.lpar, '')
+			return s.new_token(.lpar, '', 1)
 		}
 		`)` {
-			return s.new_token(.rpar, '')
+			return s.new_token(.rpar, '', 1)
 		}
 		`[` {
-			return s.new_token(.lsbr, '')
+			return s.new_token(.lsbr, '', 1)
 		}
 		`]` {
-			return s.new_token(.rsbr, '')
+			return s.new_token(.rsbr, '', 1)
 		}
 		`{` {
 			// Skip { in `${` in strings
 			if s.is_inside_string {
 				return s.scan()
 			}
-			return s.new_token(.lcbr, '')
+			return s.new_token(.lcbr, '', 1)
 		}
 		`$` {
 			if s.is_inside_string {
-				return s.new_token(.str_dollar, '')
+				return s.new_token(.str_dollar, '', 1)
 			} else {
-				return s.new_token(.dollar, '')
+				return s.new_token(.dollar, '', 1)
 			}
 		}
 		`}` {
@@ -520,38 +524,39 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.text[s.pos] == s.quote {
 					s.is_inside_string = false
-					return s.new_token(.string, '')
+					return s.new_token(.string, '', 1)
 				}
-				return s.new_token(.string, s.ident_string())
+				ident_string := s.ident_string()
+				return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 			} else {
-				return s.new_token(.rcbr, '')
+				return s.new_token(.rcbr, '', 1)
 			}
 		}
 		`&` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.and_assign, '')
+				return s.new_token(.and_assign, '', 2)
 			}
 			if nextc == `&` {
 				s.pos++
-				return s.new_token(.and, '')
+				return s.new_token(.and, '', 2)
 			}
-			return s.new_token(.amp, '')
+			return s.new_token(.amp, '', 1)
 		}
 		`|` {
 			if nextc == `|` {
 				s.pos++
-				return s.new_token(.logical_or, '')
+				return s.new_token(.logical_or, '', 2)
 			}
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.or_assign, '')
+				return s.new_token(.or_assign, '', 2)
 			}
-			return s.new_token(.pipe, '')
+			return s.new_token(.pipe, '', 1)
 		}
 		`,` {
-			return s.new_token(.comma, '')
+			return s.new_token(.comma, '', 1)
 		}
 		`@` {
 			s.pos++
@@ -566,28 +571,28 @@ pub fn (s mut Scanner) scan() token.Token {
 			// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
 			// ... which is useful while debugging/tracing
 			if name == 'FN' {
-				return s.new_token(.string, s.fn_name)
+				return s.new_token(.string, s.fn_name, 3)
 			}
 			if name == 'VEXE' {
 				vexe := pref.vexe_path()
-				return s.new_token(.string, cescaped_path(vexe))
+				return s.new_token(.string, cescaped_path(vexe), 5)
 			}
 			if name == 'FILE' {
-				return s.new_token(.string, cescaped_path(os.real_path(s.file_path)))
+				return s.new_token(.string, cescaped_path(os.real_path(s.file_path)), 5)
 			}
 			if name == 'LINE' {
-				return s.new_token(.string, (s.line_nr + 1).str())
+				return s.new_token(.string, (s.line_nr + 1).str(), 5)
 			}
 			if name == 'COLUMN' {
-				return s.new_token(.string, (s.current_column()).str())
+				return s.new_token(.string, s.current_column().str(), 7)
 			}
 			if name == 'VHASH' {
-				return s.new_token(.string, util.vhash())
+				return s.new_token(.string, util.vhash(), 6)
 			}
 			if !token.is_key(name) {
 				s.error('@ must be used before keywords (e.g. `@type string`)')
 			}
-			return s.new_token(.name, name)
+			return s.new_token(.name, name, name.len)
 		}
 		/*
 		case `\r`:
@@ -608,11 +613,11 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.text[s.pos + 1] == `.` {
 					s.pos++
-					return s.new_token(.ellipsis, '')
+					return s.new_token(.ellipsis, '', 3)
 				}
-				return s.new_token(.dotdot, '')
+				return s.new_token(.dotdot, '', 2)
 			}
-			return s.new_token(.dot, '')
+			return s.new_token(.dot, '', 1)
 		}
 		`#` {
 			start := s.pos + 1
@@ -623,101 +628,101 @@ pub fn (s mut Scanner) scan() token.Token {
 				// s.fgenln('// shebang line "$s.line_comment"')
 				return s.scan()
 			}
-			hash := s.text[start..s.pos]
-			return s.new_token(.hash, hash.trim_space())
+			hash := s.text[start..s.pos].trim_space()
+			return s.new_token(.hash, hash, hash.len)
 		}
 		`>` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.ge, '')
+				return s.new_token(.ge, '', 2)
 			} else if nextc == `>` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
-					return s.new_token(.right_shift_assign, '')
+					return s.new_token(.right_shift_assign, '', 3)
 				}
 				s.pos++
-				return s.new_token(.right_shift, '')
+				return s.new_token(.right_shift, '', 2)
 			} else {
-				return s.new_token(.gt, '')
+				return s.new_token(.gt, '', 1)
 			}
 		}
 		0xE2 {
 			// case `≠`:
 			if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
 				s.pos += 2
-				return s.new_token(.ne, '')
+				return s.new_token(.ne, '', 3)
 			}
 			// ⩽
 			else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
 				s.pos += 2
-				return s.new_token(.le, '')
+				return s.new_token(.le, '', 3)
 			}
 			// ⩾
 			else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
 				s.pos += 2
-				return s.new_token(.ge, '')
+				return s.new_token(.ge, '', 3)
 			}
 		}
 		`<` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.le, '')
+				return s.new_token(.le, '', 2)
 			} else if nextc == `<` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
-					return s.new_token(.left_shift_assign, '')
+					return s.new_token(.left_shift_assign, '', 3)
 				}
 				s.pos++
-				return s.new_token(.left_shift, '')
+				return s.new_token(.left_shift, '', 2)
 			} else {
-				return s.new_token(.lt, '')
+				return s.new_token(.lt, '', 1)
 			}
 		}
 		`=` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.eq, '')
+				return s.new_token(.eq, '', 2)
 			} else if nextc == `>` {
 				s.pos++
-				return s.new_token(.arrow, '')
+				return s.new_token(.arrow, '', 2)
 			} else {
-				return s.new_token(.assign, '')
+				return s.new_token(.assign, '', 1)
 			}
 		}
 		`:` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.decl_assign, '')
+				return s.new_token(.decl_assign, '', 2)
 			} else {
-				return s.new_token(.colon, '')
+				return s.new_token(.colon, '', 1)
 			}
 		}
 		`;` {
-			return s.new_token(.semicolon, '')
+			return s.new_token(.semicolon, '', 1)
 		}
 		`!` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.ne, '')
+				return s.new_token(.ne, '', 2)
 			} else {
-				return s.new_token(.not, '')
+				return s.new_token(.not, '', 1)
 			}
 		}
 		`~` {
-			return s.new_token(.bit_not, '')
+			return s.new_token(.bit_not, '', 1)
 		}
 		`/` {
 			if nextc == `=` {
 				s.pos++
-				return s.new_token(.div_assign, '')
+				return s.new_token(.div_assign, '', 2)
 			}
 			if nextc == `/` {
 				start := s.pos + 1
@@ -739,7 +744,7 @@ pub fn (s mut Scanner) scan() token.Token {
 				if is_separate_line_comment {
 					comment = '|' + comment
 				}
-				return s.new_token(.comment, comment)
+				return s.new_token(.comment, comment, comment.len + 2)
 			}
 			// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 			// Skip the comment (return the next token)
@@ -771,12 +776,12 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.comments_mode == .parse_comments {
 					comment := s.text[start..(s.pos - 1)].trim_space()
-					return s.new_token(.comment, comment)
+					return s.new_token(.comment, comment, comment.len + 4)
 				}
 				// Skip if not in fmt mode
 				return s.scan()
 			}
-			return s.new_token(.div, '')
+			return s.new_token(.div, '', 1)
 		}
 		else {}
 	}
@@ -1020,7 +1025,7 @@ pub fn (s &Scanner) error(msg string) {
 		line_nr: s.line_nr
 		pos: s.pos
 	}
-	eprintln(util.formated_error('error', msg, s.file_path, pos))
+	eprintln(util.formatted_error('error', msg, s.file_path, pos))
 	exit(1)
 }
diff --git a/vlib/v/token/position.v b/vlib/v/token/position.v
index 77897debdc..5747183666 100644
--- a/vlib/v/token/position.v
+++ b/vlib/v/token/position.v
@@ -7,6 +7,7 @@ pub struct Position {
 pub:
 	line_nr int // the line number in the source where the token occured
 	pos     int // the position of the token in scanner text
+	len     int // length of the literal in the source
 }
 
 [inline]
@@ -14,5 +15,6 @@ pub fn (tok &Token) position() Position {
 	return Position{
 		line_nr: tok.line_nr - 1
 		pos: tok.pos
+		len: tok.len
 	}
 }
diff --git a/vlib/v/token/token.v b/vlib/v/token/token.v
index a960c0c655..32777f0344 100644
--- a/vlib/v/token/token.v
+++ b/vlib/v/token/token.v
@@ -10,6 +10,7 @@ pub:
 	line_nr int // the line number in the source where the token occured
 	// name_idx int // name table index for O(1) lookup
 	pos     int // the position of the token in scanner text
+	len     int // length of the literal
 }
 
 pub enum Kind {
diff --git a/vlib/v/util/errors.v b/vlib/v/util/errors.v
index 7d3abcc088..177d755dfe 100644
--- a/vlib/v/util/errors.v
+++ b/vlib/v/util/errors.v
@@ -41,7 +41,7 @@ pub fn new_error_manager() &EManager {
 	return &EManager{ support_color: term.can_show_color_on_stderr() }
 }
 
-pub fn formated_error(kind string /*error or warn*/, emsg string, filepath string, pos token.Position) string {
+pub fn formatted_error(kind string /*error or warn*/, emsg string, filepath string, pos token.Position) string {
 	mut path := filepath
 	verror_paths_override := os.getenv('VERROR_PATHS')
 	if verror_paths_override == 'absolute' {
@@ -95,7 +95,8 @@ pub fn formated_error(kind string /*error or warn*/, emsg string, filepath strin
 			}
 			continue
 		}
-		pointerline << if emanager.support_color { term.bold(term.blue('^')) } else { '^' }
+		underline := '~'.repeat(pos.len)
+		pointerline << if emanager.support_color { term.bold(term.blue(underline)) } else { underline }
 		break
 	}
 	clines << '	' + pointerline.join('')
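-- 
For illustration only (a hedged sketch, not part of the diff itself): because
token.Position now carries the literal's length, formatted_error() can underline
the whole offending token with '~'.repeat(pos.len) instead of printing a single
`^` at one column. Given a hypothetical source file such as

    enum Color {
        red = 'x'
    }

the checker error above would be rendered roughly as

    error: default value for enum has to be an integer
        red = 'x'
              ~~~

where the exact path/line prefix and coloring depend on util.formatted_error()
and terminal support.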