From aa4e22c287a5987129404d6bb63cdf8fa0fd85dd Mon Sep 17 00:00:00 2001
From: Ned Palacios <7358345+nedpals@users.noreply.github.com>
Date: Tue, 23 Mar 2021 13:23:46 +0800
Subject: [PATCH] scanner, token: add column information to tokens (#9407)

---
 vlib/v/ast/ast.v         |  2 ++
 vlib/v/checker/checker.v |  3 +--
 vlib/v/parser/if_match.v |  3 +++
 vlib/v/parser/struct.v   |  1 +
 vlib/v/scanner/scanner.v | 30 +++++++++++++++---------------
 vlib/v/token/position.v  |  5 ++++-
 vlib/v/token/token.v     |  1 +
 vlib/v/util/errors.v     | 28 ++++++----------------------
 8 files changed, 33 insertions(+), 40 deletions(-)

diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v
index bd99fd74c0..1a5e39e13a 100644
--- a/vlib/v/ast/ast.v
+++ b/vlib/v/ast/ast.v
@@ -1440,6 +1440,7 @@ pub fn (expr Expr) position() token.Position {
 				line_nr: expr.pos.line_nr
 				pos: left_pos.pos
 				len: right_pos.pos - left_pos.pos + right_pos.len
+				col: left_pos.col
 				last_line: right_pos.last_line
 			}
 		}
@@ -1563,6 +1564,7 @@ pub fn (node Node) position() token.Position {
 						line_nr: -1
 						pos: -1
 						last_line: -1
+						col: -1
 					}
 				}
 			}
diff --git a/vlib/v/checker/checker.v b/vlib/v/checker/checker.v
index 9dd63eb759..f84929c62e 100644
--- a/vlib/v/checker/checker.v
+++ b/vlib/v/checker/checker.v
@@ -4402,8 +4402,7 @@ fn (mut c Checker) at_expr(mut node ast.AtExpr) table.Type {
 			node.val = (node.pos.line_nr + 1).str()
 		}
 		.column_nr {
-			_, column := util.filepath_pos_to_source_and_column(c.file.path, node.pos)
-			node.val = (column + 1).str()
+			node.val = (node.pos.col + 1).str()
 		}
 		.vhash {
 			node.val = util.vhash()
diff --git a/vlib/v/parser/if_match.v b/vlib/v/parser/if_match.v
index d1e0bac9e6..01e93098e3 100644
--- a/vlib/v/parser/if_match.v
+++ b/vlib/v/parser/if_match.v
@@ -257,6 +257,7 @@ fn (mut p Parser) match_expr() ast.MatchExpr {
 		line_nr: match_first_pos.line_nr
 		pos: match_first_pos.pos
 		len: match_last_pos.pos - match_first_pos.pos + match_last_pos.len
+		col: match_first_pos.col
 	}
 	if p.tok.kind == .rcbr {
 		p.check(.rcbr)
@@ -401,6 +402,7 @@ fn (mut p Parser) select_expr() ast.SelectExpr {
 			line_nr: branch_first_pos.line_nr
 			pos: branch_first_pos.pos
 			len: branch_last_pos.pos - branch_first_pos.pos + branch_last_pos.len
+			col: branch_first_pos.col
 		}
 		post_comments := p.eat_comments({})
 		pos.update_last_line(p.prev_tok.line_nr)
@@ -425,6 +427,7 @@ fn (mut p Parser) select_expr() ast.SelectExpr {
 		line_nr: match_first_pos.line_nr
 		pos: match_first_pos.pos
 		len: match_last_pos.pos - match_first_pos.pos + match_last_pos.len
+		col: match_first_pos.col
 	}
 	if p.tok.kind == .rcbr {
 		p.check(.rcbr)
diff --git a/vlib/v/parser/struct.v b/vlib/v/parser/struct.v
index 5120c7169a..2371c3a948 100644
--- a/vlib/v/parser/struct.v
+++ b/vlib/v/parser/struct.v
@@ -389,6 +389,7 @@ fn (mut p Parser) struct_init(short_syntax bool) ast.StructInit {
 				line_nr: first_field_pos.line_nr
 				pos: first_field_pos.pos
 				len: field_len
+				col: first_field_pos.col
 			}
 		}
 		i++
diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v
index f0e6ab5230..4768171747 100644
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@@ -26,7 +26,8 @@ pub mut:
 	text              string // the whole text of the file
 	pos               int    // current position in the file, first character is s.text[0]
 	line_nr           int    // current line number
-	last_nl_pos       int    // for calculating column
+	last_nl_pos       int = -1 // for calculating column
+	is_crlf           bool   // special check when computing columns
 	is_inside_string  bool   // set to true in a string, *at the start* of an $var or ${expr}
 	is_inter_start    bool   // for hacky string interpolation TODO simplify
 	is_inter_end      bool
@@ -177,6 +178,7 @@ fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Tok
 		kind: tok_kind
 		lit: lit
 		line_nr: s.line_nr + line_offset
+		col: mu.max(1, s.current_column() - len + 1)
 		pos: s.pos - len + 1
 		len: len
 		tidx: cidx
@@ -189,6 +191,7 @@ fn (s &Scanner) new_eof_token() token.Token {
 		kind: .eof
 		lit: ''
 		line_nr: s.line_nr + 1
+		col: 1
 		pos: s.pos
 		len: 1
 		tidx: s.tidx
@@ -203,6 +206,7 @@ fn (mut s Scanner) new_multiline_token(tok_kind token.Kind, lit string, len int,
 		kind: tok_kind
 		lit: lit
 		line_nr: start_line + 1
+		col: mu.max(1, s.current_column() - len + 1)
 		pos: s.pos - len + 1
 		len: len
 		tidx: cidx
@@ -495,6 +499,9 @@ fn (mut s Scanner) skip_whitespace() {
 		if util.is_nl(s.text[s.pos]) && s.is_vh {
 			return
 		}
+		if s.pos + 1 < s.text.len && s.text[s.pos] == `\r` && s.text[s.pos + 1] == `\n` {
+			s.is_crlf = true
+		}
 		// Count \r\n as one line
 		if util.is_nl(s.text[s.pos]) && !s.expect('\r\n', s.pos - 1) {
 			s.inc_line_number()
@@ -863,19 +870,6 @@ fn (mut s Scanner) text_scan() token.Token {
 				}
 				return s.new_token(.name, name, name.len)
 			}
-			/*
-			case `\r`:
-		if nextc == `\n` {
-			s.pos++
-			s.last_nl_pos = s.pos
-			return s.new_token(.nl, '')
-		}
-	 }
-	case `\n`:
-		s.last_nl_pos = s.pos
-		return s.new_token(.nl, '')
-	 }
-			*/
 			`.` {
 				if nextc == `.` {
 					s.pos++
@@ -1298,7 +1292,10 @@ fn (mut s Scanner) eat_to_end_of_line() {
 
 [inline]
 fn (mut s Scanner) inc_line_number() {
-	s.last_nl_pos = s.pos
+	s.last_nl_pos = mu.min(s.text.len - 1, s.pos)
+	if s.is_crlf {
+		s.last_nl_pos++
+	}
 	s.line_nr++
 	s.line_ends << s.pos
 	if s.line_nr > s.nr_lines {
@@ -1331,6 +1328,7 @@ pub fn (mut s Scanner) warn(msg string) {
 	pos := token.Position{
 		line_nr: s.line_nr
 		pos: s.pos
+		col: s.current_column() - 1
 	}
 	if s.pref.output_mode == .stdout {
 		eprintln(util.formatted_error('warning:', msg, s.file_path, pos))
@@ -1348,6 +1346,7 @@ pub fn (mut s Scanner) error(msg string) {
 	pos := token.Position{
 		line_nr: s.line_nr
 		pos: s.pos
+		col: s.current_column() - 1
 	}
 	if s.pref.output_mode == .stdout {
 		eprintln(util.formatted_error('error:', msg, s.file_path, pos))
@@ -1371,6 +1370,7 @@ fn (mut s Scanner) vet_error(msg string, fix vet.FixKind) {
 		file_path: s.file_path
 		pos: token.Position{
 			line_nr: s.line_nr
+			col: s.current_column() - 1
 		}
 		kind: .error
 		fix: fix
diff --git a/vlib/v/token/position.v b/vlib/v/token/position.v
index 7f722bef29..fc99cfc0ef 100644
--- a/vlib/v/token/position.v
+++ b/vlib/v/token/position.v
@@ -8,12 +8,13 @@ pub:
 	len     int // length of the literal in the source
 	line_nr int // the line number in the source where the token occured
 	pos     int // the position of the token in scanner text
+	col     int // the column in the source where the token occurred
 pub mut:
 	last_line int // the line number where the ast object ends (used by vfmt)
 }
 
 pub fn (pos Position) str() string {
-	return 'Position{ line_nr: $pos.line_nr, last_line: $pos.last_line, pos: $pos.pos, len: $pos.len }'
+	return 'Position{ line_nr: $pos.line_nr, last_line: $pos.last_line, pos: $pos.pos, col: $pos.col, len: $pos.len }'
 }
 
 pub fn (pos Position) extend(end Position) Position {
@@ -30,6 +31,7 @@ pub fn (pos Position) extend_with_last_line(end Position, last_line int) Positio
 		line_nr: pos.line_nr
 		last_line: last_line - 1
 		pos: pos.pos
+		col: pos.col
 	}
 }
 
@@ -44,5 +46,6 @@ pub fn (tok &Token) position() Position {
 		line_nr: tok.line_nr - 1
 		pos: tok.pos
 		last_line: tok.line_nr - 1
+		col: tok.col - 1
 	}
 }
diff --git a/vlib/v/token/token.v b/vlib/v/token/token.v
index 8db550706c..d1b6bc7d87 100644
--- a/vlib/v/token/token.v
+++ b/vlib/v/token/token.v
@@ -8,6 +8,7 @@ pub:
 	kind    Kind   // the token number/enum; for quick comparisons
 	lit     string // literal representation of the token
 	line_nr int    // the line number in the source where the token occured
+	col     int    // the column in the source where the token occurred
 	// name_idx int // name table index for O(1) lookup
 	pos  int // the position of the token in scanner text
 	len  int // length of the literal
diff --git a/vlib/v/util/errors.v b/vlib/v/util/errors.v
index de5ec6cd05..bb560ebc84 100644
--- a/vlib/v/util/errors.v
+++ b/vlib/v/util/errors.v
@@ -83,9 +83,9 @@ pub fn formatted_error(kind string, omsg string, filepath string, pos token.Posi
 		}
 	}
 	//
-	source, column := filepath_pos_to_source_and_column(filepath, pos)
-	position := '$path:${pos.line_nr + 1}:${mu.max(1, column + 1)}:'
-	scontext := source_context(kind, source, column, pos).join('\n')
+	source := read_file(filepath) or { '' }
+	position := '$path:${pos.line_nr + 1}:${mu.max(1, pos.col + 1)}:'
+	scontext := source_context(kind, source, pos).join('\n')
 	final_position := bold(position)
 	final_kind := bold(color(kind, kind))
 	final_msg := emsg
@@ -94,23 +94,7 @@ pub fn formatted_error(kind string, omsg string, filepath string, pos token.Posi
 	return '$final_position $final_kind $final_msg$final_context'.trim_space()
 }
 
-pub fn filepath_pos_to_source_and_column(filepath string, pos token.Position) (string, int) {
-	// TODO: optimize this; may be use a cache.
-	// The column should not be so computationally hard to get.
-	source := read_file(filepath) or { '' }
-	mut p := mu.max(0, mu.min(source.len - 1, pos.pos))
-	if source.len > 0 {
-		for ; p >= 0; p-- {
-			if source[p] == `\n` || source[p] == `\r` {
-				break
-			}
-		}
-	}
-	column := mu.max(0, pos.pos - p - 1)
-	return source, column
-}
-
-pub fn source_context(kind string, source string, column int, pos token.Position) []string {
+pub fn source_context(kind string, source string, pos token.Position) []string {
 	mut clines := []string{}
 	if source.len == 0 {
 		return clines
@@ -121,8 +105,8 @@ pub fn source_context(kind string, source string, column int, pos token.Position
 	tab_spaces := '    '
 	for iline := bline; iline <= aline; iline++ {
 		sline := source_lines[iline]
-		start_column := mu.max(0, mu.min(column, sline.len))
-		end_column := mu.max(0, mu.min(column + mu.max(0, pos.len), sline.len))
+		start_column := mu.max(0, mu.min(pos.col, sline.len))
+		end_column := mu.max(0, mu.min(pos.col + mu.max(0, pos.len), sline.len))
 		cline := if iline == pos.line_nr {
 			sline[..start_column] + color(kind, sline[start_column..end_column]) +
 				sline[end_column..]