From aa4e22c287a5987129404d6bb63cdf8fa0fd85dd Mon Sep 17 00:00:00 2001 From: Ned Palacios <7358345+nedpals@users.noreply.github.com> Date: Tue, 23 Mar 2021 13:23:46 +0800 Subject: [PATCH] scanner, token: add column information to tokens (#9407) --- vlib/v/ast/ast.v | 2 ++ vlib/v/checker/checker.v | 3 +-- vlib/v/parser/if_match.v | 3 +++ vlib/v/parser/struct.v | 1 + vlib/v/scanner/scanner.v | 30 +++++++++++++++--------------- vlib/v/token/position.v | 5 ++++- vlib/v/token/token.v | 1 + vlib/v/util/errors.v | 28 ++++++---------------------- 8 files changed, 33 insertions(+), 40 deletions(-) diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v index bd99fd74c0..1a5e39e13a 100644 --- a/vlib/v/ast/ast.v +++ b/vlib/v/ast/ast.v @@ -1440,6 +1440,7 @@ pub fn (expr Expr) position() token.Position { line_nr: expr.pos.line_nr pos: left_pos.pos len: right_pos.pos - left_pos.pos + right_pos.len + col: left_pos.col last_line: right_pos.last_line } } @@ -1563,6 +1564,7 @@ pub fn (node Node) position() token.Position { line_nr: -1 pos: -1 last_line: -1 + col: -1 } } } diff --git a/vlib/v/checker/checker.v b/vlib/v/checker/checker.v index 9dd63eb759..f84929c62e 100644 --- a/vlib/v/checker/checker.v +++ b/vlib/v/checker/checker.v @@ -4402,8 +4402,7 @@ fn (mut c Checker) at_expr(mut node ast.AtExpr) table.Type { node.val = (node.pos.line_nr + 1).str() } .column_nr { - _, column := util.filepath_pos_to_source_and_column(c.file.path, node.pos) - node.val = (column + 1).str() + node.val = (node.pos.col + 1).str() } .vhash { node.val = util.vhash() diff --git a/vlib/v/parser/if_match.v b/vlib/v/parser/if_match.v index d1e0bac9e6..01e93098e3 100644 --- a/vlib/v/parser/if_match.v +++ b/vlib/v/parser/if_match.v @@ -257,6 +257,7 @@ fn (mut p Parser) match_expr() ast.MatchExpr { line_nr: match_first_pos.line_nr pos: match_first_pos.pos len: match_last_pos.pos - match_first_pos.pos + match_last_pos.len + col: match_first_pos.col } if p.tok.kind == .rcbr { p.check(.rcbr) @@ -401,6 +402,7 @@ fn (mut p Parser) select_expr() ast.SelectExpr { line_nr: branch_first_pos.line_nr pos: branch_first_pos.pos len: branch_last_pos.pos - branch_first_pos.pos + branch_last_pos.len + col: branch_first_pos.col } post_comments := p.eat_comments({}) pos.update_last_line(p.prev_tok.line_nr) @@ -425,6 +427,7 @@ fn (mut p Parser) select_expr() ast.SelectExpr { line_nr: match_first_pos.line_nr pos: match_first_pos.pos len: match_last_pos.pos - match_first_pos.pos + match_last_pos.len + col: match_first_pos.col } if p.tok.kind == .rcbr { p.check(.rcbr) diff --git a/vlib/v/parser/struct.v b/vlib/v/parser/struct.v index 5120c7169a..2371c3a948 100644 --- a/vlib/v/parser/struct.v +++ b/vlib/v/parser/struct.v @@ -389,6 +389,7 @@ fn (mut p Parser) struct_init(short_syntax bool) ast.StructInit { line_nr: first_field_pos.line_nr pos: first_field_pos.pos len: field_len + col: first_field_pos.col } } i++ diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index f0e6ab5230..4768171747 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -26,7 +26,8 @@ pub mut: text string // the whole text of the file pos int // current position in the file, first character is s.text[0] line_nr int // current line number - last_nl_pos int // for calculating column + last_nl_pos int = -1 // for calculating column + is_crlf bool // special check when computing columns is_inside_string bool // set to true in a string, *at the start* of an $var or ${expr} is_inter_start bool // for hacky string interpolation TODO simplify is_inter_end bool @@ -177,6 +178,7 @@ fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Tok kind: tok_kind lit: lit line_nr: s.line_nr + line_offset + col: mu.max(1, s.current_column() - len + 1) pos: s.pos - len + 1 len: len tidx: cidx @@ -189,6 +191,7 @@ fn (s &Scanner) new_eof_token() token.Token { kind: .eof lit: '' line_nr: s.line_nr + 1 + col: 1 pos: s.pos len: 1 tidx: s.tidx @@ -203,6 +206,7 @@ fn (mut s Scanner) new_multiline_token(tok_kind token.Kind, lit string, len int, kind: tok_kind lit: lit line_nr: start_line + 1 + col: mu.max(1, s.current_column() - len + 1) pos: s.pos - len + 1 len: len tidx: cidx @@ -495,6 +499,9 @@ fn (mut s Scanner) skip_whitespace() { if util.is_nl(s.text[s.pos]) && s.is_vh { return } + if s.pos + 1 < s.text.len && s.text[s.pos] == `\r` && s.text[s.pos + 1] == `\n` { + s.is_crlf = true + } // Count \r\n as one line if util.is_nl(s.text[s.pos]) && !s.expect('\r\n', s.pos - 1) { s.inc_line_number() @@ -863,19 +870,6 @@ fn (mut s Scanner) text_scan() token.Token { } return s.new_token(.name, name, name.len) } - /* - case `\r`: - if nextc == `\n` { - s.pos++ - s.last_nl_pos = s.pos - return s.new_token(.nl, '') - } - } - case `\n`: - s.last_nl_pos = s.pos - return s.new_token(.nl, '') - } - */ `.` { if nextc == `.` { s.pos++ @@ -1298,7 +1292,10 @@ fn (mut s Scanner) eat_to_end_of_line() { [inline] fn (mut s Scanner) inc_line_number() { - s.last_nl_pos = s.pos + s.last_nl_pos = mu.min(s.text.len - 1, s.pos) + if s.is_crlf { + s.last_nl_pos++ + } s.line_nr++ s.line_ends << s.pos if s.line_nr > s.nr_lines { @@ -1331,6 +1328,7 @@ pub fn (mut s Scanner) warn(msg string) { pos := token.Position{ line_nr: s.line_nr pos: s.pos + col: s.current_column() - 1 } if s.pref.output_mode == .stdout { eprintln(util.formatted_error('warning:', msg, s.file_path, pos)) @@ -1348,6 +1346,7 @@ pub fn (mut s Scanner) error(msg string) { pos := token.Position{ line_nr: s.line_nr pos: s.pos + col: s.current_column() - 1 } if s.pref.output_mode == .stdout { eprintln(util.formatted_error('error:', msg, s.file_path, pos)) @@ -1371,6 +1370,7 @@ fn (mut s Scanner) vet_error(msg string, fix vet.FixKind) { file_path: s.file_path pos: token.Position{ line_nr: s.line_nr + col: s.current_column() - 1 } kind: .error fix: fix diff --git a/vlib/v/token/position.v b/vlib/v/token/position.v index 7f722bef29..fc99cfc0ef 100644 --- a/vlib/v/token/position.v +++ b/vlib/v/token/position.v @@ -8,12 +8,13 @@ pub: len int // length of the literal in the source line_nr int // the line number in the source where the token occured pos int // the position of the token in scanner text + col int // the column in the source where the token occured pub mut: last_line int // the line number where the ast object ends (used by vfmt) } pub fn (pos Position) str() string { - return 'Position{ line_nr: $pos.line_nr, last_line: $pos.last_line, pos: $pos.pos, len: $pos.len }' + return 'Position{ line_nr: $pos.line_nr, last_line: $pos.last_line, pos: $pos.pos, col: $pos.col, len: $pos.len }' } pub fn (pos Position) extend(end Position) Position { @@ -30,6 +31,7 @@ pub fn (pos Position) extend_with_last_line(end Position, last_line int) Positio line_nr: pos.line_nr last_line: last_line - 1 pos: pos.pos + col: pos.col } } @@ -44,5 +46,6 @@ pub fn (tok &Token) position() Position { line_nr: tok.line_nr - 1 pos: tok.pos last_line: tok.line_nr - 1 + col: tok.col - 1 } } diff --git a/vlib/v/token/token.v b/vlib/v/token/token.v index 8db550706c..d1b6bc7d87 100644 --- a/vlib/v/token/token.v +++ b/vlib/v/token/token.v @@ -8,6 +8,7 @@ pub: kind Kind // the token number/enum; for quick comparisons lit string // literal representation of the token line_nr int // the line number in the source where the token occured + col int // the column in the source where the token occured // name_idx int // name table index for O(1) lookup pos int // the position of the token in scanner text len int // length of the literal diff --git a/vlib/v/util/errors.v b/vlib/v/util/errors.v index de5ec6cd05..bb560ebc84 100644 --- a/vlib/v/util/errors.v +++ b/vlib/v/util/errors.v @@ -83,9 +83,9 @@ pub fn formatted_error(kind string, omsg string, filepath string, pos token.Posi } } // - source, column := filepath_pos_to_source_and_column(filepath, pos) - position := '$path:${pos.line_nr + 1}:${mu.max(1, column + 1)}:' - scontext := source_context(kind, source, column, pos).join('\n') + source := read_file(filepath) or { '' } + position := '$path:${pos.line_nr + 1}:${mu.max(1, pos.col + 1)}:' + scontext := source_context(kind, source, pos).join('\n') final_position := bold(position) final_kind := bold(color(kind, kind)) final_msg := emsg @@ -94,23 +94,7 @@ pub fn formatted_error(kind string, omsg string, filepath string, pos token.Posi return '$final_position $final_kind $final_msg$final_context'.trim_space() } -pub fn filepath_pos_to_source_and_column(filepath string, pos token.Position) (string, int) { - // TODO: optimize this; may be use a cache. - // The column should not be so computationally hard to get. - source := read_file(filepath) or { '' } - mut p := mu.max(0, mu.min(source.len - 1, pos.pos)) - if source.len > 0 { - for ; p >= 0; p-- { - if source[p] == `\n` || source[p] == `\r` { - break - } - } - } - column := mu.max(0, pos.pos - p - 1) - return source, column -} - -pub fn source_context(kind string, source string, column int, pos token.Position) []string { +pub fn source_context(kind string, source string, pos token.Position) []string { mut clines := []string{} if source.len == 0 { return clines @@ -121,8 +105,8 @@ pub fn source_context(kind string, source string, column int, pos token.Position tab_spaces := ' ' for iline := bline; iline <= aline; iline++ { sline := source_lines[iline] - start_column := mu.max(0, mu.min(column, sline.len)) - end_column := mu.max(0, mu.min(column + mu.max(0, pos.len), sline.len)) + start_column := mu.max(0, mu.min(pos.col, sline.len)) + end_column := mu.max(0, mu.min(pos.col + mu.max(0, pos.len), sline.len)) cline := if iline == pos.line_nr { sline[..start_column] + color(kind, sline[start_column..end_column]) + sline[end_column..]