v/vlib/v/scanner/scanner.v

// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module scanner

import math.mathutil as mu
import os
import strconv
import v.token
import v.pref
import v.util
import v.vet
import v.errors

// Byte values that the scanner routines in this file compare against.
const (
	single_quote = `\'`
	double_quote = `"`
	// char used as number separator
	num_sep      = `_`
	b_lf         = 10 // line feed, compared against bytes of the scanned text
	b_cr         = 13 // carriage return, compared against bytes of the scanned text
)

// Scanner holds the source text together with all the state needed to
// turn it into tokens. The constructors in this file scan the whole text
// eagerly into `all_tokens`; `scan()` then serves tokens from that buffer.
pub struct Scanner {
pub mut:
	file_path         string // '/path/to/file.v'
	file_base         string // 'file.v'
	text              string // the whole text of the file
	pos               int    // current position in the file, first character is s.text[0]
	line_nr           int    // current line number
	last_nl_pos       int = -1 // for calculating column
	is_crlf           bool   // special check when computing columns
	is_inside_string  bool   // set to true in a string, *at the start* of an $var or ${expr}
	is_inter_start    bool   // for hacky string interpolation TODO simplify
	is_inter_end      bool
	is_enclosed_inter bool
	line_comment      string
	// prev_tok                 TokenKind
	is_started                  bool // false until the first text_scan iteration; afterwards each iteration first advances pos
	is_print_line_on_error      bool
	is_print_colored_error      bool
	is_print_rel_paths_on_error bool
	quote                       byte // which quote is used to denote current string: ' or "
	inter_quote                 byte // quote char seen by ident_string while an interpolation was in progress
	line_ends                   []int // the positions of source lines ends   (i.e. \n signs)
	nr_lines                    int   // total number of lines in the source file that were scanned
	is_vh                       bool  // Keep newlines
	is_fmt                      bool  // Used for v fmt.
	comments_mode               CommentsMode
	is_inside_toplvl_statement  bool // *only* used in comments_mode: .toplevel_comments, toggled by parser
	all_tokens                  []token.Token // *only* used in comments_mode: .toplevel_comments, contains all tokens
	tidx                        int // index in all_tokens of the next token that buffer_scan will look at
	eofs                        int // how many times end_of_file() has been called
	pref                        &pref.Preferences
	errors                      []errors.Error // NOTE(review): presumably filled by s.error(), which is not visible here
	warnings                    []errors.Warning
	notices                     []errors.Notice
	vet_errors                  []vet.Error
}

/*
How the .toplevel_comments mode works:

In this mode, the scanner scans *everything* at once, before parsing starts,
including all the comments, and stores the results in a buffer s.all_tokens.

Then .scan() just returns s.all_tokens[ s.tidx++ ] *ignoring* the
comment tokens. In other words, by default in this mode, the parser
*will not see any comments* inside top level statements, so it has
no reason to complain about them.

When the parser determines, that it is outside of a top level statement,
it tells the scanner to backtrack s.tidx to the current p.tok index,
then it changes .is_inside_toplvl_statement to false, and refills its
lookahead buffer (i.e. p.peek_tok), from the scanner.

In effect, from the parser's point of view, the next tokens, that it will
receive with p.next(), will be the same, as if comments are not ignored
anymore, *between* top level statements.

When the parser determines, that it is going again inside a top level
statement, it does the same, this time setting .is_inside_toplvl_statement
to true, again refilling the lookahead buffer => calling .next() in this
mode, will again ignore all the comment tokens, till the top level statement
is finished.
*/
// The different kinds of scanner modes:
//
// .skip_comments - simplest/fastest, just ignores all comments early.
// This mode is used by the compiler itself.
//
// .parse_comments is used by vfmt. Ideally it should handle inline /* */
// comments too, i.e. it returns every kind of comment as a new token.
//
// .toplevel_comments is used by vdoc, parses *only* top level ones
// that are *outside* structs/enums/fns.
// CommentsMode selects what the scanner does with comment tokens.
pub enum CommentsMode {
	skip_comments // comment tokens are dropped while the token buffer is filled
	parse_comments // every comment token is handed to the caller
	toplevel_comments // comment tokens are handed out only while is_inside_toplvl_statement is false
}

// new_scanner_file builds a scanner over the contents of `file_path`.
// A missing or unreadable file is reported through verror.
// The whole text is tokenized right away by init_scanner, before the
// scanner is returned.
pub fn new_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
	if !os.exists(file_path) {
		verror("$file_path doesn't exist")
	}
	contents := util.read_file(file_path) or {
		verror(err)
		return voidptr(0)
	}
	mut sc := &Scanner{
		file_path: file_path
		file_base: os.base(file_path)
		text: contents
		comments_mode: comments_mode
		pref: pref
		is_fmt: pref.is_fmt
		is_print_line_on_error: true
		is_print_colored_error: true
		is_print_rel_paths_on_error: true
	}
	sc.init_scanner()
	return sc
}

// new_scanner builds a scanner over an in-memory `text`.
// Both file_path and file_base are set to the placeholder 'internal_memory'.
// The whole text is tokenized right away by init_scanner, before the
// scanner is returned.
pub fn new_scanner(text string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
	placeholder := 'internal_memory'
	mut sc := &Scanner{
		file_path: placeholder
		file_base: placeholder
		text: text
		comments_mode: comments_mode
		pref: pref
		is_fmt: pref.is_fmt
		is_print_line_on_error: true
		is_print_colored_error: true
		is_print_rel_paths_on_error: true
	}
	sc.init_scanner()
	return sc
}

// init_scanner tokenizes the whole text into s.all_tokens, using the
// scanner's own comments mode. The 'PARSE' timer is paused around the
// scan and resumed afterwards.
fn (mut s Scanner) init_scanner() {
	util.get_timers().measure_pause('PARSE')
	s.scan_all_tokens_in_buffer(s.comments_mode)
	util.get_timers().measure_resume('PARSE')
}

// free releases the memory of the scanned text.
// Only s.text is freed here; the other fields are left untouched.
[unsafe]
pub fn (mut s Scanner) free() {
	unsafe {
		s.text.free()
	}
}

// should_parse_comment reports whether a comment token must currently be
// handed to the caller: always in .parse_comments mode, only outside of
// top level statements in .toplevel_comments mode, and never otherwise.
[inline]
fn (s &Scanner) should_parse_comment() bool {
	return match s.comments_mode {
		.parse_comments { true }
		.toplevel_comments { !s.is_inside_toplvl_statement }
		.skip_comments { false }
	}
}

// set_is_inside_toplevel_statement stores `newstate` in
// s.is_inside_toplvl_statement, which should_parse_comment consults in
// .toplevel_comments mode.
// NB: this is called by v's parser
pub fn (mut s Scanner) set_is_inside_toplevel_statement(newstate bool) {
	s.is_inside_toplvl_statement = newstate
}

// set_current_tidx moves the token cursor to `cidx`, clamped to the
// range 0 ..= s.all_tokens.len.
pub fn (mut s Scanner) set_current_tidx(cidx int) {
	if cidx < 0 {
		s.tidx = 0
	} else if cidx > s.all_tokens.len {
		s.tidx = s.all_tokens.len
	} else {
		s.tidx = cidx
	}
}

// new_token creates a token of kind `tok_kind` that ends at the current
// scanner position and is `len` bytes long, then advances s.tidx.
// The reported line is 1-based, except for .hash tokens: the `#` branch
// of text_scan calls ignore_line(), which has already incremented the
// line counter, so no offset is added for them.
[inline]
fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
	mut tok_line := s.line_nr
	if tok_kind != .hash {
		tok_line++
	}
	tok := token.Token{
		kind: tok_kind
		lit: lit
		line_nr: tok_line
		col: mu.max(1, s.current_column() - len + 1)
		pos: s.pos - len + 1
		len: len
		tidx: s.tidx
	}
	s.tidx++
	return tok
}

// new_eof_token creates an .eof token located at the current scanner
// position. Unlike new_token, it does not advance s.tidx.
[inline]
fn (s &Scanner) new_eof_token() token.Token {
	eof_line := s.line_nr + 1
	eof_col := s.current_column()
	return token.Token{
		kind: .eof
		lit: ''
		line_nr: eof_line
		col: eof_col
		pos: s.pos
		len: 1
		tidx: s.tidx
	}
}

// new_multiline_token is like new_token, but for tokens that may span
// several lines: the reported line is taken from `start_line` (0-based,
// converted to 1-based here), not from the line the scanner is on now.
[inline]
fn (mut s Scanner) new_multiline_token(tok_kind token.Kind, lit string, len int, start_line int) token.Token {
	tok := token.Token{
		kind: tok_kind
		lit: lit
		line_nr: start_line + 1
		col: mu.max(1, s.current_column() - len + 1)
		pos: s.pos - len + 1
		len: len
		tidx: s.tidx
	}
	s.tidx++
	return tok
}

// ident_name reads the name that starts at s.pos. The first byte is taken
// unconditionally; every following name char or digit is appended.
// On return s.pos points at the last byte of the name.
[direct_array_access; inline]
fn (mut s Scanner) ident_name() string {
	first := s.pos
	mut past_end := first + 1
	for past_end < s.text.len {
		ch := s.text[past_end]
		if util.is_name_char(ch) || ch.is_digit() {
			past_end++
			continue
		}
		break
	}
	s.pos = past_end - 1
	return s.text[first..past_end]
}

// num_lit returns the numeric literal stored in s.text[start..end].
// In fmt mode the text is returned verbatim; otherwise a fresh copy
// without any `_` separators is built.
fn (s Scanner) num_lit(start int, end int) string {
	if s.is_fmt {
		return s.text[start..end]
	}
	unsafe {
		src := s.text.str
		// one extra byte for the terminating 0
		mut dst := malloc(end - start + 1)
		mut written := 0
		for k in start .. end {
			ch := src[k]
			if ch == scanner.num_sep {
				continue
			}
			dst[written] = ch
			written++
		}
		dst[written] = 0 // C string compatibility
		return dst.vstring_with_len(written)
	}
}

// ident_bin_number scans a `0b...` literal that starts at s.pos.
// Misplaced `_` separators, a missing number part and unsuitable digits
// are reported through s.error. On return s.pos points at the last byte
// that was consumed.
fn (mut s Scanner) ident_bin_number() string {
	literal_start := s.pos
	mut wrong_digit_pos := -1 // stays negative until an unsuitable digit shows up
	mut first_wrong_digit := `\0`
	s.pos += 2 // skip '0b'
	if s.pos < s.text.len && s.text[s.pos] == scanner.num_sep {
		s.error('separator `_` is only valid between digits in a numeric literal')
	}
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		is_sep := ch == scanner.num_sep
		if is_sep && s.text[s.pos - 1] == scanner.num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !is_sep && !ch.is_bin_digit() {
			// anything that is not a digit or a letter ends the literal
			if s.is_inside_string || !(ch.is_digit() || ch.is_letter()) {
				break
			}
			if wrong_digit_pos < 0 {
				wrong_digit_pos = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	if s.text[s.pos - 1] == scanner.num_sep {
		s.pos--
		s.error('cannot use `_` at the end of a numeric literal')
	} else if s.pos == literal_start + 2 {
		s.pos-- // adjust error position
		s.error('number part of this binary is not provided')
	} else if wrong_digit_pos >= 0 {
		s.pos = wrong_digit_pos // adjust error position
		s.error('this binary number has unsuitable digit `$first_wrong_digit.str()`')
	}
	lit := s.num_lit(literal_start, s.pos)
	s.pos--
	return lit
}

// ident_hex_number scans a `0x...` literal that starts at s.pos.
// When nothing follows the `0x` prefix in the text, '0x' is returned
// right away and s.pos is left unchanged. Otherwise misplaced `_`
// separators, a missing number part and unsuitable digits are reported
// through s.error, and s.pos ends up on the last consumed byte.
fn (mut s Scanner) ident_hex_number() string {
	literal_start := s.pos
	mut wrong_digit_pos := -1 // stays negative until an unsuitable digit shows up
	mut first_wrong_digit := `\0`
	if literal_start + 2 >= s.text.len {
		return '0x'
	}
	s.pos += 2 // skip '0x'
	if s.pos < s.text.len && s.text[s.pos] == scanner.num_sep {
		s.error('separator `_` is only valid between digits in a numeric literal')
	}
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		is_sep := ch == scanner.num_sep
		if is_sep && s.text[s.pos - 1] == scanner.num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !is_sep && !ch.is_hex_digit() {
			// any non-letter ends the literal
			if s.is_inside_string || !ch.is_letter() {
				break
			}
			if wrong_digit_pos < 0 {
				wrong_digit_pos = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	if s.text[s.pos - 1] == scanner.num_sep {
		s.pos--
		s.error('cannot use `_` at the end of a numeric literal')
	} else if s.pos == literal_start + 2 {
		s.pos-- // adjust error position
		s.error('number part of this hexadecimal is not provided')
	} else if wrong_digit_pos >= 0 {
		s.pos = wrong_digit_pos // adjust error position
		s.error('this hexadecimal number has unsuitable digit `$first_wrong_digit.str()`')
	}
	lit := s.num_lit(literal_start, s.pos)
	s.pos--
	return lit
}

// ident_oct_number scans a `0o...` literal that starts at s.pos.
// Misplaced `_` separators, a missing number part and unsuitable digits
// are reported through s.error. On return s.pos points at the last byte
// that was consumed.
fn (mut s Scanner) ident_oct_number() string {
	literal_start := s.pos
	mut wrong_digit_pos := -1 // stays negative until an unsuitable digit shows up
	mut first_wrong_digit := `\0`
	s.pos += 2 // skip '0o'
	if s.pos < s.text.len && s.text[s.pos] == scanner.num_sep {
		s.error('separator `_` is only valid between digits in a numeric literal')
	}
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		is_sep := ch == scanner.num_sep
		if is_sep && s.text[s.pos - 1] == scanner.num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !is_sep && !ch.is_oct_digit() {
			// anything that is not a digit or a letter ends the literal
			if s.is_inside_string || !(ch.is_digit() || ch.is_letter()) {
				break
			}
			if wrong_digit_pos < 0 {
				wrong_digit_pos = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	if s.text[s.pos - 1] == scanner.num_sep {
		s.pos--
		s.error('cannot use `_` at the end of a numeric literal')
	} else if s.pos == literal_start + 2 {
		s.pos-- // adjust error position
		s.error('number part of this octal is not provided')
	} else if wrong_digit_pos >= 0 {
		s.pos = wrong_digit_pos // adjust error position
		s.error('this octal number has unsuitable digit `$first_wrong_digit.str()`')
	}
	lit := s.num_lit(literal_start, s.pos)
	s.pos--
	return lit
}

// ident_dec_number scans a decimal literal that starts at s.pos:
// an integer part, an optional fractional part and an optional exponent
// (`123`, `1_000`, `.5`, `5.5`, `5e5`, `5.e5`).
// It stops before a `..` range operator and before a method call such as
// `5.str()`. Malformed literals are reported through s.error.
// On return s.pos points at the last byte that was consumed.
fn (mut s Scanner) ident_dec_number() string {
	mut has_wrong_digit := false
	mut first_wrong_digit_pos := 0
	mut first_wrong_digit := `\0`
	start_pos := s.pos
	// scan integer part
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if c == scanner.num_sep && s.text[s.pos - 1] == scanner.num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !c.is_digit() && c != scanner.num_sep {
			if !c.is_letter() || c in [`e`, `E`] || s.is_inside_string {
				break
			} else if !has_wrong_digit {
				has_wrong_digit = true
				first_wrong_digit_pos = s.pos
				first_wrong_digit = c
			}
		}
		s.pos++
	}
	// Only look at the previous byte when the integer part consumed something.
	// For a literal like `.5` nothing was consumed, so s.text[s.pos - 1] lies
	// *before* the literal: it does not exist when the literal is at offset 0,
	// and otherwise it is unrelated text that must not trigger this error.
	if s.pos > start_pos && s.text[s.pos - 1] == scanner.num_sep {
		s.pos--
		s.error('cannot use `_` at the end of a numeric literal')
	}
	mut call_method := false // true for, e.g., 5.str(), 5.5.str(), 5e5.str()
	mut is_range := false // true for, e.g., 5..10
	// scan fractional part
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		s.pos++
		if s.pos < s.text.len {
			// 5.5, 5.5.str()
			if s.text[s.pos].is_digit() {
				for s.pos < s.text.len {
					c := s.text[s.pos]
					if !c.is_digit() {
						if !c.is_letter() || c in [`e`, `E`] || s.is_inside_string {
							// 5.5.str()
							if c == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1].is_letter() {
								call_method = true
							}
							break
						} else if !has_wrong_digit {
							has_wrong_digit = true
							first_wrong_digit_pos = s.pos
							first_wrong_digit = c
						}
					}
					s.pos++
				}
			} else if s.text[s.pos] == `.` {
				// 5.. (a range)
				is_range = true
				s.pos--
			} else if s.text[s.pos] in [`e`, `E`] {
				// 5.e5
			} else if s.text[s.pos].is_letter() {
				// 5.str()
				call_method = true
				s.pos--
			} else {
				// 5.
			}
		}
	}
	// scan exponential part
	mut has_exp := false
	if s.pos < s.text.len && s.text[s.pos] in [`e`, `E`] {
		has_exp = true
		s.pos++
		if s.pos < s.text.len && s.text[s.pos] in [`-`, `+`] {
			s.pos++
		}
		for s.pos < s.text.len {
			c := s.text[s.pos]
			if !c.is_digit() {
				if !c.is_letter() || s.is_inside_string {
					// 5e5.str()
					if c == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1].is_letter() {
						call_method = true
					}
					break
				} else if !has_wrong_digit {
					has_wrong_digit = true
					first_wrong_digit_pos = s.pos
					first_wrong_digit = c
				}
			}
			s.pos++
		}
	}
	if has_wrong_digit {
		// error check: wrong digit
		s.pos = first_wrong_digit_pos // adjust error position
		s.error('this number has unsuitable digit `$first_wrong_digit.str()`')
	} else if s.text[s.pos - 1] in [`e`, `E`] {
		// error check: 5e
		s.pos-- // adjust error position
		s.error('exponent has no digits')
	} else if s.pos < s.text.len && s.text[s.pos] == `.` && !is_range && !call_method {
		// error check: 1.23.4, 123.e+3.4
		if has_exp {
			s.error('exponential part should be integer')
		} else {
			s.error('too many decimal points in number')
		}
	}
	number := s.num_lit(start_pos, s.pos)
	s.pos--
	return number
}

// ident_number scans the numeric literal at s.pos, picking the binary,
// hexadecimal or octal scanner when the text starts with `0b`, `0x` or
// `0o`, and the decimal scanner for everything else.
fn (mut s Scanner) ident_number() string {
	if s.expect('0b', s.pos) {
		return s.ident_bin_number()
	}
	if s.expect('0x', s.pos) {
		return s.ident_hex_number()
	}
	if s.expect('0o', s.pos) {
		return s.ident_oct_number()
	}
	return s.ident_dec_number()
}

// skip_whitespace advances s.pos past blank bytes (space, bytes 9..13,
// 0x85 and 0xa0), counting lines on the way. In .vh mode (s.is_vh) it
// stops at the first newline byte instead of skipping it.
[direct_array_access; inline]
fn (mut s Scanner) skip_whitespace() {
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		is_blank := ch == 32 || (ch > 8 && ch < 14) || ch == 0x85 || ch == 0xa0
		if !is_blank {
			return
		}
		is_cr := ch == scanner.b_cr
		is_lf := ch == scanner.b_lf
		if s.is_vh && (is_cr || is_lf) {
			return
		}
		if is_cr && s.pos + 1 < s.text.len && s.text[s.pos + 1] == scanner.b_lf {
			s.is_crlf = true
		}
		// Count \r\n as one line: the \n right after a \r adds nothing
		lf_after_cr := is_lf && s.pos > 0 && s.text[s.pos - 1] == scanner.b_cr
		if (is_cr || is_lf) && !lf_after_cr {
			s.inc_line_number()
		}
		s.pos++
	}
}

// end_of_file moves the scanner to the end of the text and returns an
// .eof token. It counts its own calls in s.eofs, and panics once it has
// been called more than 50 times, since that indicates a stuck parser.
fn (mut s Scanner) end_of_file() token.Token {
	s.eofs++
	if s.eofs > 50 {
		s.line_nr--
		panic(
			'the end of file `$s.file_path` has been reached 50 times already, the v parser is probably stuck.\n' +
			'This should not happen. Please report the bug here, and include the last 2-3 lines of your source code:\n' +
			'https://github.com/vlang/v/issues/new?labels=Bug&template=bug_report.md')
	}
	// only on the first call: bump the line number, when the scanner
	// position is not exactly at the end of the text
	if s.pos != s.text.len && s.eofs == 1 {
		s.inc_line_number()
	}
	s.pos = s.text.len
	return s.new_eof_token()
}

// scan_all_tokens_in_buffer scans all the remaining text into
// s.all_tokens, with s.comments_mode temporarily set to `mode`.
// Afterwards the previous comments mode is restored, and the token
// cursor s.tidx is reset to 0.
pub fn (mut s Scanner) scan_all_tokens_in_buffer(mode CommentsMode) {
	// s.scan_all_tokens_in_buffer is used mainly by vdoc,
	// in order to implement the .toplevel_comments mode.
	util.timing_start('SCAN')
	defer {
		util.timing_measure_cumulative('SCAN')
	}
	oldmode := s.comments_mode
	s.comments_mode = mode
	s.scan_remaining_text()
	s.comments_mode = oldmode
	s.tidx = 0
	// with -d debugscanner, dump every buffered token to stderr
	$if debugscanner ? {
		for t in s.all_tokens {
			eprintln('> tidx:${t.tidx:-5} | kind: ${t.kind:-10} | lit: $t.lit')
		}
	}
}

// scan_remaining_text appends every token produced by text_scan to
// s.all_tokens, up to and including the .eof token.
// Comment tokens are dropped in .skip_comments mode.
pub fn (mut s Scanner) scan_remaining_text() {
	for {
		tok := s.text_scan()
		if tok.kind == .comment && s.comments_mode == .skip_comments {
			continue
		}
		s.all_tokens << tok
		if tok.kind == .eof {
			return
		}
	}
}

// scan returns the next token; it simply delegates to buffer_scan,
// which serves tokens from the pre-filled s.all_tokens buffer.
pub fn (mut s Scanner) scan() token.Token {
	return s.buffer_scan()
}

// buffer_scan returns the next token from s.all_tokens and advances
// s.tidx. Comment tokens are skipped, unless should_parse_comment()
// says that they are currently wanted. Once the buffer is exhausted,
// the result of end_of_file() is returned.
pub fn (mut s Scanner) buffer_scan() token.Token {
	for {
		cur := s.tidx
		s.tidx++
		if cur >= s.all_tokens.len {
			return s.end_of_file()
		}
		tok := s.all_tokens[cur]
		if tok.kind == .comment && !s.should_parse_comment() {
			continue
		}
		return tok
	}
	return s.new_eof_token()
}

// peek_token returns the token that is `n` positions away from the
// token cursor s.tidx, without moving the cursor.
// When that position lies outside of s.all_tokens - past the end, or
// before the start for a negative `n` - an .eof token is returned,
// instead of indexing the buffer out of range.
[inline]
pub fn (s &Scanner) peek_token(n int) token.Token {
	idx := s.tidx + n
	if idx < 0 || idx >= s.all_tokens.len {
		return s.new_eof_token()
	}
	return s.all_tokens[idx]
}

// look_ahead returns the byte `n` positions after s.pos, or `\0` when
// that position is at or past the end of the text.
[direct_array_access; inline]
fn (s &Scanner) look_ahead(n int) byte {
	target := s.pos + n
	if target >= s.text.len {
		return `\0`
	}
	return s.text[target]
}

// text_scan scans the next token directly from s.text, starting at
// s.pos, and returns it. It drives the string interpolation state
// (is_inside_string, is_inter_start, is_inter_end, is_enclosed_inter),
// and skips comments, unless should_parse_comment() asks for them.
// At the end of the text, or after an invalid character, the result of
// end_of_file() is returned.
fn (mut s Scanner) text_scan() token.Token {
	// The for loop here is so that instead of doing
	// `return s.scan()` (which will use a new call stack frame),
	// text_scan can just do continue, keeping
	// memory & stack usage low.
	// That optimization mostly matters for long sections
	// of comments and string literals.
	for {
		// every call after the very first one starts by stepping past
		// the last byte of the previous token
		if s.is_started {
			s.pos++
		} else {
			s.is_started = true
		}
		if !s.is_inside_string {
			s.skip_whitespace()
		}
		if s.pos >= s.text.len {
			return s.end_of_file()
		}
		// End of $var, start next string
		if s.is_inter_end {
			if s.text[s.pos] == s.quote {
				s.is_inter_end = false
				return s.new_token(.string, '', 1)
			}
			s.is_inter_end = false
			ident_string := s.ident_string()
			return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
		}
		s.skip_whitespace()
		// end of file
		if s.pos >= s.text.len {
			return s.end_of_file()
		}
		// handle each char
		c := s.text[s.pos]
		nextc := s.look_ahead(1)
		// name or keyword
		if util.is_name_char(c) {
			name := s.ident_name()
			// tmp hack to detect . in ${}
			// Check if not .eof to prevent panic
			next_char := s.look_ahead(1)
			kind := token.keywords[name]
			if kind != .unknown {
				return s.new_token(kind, name, name.len)
			}
			// 'asdf $b' => "b" is the last name in the string, dont start parsing string
			// at the next ', skip it
			if s.is_inside_string {
				if next_char == s.quote {
					s.is_inter_end = true
					s.is_inter_start = false
					s.is_inside_string = false
				}
			}
			// end of `$expr`
			// allow `'$a.b'` and `'$a.c()'`
			if s.is_inter_start && next_char == `\\`
				&& s.look_ahead(2) !in [`x`, `n`, `r`, `\\`, `t`, `e`, `"`, `\'`] {
				s.warn('unknown escape sequence \\${s.look_ahead(2)}')
			}
			if s.is_inter_start && next_char == `(` {
				if s.look_ahead(2) != `)` {
					s.warn('use `\${f(expr)}` instead of `\$f(expr)`')
				}
			} else if s.is_inter_start && next_char != `.` {
				s.is_inter_end = true
				s.is_inter_start = false
			}
			if s.pos == 0 && next_char == ` ` {
				// If a single letter name at the start of the file, increment
				// Otherwise the scanner would be stuck at s.pos = 0
				s.pos++
			}
			return s.new_token(.name, name, name.len)
		} else if c.is_digit() || (c == `.` && nextc.is_digit()) {
			// `123`, `.123`
			if !s.is_inside_string {
				// In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed.
				mut start_pos := s.pos
				for start_pos < s.text.len && s.text[start_pos] == `0` {
					start_pos++
				}
				mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
				// for 0b, 0o, 0x the heading zero shouldn't be jumped
				if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
					prefix_zero_num--
				}
				s.pos += prefix_zero_num // jump these zeros
			}
			num := s.ident_number()
			return s.new_token(.number, num, num.len)
		}
		// Handle `'$fn()'`
		if c == `)` && s.is_inter_start {
			next_char := s.look_ahead(1)
			if next_char != `.` {
				s.is_inter_end = true
				s.is_inter_start = false
				if next_char == s.quote {
					s.is_inside_string = false
				}
				return s.new_token(.rpar, '', 1)
			}
		}
		// all other tokens
		match c {
			`+` {
				if nextc == `+` {
					s.pos++
					return s.new_token(.inc, '', 2)
				} else if nextc == `=` {
					s.pos++
					return s.new_token(.plus_assign, '', 2)
				}
				return s.new_token(.plus, '', 1)
			}
			`-` {
				if nextc == `-` {
					s.pos++
					return s.new_token(.dec, '', 2)
				} else if nextc == `=` {
					s.pos++
					return s.new_token(.minus_assign, '', 2)
				}
				return s.new_token(.minus, '', 1)
			}
			`*` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.mult_assign, '', 2)
				}
				return s.new_token(.mul, '', 1)
			}
			`^` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.xor_assign, '', 2)
				}
				return s.new_token(.xor, '', 1)
			}
			`%` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.mod_assign, '', 2)
				}
				return s.new_token(.mod, '', 1)
			}
			`?` {
				return s.new_token(.question, '', 1)
			}
			scanner.single_quote, scanner.double_quote {
				start_line := s.line_nr
				ident_string := s.ident_string()
				return s.new_multiline_token(.string, ident_string, ident_string.len + 2,
					start_line) // + two quotes
			}
			`\`` {
				// ` // apostrophe balance comment. do not remove
				ident_char := s.ident_char()
				return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
			}
			`(` {
				// TODO `$if vet {` for performance
				// nextc comes from the bounds checked look_ahead(1), so a `(` that
				// is the very last byte of the text can not index past its end.
				if s.pref.is_vet && nextc == ` ` {
					s.vet_error('Looks like you are adding a space after `(`', .vfmt)
				}
				return s.new_token(.lpar, '', 1)
			}
			`)` {
				// TODO `$if vet {` for performance
				// a `)` at offset 0 has no previous byte to inspect
				if s.pref.is_vet && s.pos > 0 && s.text[s.pos - 1] == ` ` {
					s.vet_error('Looks like you are adding a space before `)`', .vfmt)
				}
				return s.new_token(.rpar, '', 1)
			}
			`[` {
				return s.new_token(.lsbr, '', 1)
			}
			`]` {
				return s.new_token(.rsbr, '', 1)
			}
			`{` {
				// Skip { in `${` in strings
				if s.is_inside_string {
					continue
				}
				return s.new_token(.lcbr, '', 1)
			}
			`$` {
				if s.is_inside_string {
					return s.new_token(.str_dollar, '', 1)
				} else {
					return s.new_token(.dollar, '', 1)
				}
			}
			`}` {
				// s = `hello $name !`
				// s = `hello ${name} !`
				if s.is_enclosed_inter {
					if s.pos < s.text.len - 1 {
						s.pos++
					} else {
						s.error('unfinished string literal')
					}
					if s.text[s.pos] == s.quote {
						s.is_inside_string = false
						s.is_enclosed_inter = false
						return s.new_token(.string, '', 1)
					}
					s.is_enclosed_inter = false
					ident_string := s.ident_string()
					return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
				} else {
					return s.new_token(.rcbr, '', 1)
				}
			}
			`&` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.and_assign, '', 2)
				}
				afternextc := s.look_ahead(2)
				if nextc == `&` && afternextc.is_space() {
					s.pos++
					return s.new_token(.and, '', 2)
				}
				return s.new_token(.amp, '', 1)
			}
			`|` {
				if nextc == `|` {
					s.pos++
					return s.new_token(.logical_or, '', 2)
				}
				if nextc == `=` {
					s.pos++
					return s.new_token(.or_assign, '', 2)
				}
				return s.new_token(.pipe, '', 1)
			}
			`,` {
				return s.new_token(.comma, '', 1)
			}
			`@` {
				mut name := ''
				if nextc != `\0` {
					s.pos++
					name = s.ident_name()
				}
				if s.is_fmt {
					return s.new_token(.name, '@' + name, name.len + 1)
				}
				// @FN, @STRUCT, @MOD etc. See full list in token.valid_at_tokens
				if '@' + name in token.valid_at_tokens {
					return s.new_token(.at, '@' + name, name.len + 1)
				}
				if !token.is_key(name) {
					mut at_error_msg := '@ must be used before keywords or compile time variables (e.g. `@type string` or `@FN`)'
					// If name is all uppercase, the user is probably looking for a compile time variable ("at-token")
					if name.is_upper() {
						at_error_msg += '\nAvailable compile time variables:\n$token.valid_at_tokens'
					}
					s.error(at_error_msg)
				}
				return s.new_token(.name, name, name.len)
			}
			`.` {
				if nextc == `.` {
					s.pos++
					if s.pos + 1 < s.text.len && s.text[s.pos + 1] == `.` {
						s.pos++
						return s.new_token(.ellipsis, '', 3)
					}
					return s.new_token(.dotdot, '', 2)
				}
				return s.new_token(.dot, '', 1)
			}
			`#` {
				start := s.pos + 1
				s.ignore_line()
				if nextc == `!` {
					// treat shebang line (#!) as a comment
					comment := s.text[start - 1..s.pos].trim_space()
					return s.new_token(.comment, comment, comment.len + 2)
				}
				hash := s.text[start..s.pos].trim_space()
				return s.new_token(.hash, hash, hash.len + 2)
			}
			`>` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.ge, '', 2)
				} else if nextc == `>` {
					if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
						s.pos += 2
						return s.new_token(.right_shift_assign, '', 3)
					}
					s.pos++
					return s.new_token(.right_shift, '', 2)
				} else {
					return s.new_token(.gt, '', 1)
				}
			}
			`<` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.le, '', 2)
				} else if nextc == `<` {
					if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
						s.pos += 2
						return s.new_token(.left_shift_assign, '', 3)
					}
					s.pos++
					return s.new_token(.left_shift, '', 2)
				} else if nextc == `-` {
					s.pos++
					return s.new_token(.arrow, '', 2)
				} else {
					return s.new_token(.lt, '', 1)
				}
			}
			`=` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.eq, '', 2)
				} else {
					return s.new_token(.assign, '', 1)
				}
			}
			`:` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.decl_assign, '', 2)
				} else {
					return s.new_token(.colon, '', 1)
				}
			}
			`;` {
				return s.new_token(.semicolon, '', 1)
			}
			`!` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.ne, '', 2)
				} else if s.text.len > s.pos + 3 && nextc == `i` && s.text[s.pos + 2] == `n`
					&& s.text[s.pos + 3].is_space() {
					s.pos += 2
					return s.new_token(.not_in, '', 3)
				} else if s.text.len > s.pos + 3 && nextc == `i` && s.text[s.pos + 2] == `s`
					&& s.text[s.pos + 3].is_space() {
					s.pos += 2
					return s.new_token(.not_is, '', 3)
				} else {
					return s.new_token(.not, '', 1)
				}
			}
			`~` {
				return s.new_token(.bit_not, '', 1)
			}
			`/` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.div_assign, '', 2)
				}
				if nextc == `/` {
					start := s.pos + 1
					s.ignore_line()
					mut comment_line_end := s.pos
					if s.text[s.pos - 1] == scanner.b_cr {
						comment_line_end--
					} else {
						// fix line_nr, \n was read; the comment is marked on the next line
						s.pos--
						s.line_nr--
					}
					if s.should_parse_comment() {
						s.line_comment = s.text[start + 1..comment_line_end]
						mut comment := s.line_comment
						// Find out if this comment is on its own line (for vfmt)
						mut is_separate_line_comment := true
						for j := start - 2; j >= 0 && s.text[j] != scanner.b_lf; j-- {
							if s.text[j] !in [`\t`, ` `] {
								is_separate_line_comment = false
							}
						}
						if is_separate_line_comment {
							// NB: ´\x01´ is used to preserve the initial whitespace in comments
							//     that are on a separate line
							comment = '\x01' + comment
						}
						return s.new_token(.comment, comment, comment.len + 2)
					}
					// Skip the comment (return the next token)
					continue
				}
				// Multiline comments
				if nextc == `*` {
					start := s.pos + 2
					start_line := s.line_nr
					mut nest_count := 1
					// Skip comment
					for nest_count > 0 && s.pos < s.text.len - 1 {
						s.pos++
						// Reaching the last byte of the text with an open nesting level
						// means that the comment is unterminated: a closing `*/` needs
						// two bytes, so it can not start here anymore.
						if s.pos >= s.text.len - 1 {
							s.line_nr--
							s.error('comment not terminated')
						}
						if s.text[s.pos] == scanner.b_lf {
							s.inc_line_number()
							continue
						}
						if s.expect('/*', s.pos) {
							nest_count++
							continue
						}
						if s.expect('*/', s.pos) {
							nest_count--
						}
					}
					s.pos++
					if s.should_parse_comment() {
						mut comment := s.text[start..(s.pos - 1)].trim(' ')
						if !comment.contains('\n') {
							comment = '\x01' + comment
						}
						return s.new_multiline_token(.comment, comment, comment.len + 4,
							start_line)
					}
					// Skip if not in fmt mode
					continue
				}
				return s.new_token(.div, '', 1)
			}
			else {}
		}
		$if windows {
			if c == `\0` {
				return s.end_of_file()
			}
		}
		s.invalid_character()
		break
	}
	return s.end_of_file()
}

// invalid_character reports the (possibly multi-byte) character at s.pos
// as invalid, through s.error.
fn (mut s Scanner) invalid_character() {
	len := utf8_char_len(s.text[s.pos])
	// clamp, so that a truncated multi-byte sequence at the very end
	// of the text does not slice past it
	end := mu.min(s.pos + len, s.text.len)
	c := s.text[s.pos..end]
	s.error('invalid character `$c`')
}

// current_column returns the distance between s.pos and s.last_nl_pos.
// NOTE(review): last_nl_pos is presumably maintained by inc_line_number(),
// which is not visible here - confirm.
fn (s &Scanner) current_column() int {
	return s.pos - s.last_nl_pos
}

// count_symbol_before returns how many consecutive `sym` bytes end at
// index `p` (inclusive) of the text, looking backwards from `p`.
fn (s &Scanner) count_symbol_before(p int, sym byte) int {
	mut i := p
	for i >= 0 && s.text[i] == sym {
		i--
	}
	return p - i
}

// ident_string scans a string literal, or the next segment of a string
// that contains interpolations, starting at s.pos.
// The scan ends at the closing quote, at the start of a `$var` / `${expr}`
// interpolation (with s.pos rewound so that the `$` is scanned next), or
// at the end of the text (reported as 'unfinished string literal').
// It returns the literal's content without the surrounding quotes, with
// `\r` removed, `\uXXXX` escapes decoded (not in fmt mode), and
// backslash + newline line continuations folded.
[direct_array_access]
fn (mut s Scanner) ident_string() string {
	q := s.text[s.pos]
	is_quote := q == scanner.single_quote || q == scanner.double_quote
	is_raw := is_quote && s.pos > 0 && s.text[s.pos - 1] == `r` && !s.is_inside_string
	is_cstr := is_quote && s.pos > 0 && s.text[s.pos - 1] == `c` && !s.is_inside_string
	if is_quote {
		if s.is_inside_string || s.is_enclosed_inter || s.is_inter_start {
			s.inter_quote = q
		} else {
			s.quote = q
		}
	}
	mut n_cr_chars := 0
	mut start := s.pos
	start_char := s.text[start]
	if start_char == s.quote
		|| (start_char == s.inter_quote && (s.is_inter_start || s.is_enclosed_inter)) {
		start++
	} else if start_char == scanner.b_lf {
		s.inc_line_number()
	}
	s.is_inside_string = false
	mut u_escapes_pos := []int{} // pos list of \uXXXX
	slash := `\\`
	for {
		s.pos++
		if s.pos >= s.text.len {
			s.error('unfinished string literal')
			break
		}
		c := s.text[s.pos]
		prevc := s.text[s.pos - 1]
		// end of string: the quote closes the literal only when it is preceded by
		// an even number of backslashes (0 => plain quote, 2 => '123\\', while an
		// odd count like in 'a\\\'' means that the quote itself is escaped)
		if c == s.quote && s.count_symbol_before(s.pos - 1, slash) % 2 == 0 {
			break
		}
		if c == s.inter_quote && (s.is_inter_start || s.is_enclosed_inter) {
			break
		}
		if c == scanner.b_cr {
			n_cr_chars++
		}
		if c == scanner.b_lf {
			s.inc_line_number()
		}
		// Don't allow \0
		if c == `0` && s.pos > 2 && prevc == slash {
			if (s.pos < s.text.len - 1 && s.text[s.pos + 1].is_digit())
				|| s.count_symbol_before(s.pos - 1, slash) % 2 == 0 {
			} else if !is_cstr && !is_raw {
				s.error(r'cannot use `\0` (NULL character) in the string literal')
			}
		}
		// Don't allow \x00
		if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
			if s.count_symbol_before(s.pos - 3, slash) % 2 == 0 {
			} else if !is_cstr && !is_raw {
				s.error(r'cannot use `\x00` (NULL character) in the string literal')
			}
		}
		// Escape `\x` `\u`
		if prevc == slash && !is_raw && !is_cstr && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			// Escape `\x`: needs one hex digit right after it, inside of the text
			if c == `x` && (s.pos + 1 >= s.text.len || !s.text[s.pos + 1].is_hex_digit()) {
				s.error(r'`\x` used with no following hex digits')
			}
			// Escape `\u`: needs exactly four hex digits after it, inside of the text.
			// The closing quote is not a hex digit, so it is rejected by the same test.
			if c == `u` {
				mut is_complete := s.pos + 4 < s.text.len
				if is_complete {
					for k in 1 .. 5 {
						if !s.text[s.pos + k].is_hex_digit() {
							is_complete = false
							break
						}
					}
				}
				if is_complete {
					// only well formed escapes are remembered; decode_u_escapes
					// slices 6 bytes at each recorded position
					u_escapes_pos << s.pos - 1
				} else {
					s.error(r'`\u` incomplete unicode character value')
				}
			}
		}
		// ${var} (ignore in vfmt mode) (skip \$)
		if prevc == `$` && c == `{` && !is_raw && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.is_inside_string = true
			s.is_enclosed_inter = true
			// so that s.pos points to $ at the next step
			s.pos -= 2
			break
		}
		// $var
		if prevc == `$` && util.is_name_char(c) && !is_raw
			&& s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.is_inside_string = true
			s.is_inter_start = true
			s.pos -= 2
			break
		}
	}
	mut lit := ''
	mut end := s.pos
	// when stopping for an interpolation, s.pos was rewound to the byte
	// before the `$`, and that byte still belongs to this segment
	if s.is_inside_string {
		end++
	}
	if start <= s.pos {
		mut string_so_far := s.text[start..end]
		if !s.is_fmt && u_escapes_pos.len > 0 {
			string_so_far = decode_u_escapes(string_so_far, start, u_escapes_pos)
		}
		if n_cr_chars > 0 {
			string_so_far = string_so_far.replace('\r', '')
		}
		if string_so_far.contains('\\\n') {
			lit = trim_slash_line_break(string_so_far)
		} else {
			lit = string_so_far
		}
	}
	return lit
}

// decode_u_escapes returns `s` with every `\uXXXX` escape replaced by the
// string form of the code point that its four hex digits name.
// `escapes_pos` lists the positions of the escapes' backslashes, measured in
// the whole scanned text; `start` is the position in that text where `s` begins,
// so `pos - start` is the offset of an escape inside `s`.
fn decode_u_escapes(s string, start int, escapes_pos []int) string {
	if escapes_pos.len == 0 {
		return s
	}
	escape_len := 6 // "\uXXXX".len == 6
	// at most one literal chunk before each escape, the escapes themselves, and one trailing chunk
	mut parts := []string{cap: escapes_pos.len * 2 + 1}
	// the text in front of the first escape is copied verbatim
	parts << s[..escapes_pos.first() - start]
	for n, abs_pos in escapes_pos {
		rel := abs_pos - start
		after := rel + escape_len
		// skip the leading `\u` and parse the 4 hex digits
		code_point := strconv.parse_uint(s[rel + 2..after], 16, 32)
		parts << utf32_to_str(u32(code_point))
		// the literal text runs up to the next escape, or to the end of `s` after the last one
		chunk_end := if n + 1 < escapes_pos.len { escapes_pos[n + 1] - start } else { s.len }
		parts << s[after..chunk_end]
	}
	return parts.join('')
}

// trim_slash_line_break removes every backslash + newline pair from `s`,
// together with the whitespace (spaces, tabs, newlines, \v, \f, \r) that
// directly follows the pair, and returns the resulting string.
fn trim_slash_line_break(s string) string {
	mut result := s
	mut search_from := 0
	for {
		at := result.index_after('\\\n', search_from)
		if at == -1 {
			break
		}
		head := result[..at]
		// drop the 2 chars of the `\` + newline pair, then the leading whitespace of the rest
		tail := result[at + 2..].trim_left(' \n\t\v\f\r')
		result = head + tail
		// continue searching from the place where the two parts were joined
		search_from = at
	}
	return result
}

// ident_char scans a character literal and returns the text between its delimiters.
// On return, `s.pos` is on the closing backtick, or at `s.text.len` when the
// end of the text was reached first.
// It reports an error when the literal holds more than one character.
// NOTE(review): presumably called with `s.pos` on the opening backtick - confirm against the caller.
fn (mut s Scanner) ident_char() string {
	start := s.pos
	slash := `\\`
	// len counts the scanned bytes that are not backslashes
	mut len := 0
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		if s.text[s.pos] != slash {
			len++
		}
		// true when the two bytes right before s.pos are both backslashes
		double_slash := s.expect('\\\\', s.pos - 2)
		// a backtick ends the literal, unless it is escaped by a single preceding backslash
		if s.text[s.pos] == `\`` && (s.text[s.pos - 1] != slash || double_slash) {
			// ` // apostrophe balance comment. do not remove
			if double_slash {
				// the escaped backslash (`\\`) stands for one character
				len++
			}
			break
		}
	}
	// compensates for the closing backtick, which the loop counted too
	len--
	// the literal's content, without the delimiters
	c := s.text[start + 1..s.pos]
	if len != 1 {
		// the byte based count is not 1; recheck using the length of the ustring form of `c`
		u := c.ustring()
		if u.len != 1 {
			s.error('invalid character literal (more than one character)\n' +
				'use quotes for strings, backticks for characters')
		}
	}
	// Escapes a `'` character
	if c == "'" {
		return '\\' + c
	}
	return c
}

// expect returns true when the scanned text contains `want` exactly at `start_pos`.
// It returns false when `start_pos` is negative or not inside the text, or
// when `want` would extend past the end of the text.
[direct_array_access; inline]
fn (s &Scanner) expect(want string, start_pos int) bool {
	end_pos := start_pos + want.len
	// validate the whole range first, since bounds checks are disabled for this function
	in_bounds := start_pos >= 0 && end_pos >= 0 && start_pos < s.text.len
		&& end_pos <= s.text.len
	if !in_bounds {
		return false
	}
	for i in 0 .. want.len {
		if s.text[start_pos + i] != want[i] {
			return false
		}
	}
	return true
}

// ignore_line skips the rest of the current line: it advances `s.pos` up to
// the next line feed (or the end of the text), then registers the line end
// through `inc_line_number`.
[inline]
fn (mut s Scanner) ignore_line() {
	s.eat_to_end_of_line()
	s.inc_line_number()
}

// eat_to_end_of_line advances `s.pos` until it is on a line feed byte,
// or until it reaches the end of the text. The line feed itself is not consumed.
[direct_array_access; inline]
fn (mut s Scanner) eat_to_end_of_line() {
	for {
		if s.pos >= s.text.len || s.text[s.pos] == scanner.b_lf {
			break
		}
		s.pos++
	}
}

// inc_line_number registers the end of a source line at the current position:
// it updates `last_nl_pos` (used for calculating columns), records `s.pos` in
// `line_ends`, increments `line_nr`, and raises `nr_lines` when the new line
// number exceeds it.
[inline]
fn (mut s Scanner) inc_line_number() {
	// never point past the last byte of the text
	last_valid_pos := s.text.len - 1
	mut nl_pos := mu.min(last_valid_pos, s.pos)
	if s.is_crlf {
		nl_pos++
	}
	s.last_nl_pos = nl_pos
	s.line_ends << s.pos
	s.line_nr++
	if s.nr_lines < s.line_nr {
		s.nr_lines = s.line_nr
	}
}

// note reports `msg` as a compiler notice at the current scanner position.
// In `.stdout` output mode, the formatted notice is printed to stderr right away;
// in every other output mode it is appended to `s.notices`.
pub fn (mut s Scanner) note(msg string) {
	pos := token.Position{
		line_nr: s.line_nr
		pos: s.pos
		// same column computation as in `warn` and `error`, so that notices
		// are reported with a column too, instead of leaving it unset
		col: s.current_column() - 1
	}
	if s.pref.output_mode == .stdout {
		eprintln(util.formatted_error('notice:', msg, s.file_path, pos))
	} else {
		s.notices << errors.Notice{
			file_path: s.file_path
			pos: pos
			reporter: .scanner
			message: msg
		}
	}
}

// warn reports `msg` as a warning at the current scanner position.
// When `pref.warns_are_errors` is set, the message is handed over to `error` instead.
// In `.stdout` output mode, the formatted warning is printed to stderr right away;
// in every other output mode it is appended to `s.warnings`.
pub fn (mut s Scanner) warn(msg string) {
	if s.pref.warns_are_errors {
		s.error(msg)
		return
	}
	warn_pos := token.Position{
		line_nr: s.line_nr
		pos: s.pos
		col: s.current_column() - 1
	}
	if s.pref.output_mode != .stdout {
		// not printing directly => store the warning
		s.warnings << errors.Warning{
			file_path: s.file_path
			pos: warn_pos
			reporter: .scanner
			message: msg
		}
		return
	}
	eprintln(util.formatted_error('warning:', msg, s.file_path, warn_pos))
}

// error reports `msg` as an error at the current scanner position.
// In `.stdout` output mode, the formatted error is printed to stderr, and the
// program exits with code 1.
// In every other output mode, the program exits with code 1 (without storing
// or printing the error) when `pref.fatal_errors` is set; otherwise the error
// is appended to `s.errors`, and scanning can continue.
pub fn (mut s Scanner) error(msg string) {
	err_pos := token.Position{
		line_nr: s.line_nr
		pos: s.pos
		col: s.current_column() - 1
	}
	if s.pref.output_mode != .stdout {
		if s.pref.fatal_errors {
			exit(1)
		}
		s.errors << errors.Error{
			file_path: s.file_path
			pos: err_pos
			reporter: .scanner
			message: msg
		}
		return
	}
	eprintln(util.formatted_error('error:', msg, s.file_path, err_pos))
	exit(1)
}

// vet_error appends a vet error with the message `msg` and the suggested fix
// kind `fix` to `s.vet_errors`. Its position holds the current line number and
// column; the `pos` field of the position is left unset.
fn (mut s Scanner) vet_error(msg string, fix vet.FixKind) {
	vet_pos := token.Position{
		line_nr: s.line_nr
		col: s.current_column() - 1
	}
	s.vet_errors << vet.Error{
		message: msg
		file_path: s.file_path
		pos: vet_pos
		kind: .error
		fix: fix
		typ: .default
	}
}

// verror forwards the message `s` to `util.verror`, labeled as a 'scanner error'.
// NOTE(review): whether `util.verror` returns to the caller is not visible here - verify before relying on it.
pub fn verror(s string) {
	util.verror('scanner error', s)
}

// codegen appends the generated source `newtext` to the scanned text, and
// scans the added part, so that its tokens are stored after the existing ones.
// It has an effect only in the `.skip_comments` mode; `s.tidx` is restored afterwards.
pub fn (mut s Scanner) codegen(newtext string) {
	$if debug_codegen ? {
		eprintln('scanner.codegen:\n $newtext')
	}
	// codegen makes sense only during normal compilation
	// feeding code generated V code to vfmt or vdoc will
	// cause them to output/document ephemeral stuff.
	if s.comments_mode != .skip_comments {
		return
	}
	s.all_tokens.delete_last() // remove .eof from end of .all_tokens
	s.text += newtext
	// the new tokens should be indexed after the existing ones,
	// while the current read index has to stay where it was
	saved_tidx := s.tidx
	s.tidx = s.all_tokens.len
	s.scan_remaining_text()
	s.tidx = saved_tidx
}

// trace prints `message` to stdout, prefixed with `fbase`, but only while
// scanning the file whose base name (`s.file_base`) is equal to `fbase`.
fn (mut s Scanner) trace(fbase string, message string) {
	if s.file_base != fbase {
		return
	}
	println('> s.trace | ${fbase:-10s} | $message')
}
-												all: update copyright to 2019-2021 (#8029)


											
										
										
											2021-01-18 13:20:06 +01:00
+								// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								// Use of this source code is governed by an MIT license
 								// that can be found in the LICENSE file.
 								module scanner
-												math: add a pure V `math.mathutil`, with generic `min`, `max` and `abs` functions (#9176), and use it consistently


											
										
										
											2021-03-12 10:28:04 +01:00
+								import math.mathutil as mu
-												parser: deprecate import(); replace remaining import()s

											
										
										
											2020-04-26 07:35:59 +02:00
+								import os
-												v.scanner: decode \uXXXX in scanner (#9298)


											
										
										
											2021-03-14 11:09:17 +01:00
+								import strconv
-												parser: deprecate import(); replace remaining import()s

											
										
										
											2020-04-26 07:35:59 +02:00
+								import v.token
 								import v.pref
 								import v.util
-												vvet: move to own subdir, prepare richer suggestions (#7989)


											
										
										
											2021-01-09 15:11:49 +01:00
+								import v.vet
-												scanner: store errors and warnings on silent mode (#6883)


											
										
										
											2020-11-20 10:44:19 +01:00
+								import v.errors
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
 								const (
 									single_quote = `\'`
 									double_quote = `"`
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
+									// char used as number separator
 									num_sep      = `_`
-												scanner: speed up Scanner.skip_whitespace (~2-3% speed up of `-o x.c cmd/v`)

											
										
										
											2021-05-03 13:14:32 +02:00
+									b_lf         = 10
 									b_cr         = 13
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								)
 								pub struct Scanner {
-												parser/checker: pub: struct fields

											
										
										
											2020-05-09 15:16:48 +02:00
+								pub mut:
-												scanner: implement s.trace/2

											
										
										
											2021-01-28 14:21:19 +01:00
+									file_path         string // '/path/to/file.v'
 									file_base         string // 'file.v'
 									text              string // the whole text of the file
 									pos               int    // current position in the file, first character is s.text[0]
 									line_nr           int    // current line number
-												scanner, token: add column information to tokens (#9407)


											
										
										
											2021-03-23 06:23:46 +01:00
+									last_nl_pos       int = -1 // for calculating column
 									is_crlf           bool   // special check when computing columns
-												scanner: implement s.trace/2

											
										
										
											2021-01-28 14:21:19 +01:00
+									is_inside_string  bool   // set to true in a string, *at the start* of an $var or ${expr}
 									is_inter_start    bool   // for hacky string interpolation TODO simplify
-												fmt: align each contiguous field of struct. not the whole. (#7981)


											
										
										
											2021-01-12 04:38:43 +01:00
+									is_inter_end      bool
 									is_enclosed_inter bool
 									line_comment      string
-												run vfmt on v/

											
										
										
											2019-12-28 09:43:22 +01:00
+									// prev_tok                 TokenKind
-												scanner: uniform bool type variable names


											
										
										
											2020-04-02 12:23:18 +02:00
+									is_started                  bool
 									is_print_line_on_error      bool
 									is_print_colored_error      bool
 									is_print_rel_paths_on_error bool
 									quote                       byte // which quote is used to denote current string: ' or "
-												scanner: fix interpolation with string args (#7214)


											
										
										
											2020-12-09 19:58:27 +01:00
+									inter_quote                 byte
-												scanner: uniform bool type variable names


											
										
										
											2020-04-02 12:23:18 +02:00
+									line_ends                   []int // the positions of source lines ends   (i.e. \n signs)
-												fmt: align struct field comments (#7632)


											
										
										
											2021-01-03 21:10:25 +01:00
+									nr_lines                    int   // total number of lines in the source file that were scanned
 									is_vh                       bool  // Keep newlines
 									is_fmt                      bool  // Used for v fmt.
-												scanner: uniform bool type variable names


											
										
										
											2020-04-02 12:23:18 +02:00
+									comments_mode               CommentsMode
-												all: mutability check (part 1); enable mutable sumtype args

											
										
										
											2020-09-22 05:28:29 +02:00
+									is_inside_toplvl_statement  bool // *only* used in comments_mode: .toplevel_comments, toggled by parser
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+									all_tokens                  []token.Token // *only* used in comments_mode: .toplevel_comments, contains all tokens
 									tidx                        int
-												scanner: prevent infinite looping, when reaching .eof due to parser bugs

											
										
										
											2020-05-28 18:22:11 +02:00
+									eofs                        int
-												vet: prohibit spaces after `(`

											
										
										
											2020-07-11 11:41:39 +02:00
+									pref                        &pref.Preferences
-												scanner: store errors and warnings on silent mode (#6883)


											
										
										
											2020-11-20 10:44:19 +01:00
+									errors                      []errors.Error
 									warnings                    []errors.Warning
-												v: support compiler notices. Use them for `[deprecated_after: '2021-05-01']` tags

Compiler notices are like warnings, with these differences:
   a) notices use a different color.
   b) notices use a different label.
   c) notices do not prevent compilation with -prod.
   (warnings are converted to errors with -prod)

											
										
										
											2021-03-22 18:43:06 +01:00
+									notices                     []errors.Notice
-												vvet: move to own subdir, prepare richer suggestions (#7989)


											
										
										
											2021-01-09 15:11:49 +01:00
+									vet_errors                  []vet.Error
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								}
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+								/*
 								How the .toplevel_comments mode works:
-												fmt: comments

											
										
										
											2020-02-29 17:51:35 +01:00
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+								In this mode, the scanner scans *everything* at once, before parsing starts,
 								including all the comments, and stores the results in an buffer s.all_tokens.
 								Then .scan() just returns s.all_tokens[ s.tidx++ ] *ignoring* the
 								comment tokens. In other words, by default in this mode, the parser
 								*will not see any comments* inside top level statements, so it has
 								no reason to complain about them.
 								When the parser determines, that it is outside of a top level statement,
 								it tells the scanner to backtrack s.tidx to the current p.tok index,
 								then it changes .is_inside_toplvl_statement to false , and refills its
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+								lookahead buffer (i.e. p.peek_tok), from the scanner.
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
 								In effect, from the parser's point of view, the next tokens, that it will
 								receive with p.next(), will be the same, as if comments are not ignored
 								anymore, *between* top level statements.
 								When the parser determines, that it is going again inside a top level
 								statement, it does the same, this time setting .is_inside_toplvl_statement
 								to true, again refilling the lookahead buffer => calling .next() in this
 								mode, will again ignore all the comment tokens, till the top level statement
 								is finished.
 								*/
 								// The different kinds of scanner modes:
 								//
 								// .skip_comments - simplest/fastest, just ignores all comments early.
 								// This mode is used by the compiler itself.
 								//
 								// .parse_comments is used by vfmt. Ideally it should handle inline /* */
 								// comments too, i.e. it returns every kind of comment as a new token.
 								//
 								// .toplevel_comments is used by vdoc, parses *only* top level ones
 								// that are *outside* structs/enums/fns.
-												fmt: comments

											
										
										
											2020-02-29 17:51:35 +01:00
+								pub enum CommentsMode {
 									skip_comments
 									parse_comments
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+									toplevel_comments
-												fmt: comments

											
										
										
											2020-02-29 17:51:35 +01:00
+								}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								// new scanner from file.
-												vet: prohibit spaces after `(`

											
										
										
											2020-07-11 11:41:39 +02:00
+								pub fn new_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+									if !os.exists(file_path) {
 										verror("$file_path doesn't exist")
 									}
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
+									raw_text := util.read_file(file_path) or {
-												errors: print more informative and pretty errors

											
										
										
											2020-04-06 18:39:58 +02:00
+										verror(err)
-												checker: restrict numeric promotions to cases where no data is lost 


											
										
										
											2020-05-27 05:42:48 +02:00
+										return voidptr(0)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+									}
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+									mut s := &Scanner{
 										pref: pref
 										text: raw_text
 										is_print_line_on_error: true
 										is_print_colored_error: true
 										is_print_rel_paths_on_error: true
 										is_fmt: pref.is_fmt
 										comments_mode: comments_mode
 										file_path: file_path
 										file_base: os.base(file_path)
 									}
 									s.init_scanner()
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+									return s
 								}
 								// new scanner from string.
-												vet: prohibit spaces after `(`

											
										
										
											2020-07-11 11:41:39 +02:00
+								pub fn new_scanner(text string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+									mut s := &Scanner{
-												vet: prohibit spaces after `(`

											
										
										
											2020-07-11 11:41:39 +02:00
+										pref: pref
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										text: text
-												scanner: uniform bool type variable names


											
										
										
											2020-04-02 12:23:18 +02:00
+										is_print_line_on_error: true
 										is_print_colored_error: true
 										is_print_rel_paths_on_error: true
-												scanner: minor cleanup and optimization in scanner.v (#7204)


											
										
										
											2020-12-09 02:09:46 +01:00
+										is_fmt: pref.is_fmt
-												fmt: comments

											
										
										
											2020-02-29 17:51:35 +01:00
+										comments_mode: comments_mode
-												scanner: minor cleanup and optimization in scanner.v (#7204)


											
										
										
											2020-12-09 02:09:46 +01:00
+										file_path: 'internal_memory'
-												scanner: implement s.trace/2

											
										
										
											2021-01-28 14:21:19 +01:00
+										file_base: 'internal_memory'
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+									}
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+									s.init_scanner()
 									return s
 								}
 								fn (mut s Scanner) init_scanner() {
 									util.get_timers().measure_pause('PARSE')
 									s.scan_all_tokens_in_buffer(s.comments_mode)
 									util.get_timers().measure_resume('PARSE')
 								}
 								[unsafe]
 								pub fn (mut s Scanner) free() {
 									unsafe {
 										s.text.free()
 									}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								}
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+								[inline]
 								fn (s &Scanner) should_parse_comment() bool {
-												fmt: smarter if condition wrapping (#8201)


											
										
										
											2021-01-23 09:33:22 +01:00
+									return (s.comments_mode == .parse_comments)
 										|| (s.comments_mode == .toplevel_comments && !s.is_inside_toplvl_statement)
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+								}
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+								// NB: this is called by v's parser
 								pub fn (mut s Scanner) set_is_inside_toplevel_statement(newstate bool) {
 									s.is_inside_toplvl_statement = newstate
 								}
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+								pub fn (mut s Scanner) set_current_tidx(cidx int) {
 									mut tidx := if cidx < 0 { 0 } else { cidx }
 									tidx = if tidx > s.all_tokens.len { s.all_tokens.len } else { tidx }
 									s.tidx = tidx
 								}
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
-												scanner: reduce memory, increase speed for long commented sections

											
										
										
											2020-07-11 20:27:39 +02:00
+								[inline]
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+								fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
 									cidx := s.tidx
 									s.tidx++
-												fmt: keep newlines between toplevel stmts (#8383)


											
										
										
											2021-01-29 11:17:59 +01:00
+									line_offset := if tok_kind == .hash { 0 } else { 1 }
-												v: update parser / token & split parsing into methods 


											
										
										
											2019-12-28 09:15:32 +01:00
+									return token.Token{
-												v2: [] expr for known arrays; p.expected_type

											
										
										
											2020-02-12 17:39:35 +01:00
+										kind: tok_kind
 										lit: lit
-												fmt: keep newlines between toplevel stmts (#8383)


											
										
										
											2021-01-29 11:17:59 +01:00
+										line_nr: s.line_nr + line_offset
-												scanner, token: add column information to tokens (#9407)


											
										
										
											2021-03-23 06:23:46 +01:00
+										col: mu.max(1, s.current_column() - len + 1)
-												scanner: even better error messages 


											
										
										
											2020-04-10 00:09:34 +02:00
+										pos: s.pos - len + 1
 										len: len
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+										tidx: cidx
-												v2: [] expr for known arrays; p.expected_type

											
										
										
											2020-02-12 17:39:35 +01:00
+									}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								}
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+								[inline]
 								fn (s &Scanner) new_eof_token() token.Token {
 									return token.Token{
 										kind: .eof
 										lit: ''
 										line_nr: s.line_nr + 1
-												scanner: fix eof token position (#9432)


											
										
										
											2021-03-23 08:51:09 +01:00
+										col: s.current_column()
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+										pos: s.pos
 										len: 1
 										tidx: s.tidx
 									}
 								}
-												fmt: keep single empty lines (#8189)


											
										
										
											2021-01-19 14:49:40 +01:00
+								[inline]
-												scanner: fix typo (#8345)


											
										
										
											2021-01-27 13:53:41 +01:00
+								fn (mut s Scanner) new_multiline_token(tok_kind token.Kind, lit string, len int, start_line int) token.Token {
-												fmt: keep single empty lines (#8189)


											
										
										
											2021-01-19 14:49:40 +01:00
+									cidx := s.tidx
 									s.tidx++
 									return token.Token{
 										kind: tok_kind
 										lit: lit
 										line_nr: start_line + 1
-												scanner, token: add column information to tokens (#9407)


											
										
										
											2021-03-23 06:23:46 +01:00
+										col: mu.max(1, s.current_column() - len + 1)
-												fmt: keep single empty lines (#8189)


											
										
										
											2021-01-19 14:49:40 +01:00
+										pos: s.pos - len + 1
 										len: len
 										tidx: cidx
 									}
 								}
-												v.scanner: apply `[direct_array_access]` for key frequently called low level functions

											
										
										
											2021-05-05 22:45:23 +02:00
+								[direct_array_access; inline]
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
+								fn (mut s Scanner) ident_name() string {
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+									start := s.pos
-												scanner: simplify and unify style



											
										
										
											2020-02-25 22:58:51 +01:00
+									s.pos++
-												v.scanner: apply `[direct_array_access]` for key frequently called low level functions

											
										
										
											2021-05-05 22:45:23 +02:00
+									for s.pos < s.text.len {
 										c := s.text[s.pos]
 										if !(util.is_name_char(c) || c.is_digit()) {
 											break
 										}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										s.pos++
 									}
 									name := s.text[start..s.pos]
 									s.pos--
 									return name
 								}
-												fmt: keep _ separator in number literals (#7495)


											
										
										
											2020-12-23 13:48:43 +01:00
+								fn (s Scanner) num_lit(start int, end int) string {
 									if s.is_fmt {
 										return s.text[start..end]
 									}
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
+									unsafe {
-												fmt: keep _ separator in number literals (#7495)


											
										
										
											2020-12-23 13:48:43 +01:00
+										txt := s.text.str
-												remove `as` casts for basic types

											
										
										
											2020-02-07 22:10:48 +01:00
+										mut b := malloc(end - start + 1) // add a byte for the endstring 0
 										mut i1 := 0
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
+										for i := start; i < end; i++ {
-												ci: fix `v test-cleancode`

											
										
										
											2021-01-25 10:26:20 +01:00
+											if txt[i] != scanner.num_sep {
-												remove `as` casts for basic types

											
										
										
											2020-02-07 22:10:48 +01:00
+												b[i1] = txt[i]
 												i1++
 											}
-bxxxx binary literal support; _ in literals (1_000_000)


											
										
										
											2020-01-23 03:28:25 +01:00
+										}
-												remove `as` casts for basic types

											
										
										
											2020-02-07 22:10:48 +01:00
+										b[i1] = 0 // C string compatibility
-												builtin: x.vstring() instead of string(x) (#6102)


											
										
										
											2020-08-10 18:05:26 +02:00
+										return b.vstring_with_len(i1)
-bxxxx binary literal support; _ in literals (1_000_000)


											
										
										
											2020-01-23 03:28:25 +01:00
+									}
 								}
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
+								fn (mut s Scanner) ident_bin_number() string {
-												scanner: add check for bin/oct/hex with wrong digits


											
										
										
											2020-02-25 11:11:12 +01:00
+									mut has_wrong_digit := false
-												scanner: fix err pos related to num literal



											
										
										
											2020-05-21 18:43:57 +02:00
+									mut first_wrong_digit_pos := 0
-												scanner: add check for bin/oct/hex with wrong digits


											
										
										
											2020-02-25 11:11:12 +01:00
+									mut first_wrong_digit := `\0`
-bxxxx binary literal support; _ in literals (1_000_000)


											
										
										
											2020-01-23 03:28:25 +01:00
+									start_pos := s.pos
 									s.pos += 2 // skip '0b'
-												ci: fix `v test-cleancode`

											
										
										
											2021-01-25 10:26:20 +01:00
+									if s.pos < s.text.len && s.text[s.pos] == scanner.num_sep {
-												scanner: add check for `_` in num literals (#5849)


											
										
										
											2020-07-16 16:29:07 +02:00
+										s.error('separator `_` is only valid between digits in a numeric literal')
 									}
-												scanner: simplify and unify style



											
										
										
											2020-02-25 22:58:51 +01:00
+									for s.pos < s.text.len {
-bxxxx binary literal support; _ in literals (1_000_000)


											
										
										
											2020-01-23 03:28:25 +01:00
+										c := s.text[s.pos]
-												ci: fix `v test-cleancode`

											
										
										
											2021-01-25 10:26:20 +01:00
+										if c == scanner.num_sep && s.text[s.pos - 1] == scanner.num_sep {
-												scanner: more checks for `_` as num_sep (#5992)


											
										
										
											2020-07-28 08:09:19 +02:00
+											s.error('cannot use `_` consecutively')
 										}
-												ci: fix `v test-cleancode`

											
										
										
											2021-01-25 10:26:20 +01:00
+										if !c.is_bin_digit() && c != scanner.num_sep {
-												scanner: uniform bool type variable names


											
										
										
											2020-04-02 12:23:18 +02:00
+											if (!c.is_digit() && !c.is_letter()) || s.is_inside_string {
-												scanner: add check for bin/oct/hex with wrong digits


											
										
										
											2020-02-25 11:11:12 +01:00
+												break
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
+											} else if !has_wrong_digit {
-												scanner: add check for bin/oct/hex with wrong digits


											
										
										
											2020-02-25 11:11:12 +01:00
+												has_wrong_digit = true
-												scanner: fix err pos related to num literal



											
										
										
											2020-05-21 18:43:57 +02:00
+												first_wrong_digit_pos = s.pos
-												scanner: add check for bin/oct/hex with wrong digits


											
										
										
											2020-02-25 11:11:12 +01:00
+												first_wrong_digit = c
 											}
-bxxxx binary literal support; _ in literals (1_000_000)


											
										
										
											2020-01-23 03:28:25 +01:00
+										}
 										s.pos++
 									}
-												ci: fix `v test-cleancode`

											
										
										
											2021-01-25 10:26:20 +01:00
+									if s.text[s.pos - 1] == scanner.num_sep {
-												all: update assoc syntax (#8274)


											
										
										
											2021-01-22 23:24:48 +01:00
+										s.pos--
-												scanner: more checks for `_` as num_sep (#5992)


											
										
										
											2020-07-28 08:09:19 +02:00
+										s.error('cannot use `_` at the end of a numeric literal')
-												all: mutability check (part 1); enable mutable sumtype args

											
										
										
											2020-09-22 05:28:29 +02:00
+									} else if start_pos + 2 == s.pos {
-												scanner: fix err pos related to num literals


											
										
										
											2020-05-21 15:20:36 +02:00
+										s.pos-- // adjust error position
-												scanner: fix bin/oct/hex without number parts 



											
										
										
											2020-02-24 18:02:36 +01:00
+										s.error('number part of this binary is not provided')
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
+									} else if has_wrong_digit {
-												scanner: fix err pos related to num literal



											
										
										
											2020-05-21 18:43:57 +02:00
+										s.pos = first_wrong_digit_pos // adjust error position
-												vfmt: format scanner.v

											
										
										
											2020-07-04 15:14:30 +02:00
+										s.error('this binary number has unsuitable digit `$first_wrong_digit.str()`')
-												scanner: add check for bin/oct/hex with wrong digits


											
										
										
											2020-02-25 11:11:12 +01:00
+									}
-												fmt: keep _ separator in number literals (#7495)


											
										
										
											2020-12-23 13:48:43 +01:00
+									number := s.num_lit(start_pos, s.pos)
-bxxxx binary literal support; _ in literals (1_000_000)


											
										
										
											2020-01-23 03:28:25 +01:00
+									s.pos--
 									return number
 								}
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
// ident_hex_number scans a hexadecimal literal whose `0x` prefix begins at
// the current position, and returns the result of `s.num_lit` for the scanned span.
// When the text ends right after the prefix, it returns '0x' without moving s.pos.
// Problems are reported through `s.error`: a `_` directly after the prefix,
// two `_` in a row, a trailing `_`, a prefix with no digits, and a letter
// that is not a hexadecimal digit.
// On a normal return, s.pos is one less than the end offset passed to `s.num_lit`.
fn (mut s Scanner) ident_hex_number() string {
	start_pos := s.pos
	if start_pos + 2 >= s.text.len {
		// nothing follows the prefix
		return '0x'
	}
	mut first_wrong_digit := `\0`
	mut wrong_digit_at := 0
	mut saw_wrong_digit := false
	s.pos += 2 // step over the '0x' prefix
	if s.pos < s.text.len && s.text[s.pos] == scanner.num_sep {
		s.error('separator `_` is only valid between digits in a numeric literal')
	}
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if ch == scanner.num_sep && s.text[s.pos - 1] == scanner.num_sep {
			s.error('cannot use `_` consecutively')
		}
		belongs_to_literal := ch.is_hex_digit() || ch == scanner.num_sep
		if !belongs_to_literal {
			// Letters are swallowed as "wrong digits" (only the first one is
			// remembered); anything else, or any byte while inside a string,
			// terminates the literal.
			if s.is_inside_string || !ch.is_letter() {
				break
			}
			if !saw_wrong_digit {
				saw_wrong_digit = true
				wrong_digit_at = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	last_ch := s.text[s.pos - 1]
	if last_ch == scanner.num_sep {
		s.pos--
		s.error('cannot use `_` at the end of a numeric literal')
	} else if s.pos == start_pos + 2 {
		// only the prefix was consumed
		s.pos-- // adjust error position
		s.error('number part of this hexadecimal is not provided')
	} else if saw_wrong_digit {
		s.pos = wrong_digit_at // adjust error position
		s.error('this hexadecimal number has unsuitable digit `$first_wrong_digit.str()`')
	}
	lit := s.num_lit(start_pos, s.pos)
	s.pos--
	return lit
}
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
// ident_oct_number scans an octal literal whose `0o` prefix begins at the
// current position, and returns the result of `s.num_lit` for the scanned span.
// Problems are reported through `s.error`: a `_` directly after the prefix,
// two `_` in a row, a trailing `_`, a prefix with no digits, and a digit or
// letter that is not an octal digit.
// On return, s.pos is one less than the end offset passed to `s.num_lit`.
fn (mut s Scanner) ident_oct_number() string {
	start_pos := s.pos
	mut first_wrong_digit := `\0`
	mut wrong_digit_at := 0
	mut saw_wrong_digit := false
	s.pos += 2 // step over the '0o' prefix
	if s.pos < s.text.len && s.text[s.pos] == scanner.num_sep {
		s.error('separator `_` is only valid between digits in a numeric literal')
	}
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if ch == scanner.num_sep && s.text[s.pos - 1] == scanner.num_sep {
			s.error('cannot use `_` consecutively')
		}
		belongs_to_literal := ch.is_oct_digit() || ch == scanner.num_sep
		if !belongs_to_literal {
			// Non-octal digits and letters are swallowed as "wrong digits"
			// (only the first one is remembered); anything else, or any byte
			// while inside a string, terminates the literal.
			if s.is_inside_string || !(ch.is_digit() || ch.is_letter()) {
				break
			}
			if !saw_wrong_digit {
				saw_wrong_digit = true
				wrong_digit_at = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	last_ch := s.text[s.pos - 1]
	if last_ch == scanner.num_sep {
		s.pos--
		s.error('cannot use `_` at the end of a numeric literal')
	} else if s.pos == start_pos + 2 {
		// only the prefix was consumed
		s.pos-- // adjust error position
		s.error('number part of this octal is not provided')
	} else if saw_wrong_digit {
		s.pos = wrong_digit_at // adjust error position
		s.error('this octal number has unsuitable digit `$first_wrong_digit.str()`')
	}
	lit := s.num_lit(start_pos, s.pos)
	s.pos--
	return lit
}
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
// ident_dec_number scans a decimal literal starting at the current position:
// an integer part (digits and `_` separators), an optional fractional part,
// and an optional exponent (`e`/`E`, optional sign, digits).
// It returns the result of `s.num_lit` for the scanned span.
// Problems are reported through `s.error`: two `_` in a row, a trailing `_`,
// a letter inside the number, an exponent marker with nothing after it, and
// a second decimal point.
// A `.` followed by another `.` (range, `5..10`) or by a letter (method call,
// `5.str()`) is not treated as part of the number.
// On return, s.pos is one less than the end offset passed to `s.num_lit`.
fn (mut s Scanner) ident_dec_number() string {
	mut has_wrong_digit := false
	mut first_wrong_digit_pos := 0
	mut first_wrong_digit := `\0`
	start_pos := s.pos
	// scan integer part
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if c == scanner.num_sep && s.text[s.pos - 1] == scanner.num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !c.is_digit() && c != scanner.num_sep {
			if !c.is_letter() || c in [`e`, `E`] || s.is_inside_string {
				break
			} else if !has_wrong_digit {
				// remember only the first offending letter
				has_wrong_digit = true
				first_wrong_digit_pos = s.pos
				first_wrong_digit = c
			}
		}
		s.pos++
	}
	// Only look at the previous byte when the integer part consumed something.
	// If the loop above stopped on its very first byte (presumably a literal
	// that starts with `.` - verify against the caller), `s.pos - 1` is outside
	// the literal, and is -1 when the literal starts at offset 0.
	if s.pos > start_pos && s.text[s.pos - 1] == scanner.num_sep {
		s.pos--
		s.error('cannot use `_` at the end of a numeric literal')
	}
	mut call_method := false // true for, e.g., 5.str(), 5.5.str(), 5e5.str()
	mut is_range := false // true for, e.g., 5..10
	// scan fractional part
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		s.pos++
		if s.pos < s.text.len {
			// 5.5, 5.5.str()
			if s.text[s.pos].is_digit() {
				for s.pos < s.text.len {
					c := s.text[s.pos]
					if !c.is_digit() {
						if !c.is_letter() || c in [`e`, `E`] || s.is_inside_string {
							// 5.5.str()
							if c == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1].is_letter() {
								call_method = true
							}
							break
						} else if !has_wrong_digit {
							has_wrong_digit = true
							first_wrong_digit_pos = s.pos
							first_wrong_digit = c
						}
					}
					s.pos++
				}
			} else if s.text[s.pos] == `.` {
				// 5.. (a range): step back so the first `.` is not consumed
				is_range = true
				s.pos--
			} else if s.text[s.pos] in [`e`, `E`] {
				// 5.e5
			} else if s.text[s.pos].is_letter() {
				// 5.str(): step back so the `.` is not consumed
				call_method = true
				s.pos--
			} else {
				// 5.
			}
		}
	}
	// scan exponential part
	mut has_exp := false
	if s.pos < s.text.len && s.text[s.pos] in [`e`, `E`] {
		has_exp = true
		s.pos++
		if s.pos < s.text.len && s.text[s.pos] in [`-`, `+`] {
			s.pos++
		}
		for s.pos < s.text.len {
			c := s.text[s.pos]
			if !c.is_digit() {
				if !c.is_letter() || s.is_inside_string {
					// 5e5.str()
					if c == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1].is_letter() {
						call_method = true
					}
					break
				} else if !has_wrong_digit {
					has_wrong_digit = true
					first_wrong_digit_pos = s.pos
					first_wrong_digit = c
				}
			}
			s.pos++
		}
	}
	if has_wrong_digit {
		// error check: wrong digit
		s.pos = first_wrong_digit_pos // adjust error position
		s.error('this number has unsuitable digit `$first_wrong_digit.str()`')
	} else if s.text[s.pos - 1] in [`e`, `E`] {
		// error check: 5e
		s.pos-- // adjust error position
		s.error('exponent has no digits')
	} else if s.pos < s.text.len && s.text[s.pos] == `.` && !is_range && !call_method {
		// error check: 1.23.4, 123.e+3.4
		if has_exp {
			s.error('exponential part should be integer')
		} else {
			s.error('too many decimal points in number')
		}
	}
	number := s.num_lit(start_pos, s.pos)
	s.pos--
	return number
}
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
// ident_number scans the numeric literal at the current position.
// It picks the scanner by prefix: `0b` -> ident_bin_number,
// `0x` -> ident_hex_number, `0o` -> ident_oct_number;
// anything else goes to ident_dec_number.
fn (mut s Scanner) ident_number() string {
	if s.expect('0b', s.pos) {
		return s.ident_bin_number()
	}
	if s.expect('0x', s.pos) {
		return s.ident_hex_number()
	}
	if s.expect('0o', s.pos) {
		return s.ident_oct_number()
	}
	return s.ident_dec_number()
}
-												scanner: speed up Scanner.skip_whitespace (~2-3% speed up of `-o x.c cmd/v`)

											
										
										
											2021-05-03 13:14:32 +02:00
// skip_whitespace advances s.pos past whitespace bytes (32, 9..13, 0x85, 0xa0)
// and stops at the first other byte or at the end of the text.
// For each line break it passes, it calls `s.inc_line_number()`; a `\r\n` pair
// counts once and sets s.is_crlf.
// When s.is_vh is set, it stops at the first `\r` or `\n` instead of skipping it.
[direct_array_access; inline]
fn (mut s Scanner) skip_whitespace() {
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		is_blank := ch == 32 || (ch > 8 && ch < 14) || ch == 0x85 || ch == 0xa0
		if !is_blank {
			return
		}
		is_cr := ch == scanner.b_cr
		is_lf := ch == scanner.b_lf
		if is_cr || is_lf {
			if s.is_vh {
				return
			}
			if is_cr && s.pos + 1 < s.text.len && s.text[s.pos + 1] == scanner.b_lf {
				s.is_crlf = true
			}
			// Count \r\n as one line: the `\n` that directly follows a `\r`
			// was already accounted for when the `\r` was skipped.
			lf_right_after_cr := is_lf && s.pos > 0 && s.text[s.pos - 1] == scanner.b_cr
			if !lf_right_after_cr {
				s.inc_line_number()
			}
		}
		s.pos++
	}
}
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
// end_of_file moves s.pos to the end of the text and returns `s.new_eof_token()`.
// It counts its calls in s.eofs and panics on the 51st call, to avoid looping
// forever at the end of the file.
// On the first call, if s.pos is not already at the end of the text, it also
// calls `s.inc_line_number()`.
fn (mut s Scanner) end_of_file() token.Token {
	s.eofs++
	if s.eofs > 50 {
		s.line_nr--
		stuck_msg :=
			'the end of file `$s.file_path` has been reached 50 times already, the v parser is probably stuck.\n' +
			'This should not happen. Please report the bug here, and include the last 2-3 lines of your source code:\n' +
			'https://github.com/vlang/v/issues/new?labels=Bug&template=bug_report.md'
		panic(stuck_msg)
	}
	is_first_eof := s.eofs == 1
	if is_first_eof && s.pos != s.text.len {
		s.inc_line_number()
	}
	s.pos = s.text.len
	return s.new_eof_token()
}
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
// scan_all_tokens_in_buffer scans the remaining text into s.all_tokens with
// s.comments_mode temporarily set to `mode`, then restores the previous mode
// and resets s.tidx to 0.
// It is used mainly by vdoc, in order to implement the .toplevel_comments mode.
// The work is timed under the 'SCAN' label. When compiled with the
// `debugscanner` flag, every collected token is printed to stderr.
pub fn (mut s Scanner) scan_all_tokens_in_buffer(mode CommentsMode) {
	util.timing_start('SCAN')
	defer {
		util.timing_measure_cumulative('SCAN')
	}
	previous_mode := s.comments_mode
	s.comments_mode = mode
	s.scan_remaining_text()
	s.comments_mode = previous_mode
	s.tidx = 0
	$if debugscanner ? {
		for i in 0 .. s.all_tokens.len {
			t := s.all_tokens[i]
			eprintln('> tidx:${t.tidx:-5} | kind: ${t.kind:-10} | lit: $t.lit')
		}
	}
}
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
// scan_remaining_text calls `s.text_scan()` repeatedly and appends every
// returned token to `s.all_tokens`, stopping after the `.eof` token has been
// appended. Comment tokens are dropped when `s.comments_mode` is `.skip_comments`.
pub fn (mut s Scanner) scan_remaining_text() {
	for {
		t := s.text_scan()
		if s.comments_mode == .skip_comments && t.kind == .comment {
			continue
		}
		s.all_tokens << t
		// The eof token itself is stored too, so the buffer always ends with it.
		if t.kind == .eof {
			break
		}
	}
}
 								// scan returns the next token; it simply forwards to `s.buffer_scan()`.
 								pub fn (mut s Scanner) scan() token.Token {
 									return s.buffer_scan()
 								}
 								// buffer_scan returns the token at `s.tidx` from `s.all_tokens` and advances `s.tidx`.
 								// Comment tokens are skipped whenever `s.should_parse_comment()` returns false.
 								// Once the index is past the end of the buffer, the result of `s.end_of_file()` is returned.
 								pub fn (mut s Scanner) buffer_scan() token.Token {
 									for {
 										cidx := s.tidx
 										s.tidx++
 										if cidx >= s.all_tokens.len {
 											return s.end_of_file()
 										}
 										if s.all_tokens[cidx].kind == .comment && !s.should_parse_comment() {
 											continue
 										}
 										return s.all_tokens[cidx]
 									}
 									// NOTE(review): the loop above has no `break`, so this looks unreachable;
 									// presumably kept so the function ends with a return - confirm before removing.
 									return s.new_eof_token()
 								}
-												scanner: fix parsing multiple .amp 


											
										
										
											2020-06-13 00:01:44 +02:00
// peek_token returns the buffered token at index `s.tidx + n`, without
// changing the scanner. When that index is at or past the end of
// `s.all_tokens`, a new eof token is returned instead.
// NOTE(review): an index below 0 is not checked here - confirm that callers
// never pass an `n` smaller than `-s.tidx`.
[inline]
pub fn (s &Scanner) peek_token(n int) token.Token {
	idx := s.tidx + n
	if idx >= s.all_tokens.len {
		return s.new_eof_token()
	}
	return s.all_tokens[idx]
}
-												v.scanner: apply `[direct_array_access]` for key frequently called low level functions

											
										
										
											2021-05-05 22:45:23 +02:00
// look_ahead returns the byte `n` positions after `s.pos` in `s.text`,
// or `\0` when that position is at or past the end of the text.
// NOTE(review): only the upper bound is tested, and the function is tagged
// `direct_array_access`; a negative `s.pos + n` is not guarded - confirm that
// callers only pass non-negative offsets.
[direct_array_access; inline]
fn (s &Scanner) look_ahead(n int) byte {
	if s.pos + n < s.text.len {
		return s.text[s.pos + n]
	}
	return `\0`
}
-												vdoc: implement .toplevel_comments parsing mode

											
										
										
											2020-06-06 17:47:16 +02:00
+								fn (mut s Scanner) text_scan() token.Token {
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+									// The for loop here is so that instead of doing
 									// `return s.scan()` (which will use a new call stack frame),
 									// text_scan can just do continue, keeping
 									// memory & stack usage low.
 									// That optimization mostly matters for long sections
 									// of comments and string literals.
 									for {
 										// if s.comments_mode == .parse_comments {
 										// println('\nscan()')
 										// }
 										// if s.line_comment != '' {
 										// s.fgenln('// LC "$s.line_comment"')
 										// s.line_comment = ''
 										// }
 										if s.is_started {
 											s.pos++
-												scanner: minor optimization in `text_scan()` (#7172)


											
										
										
											2020-12-07 06:33:41 +01:00
+										} else {
 											s.is_started = true
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+										if !s.is_inside_string {
 											s.skip_whitespace()
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										}
-												all: update assoc syntax (#8274)


											
										
										
											2021-01-22 23:24:48 +01:00
+										if s.pos >= s.text.len {
 											return s.end_of_file()
 										}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+										// End of $var, start next string
 										if s.is_inter_end {
 											if s.text[s.pos] == s.quote {
 												s.is_inter_end = false
 												return s.new_token(.string, '', 1)
 											}
 											s.is_inter_end = false
 											ident_string := s.ident_string()
 											return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+										s.skip_whitespace()
 										// end of file
 										if s.pos >= s.text.len {
 											return s.end_of_file()
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+										// handle each char
 										c := s.text[s.pos]
 										nextc := s.look_ahead(1)
 										// name or keyword
 										if util.is_name_char(c) {
 											name := s.ident_name()
 											// tmp hack to detect . in ${}
 											// Check if not .eof to prevent panic
 											next_char := s.look_ahead(1)
 											kind := token.keywords[name]
 											if kind != .unknown {
 												return s.new_token(kind, name, name.len)
-												scanner: make `0o` prefix the only way to define octals 


											
										
										
											2020-02-23 23:43:04 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 											// at the next ', skip it
 											if s.is_inside_string {
 												if next_char == s.quote {
 													s.is_inter_end = true
 													s.is_inter_start = false
 													s.is_inside_string = false
 												}
-												scanner: make `0o` prefix the only way to define octals 


											
										
										
											2020-02-23 23:43:04 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											// end of `$expr`
 											// allow `'$a.b'` and `'$a.c()'`
-												fmt: smarter if condition wrapping (#8201)


											
										
										
											2021-01-23 09:33:22 +01:00
+											if s.is_inter_start && next_char == `\\`
-												scanner: do not warn on \' after string interpolation (#8729)


											
										
										
											2021-02-15 16:54:30 +01:00
+												&& s.look_ahead(2) !in [`x`, `n`, `r`, `\\`, `t`, `e`, `"`, `\'`] {
-												all: remove remaining broken escape sequences

											
										
										
											2020-11-27 03:17:07 +01:00
+												s.warn('unknown escape sequence \\${s.look_ahead(2)}')
-												all: remove broken escape sequences after $ in strings

											
										
										
											2020-11-27 03:07:37 +01:00
+											}
-												scanner: warn about interpolation for `$f(expr)` (#6179)


											
										
										
											2020-08-22 05:48:06 +02:00
+											if s.is_inter_start && next_char == `(` {
 												if s.look_ahead(2) != `)` {
-												all: remove broken escape sequences after $ in strings

											
										
										
											2020-11-27 03:07:37 +01:00
+													s.warn('use `\${f(expr)}` instead of `\$f(expr)`')
-												scanner: warn about interpolation for `$f(expr)` (#6179)


											
										
										
											2020-08-22 05:48:06 +02:00
+												}
 											} else if s.is_inter_start && next_char != `.` {
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												s.is_inter_end = true
 												s.is_inter_start = false
-												scanner: fix string interpolation for '$a.b().c' (#5612)


											
										
										
											2020-07-02 00:02:59 +02:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											if s.pos == 0 && next_char == ` ` {
 												// If a single letter name at the start of the file, increment
 												// Otherwise the scanner would be stuck at s.pos = 0
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+												s.pos++
 											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											return s.new_token(.name, name, name.len)
 										} else if c.is_digit() || (c == `.` && nextc.is_digit()) {
 											// `123`, `.123`
 											if !s.is_inside_string {
 												// In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed.
 												mut start_pos := s.pos
 												for start_pos < s.text.len && s.text[start_pos] == `0` {
 													start_pos++
 												}
 												mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
 												// for 0b, 0o, 0x the heading zero shouldn't be jumped
 												if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
 													prefix_zero_num--
 												}
 												s.pos += prefix_zero_num // jump these zeros
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											num := s.ident_number()
 											return s.new_token(.number, num, num.len)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+										// Handle `'$fn()'`
 										if c == `)` && s.is_inter_start {
 											next_char := s.look_ahead(1)
 											if next_char != `.` {
 												s.is_inter_end = true
 												s.is_inter_start = false
 												if next_char == s.quote {
 													s.is_inside_string = false
 												}
 												return s.new_token(.rpar, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
 										}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+										// all other tokens
 										match c {
 											`+` {
 												if nextc == `+` {
 													s.pos++
 													return s.new_token(.inc, '', 2)
 												} else if nextc == `=` {
 													s.pos++
 													return s.new_token(.plus_assign, '', 2)
 												}
 												return s.new_token(.plus, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`-` {
 												if nextc == `-` {
 													s.pos++
 													return s.new_token(.dec, '', 2)
 												} else if nextc == `=` {
 													s.pos++
 													return s.new_token(.minus_assign, '', 2)
 												}
 												return s.new_token(.minus, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`*` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.mult_assign, '', 2)
 												}
 												return s.new_token(.mul, '', 1)
-												vet: prohibit spaces after `(`

											
										
										
											2020-07-11 11:41:39 +02:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`^` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.xor_assign, '', 2)
 												}
 												return s.new_token(.xor, '', 1)
-												vet: prohibit spaces before `)`

											
										
										
											2020-07-11 12:14:10 +02:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`%` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.mod_assign, '', 2)
 												}
 												return s.new_token(.mod, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`?` {
 												return s.new_token(.question, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												ci: fix `v test-cleancode`

											
										
										
											2021-01-25 10:26:20 +01:00
+											scanner.single_quote, scanner.double_quote {
-												all: fixes related to Node.pos (#9613)


											
										
										
											2021-04-06 15:16:19 +02:00
+												start_line := s.line_nr
-												scanner: even better error messages 


											
										
										
											2020-04-10 00:09:34 +02:00
+												ident_string := s.ident_string()
-												all: fixes related to Node.pos (#9613)


											
										
										
											2021-04-06 15:16:19 +02:00
+												return s.new_multiline_token(.string, ident_string, ident_string.len + 2,
 													start_line) // + two quotes
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`\`` {
 												// ` // apostrophe balance comment. do not remove
 												ident_char := s.ident_char()
 												return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`(` {
 												// TODO `$if vet {` for performance
 												if s.pref.is_vet && s.text[s.pos + 1] == ` ` {
-												vvet: move to own subdir, prepare richer suggestions (#7989)


											
										
										
											2021-01-09 15:11:49 +01:00
+													s.vet_error('Looks like you are adding a space after `(`', .vfmt)
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												}
 												return s.new_token(.lpar, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`)` {
 												// TODO `$if vet {` for performance
 												if s.pref.is_vet && s.text[s.pos - 1] == ` ` {
-												vvet: move to own subdir, prepare richer suggestions (#7989)


											
										
										
											2021-01-09 15:11:49 +01:00
+													s.vet_error('Looks like you are adding a space before `)`', .vfmt)
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												}
 												return s.new_token(.rpar, '', 1)
-												fmt: further fixes for string interpolation and builtin macros


											
										
										
											2020-06-18 18:48:23 +02:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`[` {
 												return s.new_token(.lsbr, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`]` {
 												return s.new_token(.rsbr, '', 1)
-												scanner: add support for '@MOD'


											
										
										
											2020-05-25 18:33:41 +02:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`{` {
 												// Skip { in `${` in strings
 												if s.is_inside_string {
 													continue
 												}
 												return s.new_token(.lcbr, '', 1)
-												scanner: add support for `@STRUCT` compile time substitution 


											
										
										
											2020-05-26 15:35:11 +02:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`$` {
 												if s.is_inside_string {
 													return s.new_token(.str_dollar, '', 1)
 												} else {
 													return s.new_token(.dollar, '', 1)
 												}
-												v and v2: support @VEXE 


											
										
										
											2020-03-28 21:51:45 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`}` {
 												// s = `hello $name !`
 												// s = `hello ${name} !`
-												scanner: fix interpolation with string args (#7214)


											
										
										
											2020-12-09 19:58:27 +01:00
+												if s.is_enclosed_inter {
-												scanner: fix when string literal ends after } (#7237)


											
										
										
											2020-12-10 16:04:12 +01:00
+													if s.pos < s.text.len - 1 {
 														s.pos++
 													} else {
 														s.error('unfinished string literal')
 													}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													if s.text[s.pos] == s.quote {
 														s.is_inside_string = false
-												scanner: fix interpolation with string args (#7214)


											
										
										
											2020-12-09 19:58:27 +01:00
+														s.is_enclosed_inter = false
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+														return s.new_token(.string, '', 1)
 													}
-												scanner: fix interpolation with string args (#7214)


											
										
										
											2020-12-09 19:58:27 +01:00
+													s.is_enclosed_inter = false
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													ident_string := s.ident_string()
 													return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 												} else {
 													return s.new_token(.rcbr, '', 1)
 												}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`&` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.and_assign, '', 2)
 												}
 												afternextc := s.look_ahead(2)
 												if nextc == `&` && afternextc.is_space() {
 													s.pos++
 													return s.new_token(.and, '', 2)
 												}
 												return s.new_token(.amp, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`|` {
 												if nextc == `|` {
 													s.pos++
 													return s.new_token(.logical_or, '', 2)
 												}
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.or_assign, '', 2)
 												}
 												return s.new_token(.pipe, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`,` {
 												return s.new_token(.comma, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`@` {
-												parser: fix silent parsing examples/vmod.v (#7264)


											
										
										
											2020-12-11 18:14:07 +01:00
+												mut name := ''
 												if nextc != `\0` {
 													s.pos++
 													name = s.ident_name()
 												}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												if s.is_fmt {
 													return s.new_token(.name, '@' + name, name.len + 1)
 												}
-												all: remove comp time '@' expansion from scanner (#6746)


											
										
										
											2020-11-05 09:12:32 +01:00
+												// @FN, @STRUCT, @MOD etc. See full list in token.valid_at_tokens
 												if '@' + name in token.valid_at_tokens {
 													return s.new_token(.at, '@' + name, name.len + 1)
-												scanner: add support for @VMOD_FILE

											
										
										
											2020-05-26 22:39:15 +02:00
+												}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												if !token.is_key(name) {
-												all: remove unused enum value and improve error message for `@` tokens in scanner (#6751)


											
										
										
											2020-11-05 11:59:49 +01:00
+													mut at_error_msg := '@ must be used before keywords or compile time variables (e.g. `@type string` or `@FN`)'
 													// If name is all uppercase, the user is probably looking for a compile time variable ("at-token")
 													if name.is_upper() {
 														at_error_msg += '\nAvailable compile time variables:\n$token.valid_at_tokens'
 													}
 													s.error(at_error_msg)
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												}
 												return s.new_token(.name, name, name.len)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`.` {
 												if nextc == `.` {
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+													s.pos++
-												parser: fix fibonacci in silent mode (#7240)


											
										
										
											2020-12-10 17:17:25 +01:00
+													if s.pos + 1 < s.text.len && s.text[s.pos + 1] == `.` {
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+														s.pos++
 														return s.new_token(.ellipsis, '', 3)
 													}
 													return s.new_token(.dotdot, '', 2)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+												}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												return s.new_token(.dot, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`#` {
 												start := s.pos + 1
 												s.ignore_line()
 												if nextc == `!` {
 													// treat shebang line (#!) as a comment
-												fmt: keep the shebang header in vsh files (#7893)


											
										
										
											2021-01-05 15:14:16 +01:00
+													comment := s.text[start - 1..s.pos].trim_space()
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													// s.fgenln('// shebang line "$s.line_comment"')
-												fmt: keep the shebang header in vsh files (#7893)


											
										
										
											2021-01-05 15:14:16 +01:00
+													return s.new_token(.comment, comment, comment.len + 2)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+												}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												hash := s.text[start..s.pos].trim_space()
-												v.scanner: fix position of hash statements (#9222)


											
										
										
											2021-03-10 19:23:17 +01:00
+												return s.new_token(.hash, hash, hash.len + 2)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`>` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.ge, '', 2)
 												} else if nextc == `>` {
 													if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 														s.pos += 2
 														return s.new_token(.right_shift_assign, '', 3)
 													}
 													s.pos++
 													return s.new_token(.right_shift, '', 2)
 												} else {
 													return s.new_token(.gt, '', 1)
 												}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`<` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.le, '', 2)
 												} else if nextc == `<` {
 													if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 														s.pos += 2
 														return s.new_token(.left_shift_assign, '', 3)
 													}
 													s.pos++
 													return s.new_token(.left_shift, '', 2)
-												all: handle `<-` arrow token for channel operations (#6152)


											
										
										
											2020-08-17 20:12:00 +02:00
+												} else if nextc == `-` {
 													s.pos++
 													return s.new_token(.arrow, '', 2)
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												} else {
 													return s.new_token(.lt, '', 1)
 												}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`=` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.eq, '', 2)
 												} else {
 													return s.new_token(.assign, '', 1)
 												}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`:` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.decl_assign, '', 2)
 												} else {
 													return s.new_token(.colon, '', 1)
 												}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`;` {
 												return s.new_token(.semicolon, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`!` {
 												if nextc == `=` {
 													s.pos++
 													return s.new_token(.ne, '', 2)
-												fmt: smarter if condition wrapping (#8201)


											
										
										
											2021-01-23 09:33:22 +01:00
+												} else if s.text.len > s.pos + 3 && nextc == `i` && s.text[s.pos + 2] == `n`
 													&& s.text[s.pos + 3].is_space() {
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													s.pos += 2
 													return s.new_token(.not_in, '', 3)
-												fmt: smarter if condition wrapping (#8201)


											
										
										
											2021-01-23 09:33:22 +01:00
+												} else if s.text.len > s.pos + 3 && nextc == `i` && s.text[s.pos + 2] == `s`
 													&& s.text[s.pos + 3].is_space() {
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													s.pos += 2
 													return s.new_token(.not_is, '', 3)
 												} else {
 													return s.new_token(.not, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+												}
 											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											`~` {
 												return s.new_token(.bit_not, '', 1)
 											}
 											`/` {
 												if nextc == `=` {
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+													s.pos++
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													return s.new_token(.div_assign, '', 2)
 												}
 												if nextc == `/` {
 													start := s.pos + 1
 													s.ignore_line()
-												v.scanner: fix error line numbers when comments end with CRLF

											
										
										
											2020-08-26 10:39:32 +02:00
+													mut comment_line_end := s.pos
-												scanner: speed up Scanner.skip_whitespace (~2-3% speed up of `-o x.c cmd/v`)

											
										
										
											2021-05-03 13:14:32 +02:00
+													if s.text[s.pos - 1] == scanner.b_cr {
-												v.scanner: fix error line numbers when comments end with CRLF

											
										
										
											2020-08-26 10:39:32 +02:00
+														comment_line_end--
 													} else {
 														// fix line_nr, \n was read; the comment is marked on the next line
 														s.pos--
 														s.line_nr--
-												checker: fix error pos on default value (#6338)


											
										
										
											2020-09-09 15:34:41 +02:00
+													}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													if s.should_parse_comment() {
-												scanner: reduce memory, increase speed for long commented sections

											
										
										
											2020-07-11 20:27:39 +02:00
+														s.line_comment = s.text[start + 1..comment_line_end]
-												fmt: only insert a space after // if the 3rd char is alphanumeric (#7330)


											
										
										
											2020-12-15 04:26:28 +01:00
+														mut comment := s.line_comment
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+														// Find out if this comment is on its own line (for vfmt)
 														mut is_separate_line_comment := true
-												scanner: speed up Scanner.skip_whitespace (~2-3% speed up of `-o x.c cmd/v`)

											
										
										
											2021-05-03 13:14:32 +02:00
+														for j := start - 2; j >= 0 && s.text[j] != scanner.b_lf; j-- {
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+															if s.text[j] !in [`\t`, ` `] {
 																is_separate_line_comment = false
 															}
 														}
 														if is_separate_line_comment {
-												fmt: cleanup comments code (#8901)


											
										
										
											2021-02-22 17:43:54 +01:00
+															// NB: ´\x01´ is used to preserve the initial whitespace in comments
 															//     that are on a separate line
-												fmt: fix multiple things and format most of the compiler (#6631)

Format expressions inside string interpolation like the rest (it used to be a+b instead of a + b, not too sure why)
Fix formatting some match branches when there were only one statement inside (it was inlined)
Fix parsing and formatting some comments edge case on struct field init. You should check out this test because the result is a bit different from before. I personally find it more logical but I would understand if the former format was to stay
Fix formatting of void-returning function signature
											
										
										
											2020-10-15 22:12:59 +02:00
+															comment = '\x01' + comment
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+														}
 														return s.new_token(.comment, comment, comment.len + 2)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+													}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 													// Skip the comment (return the next token)
 													continue
 												}
 												// Multiline comments
 												if nextc == `*` {
 													start := s.pos + 2
-												fmt: keep single empty lines (#8189)


											
										
										
											2021-01-19 14:49:40 +01:00
+													start_line := s.line_nr
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													mut nest_count := 1
 													// Skip comment
-												parser: fix silent errors (#7271)


											
										
										
											2020-12-12 04:06:09 +01:00
+													for nest_count > 0 && s.pos < s.text.len - 1 {
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+														s.pos++
 														if s.pos >= s.text.len {
 															s.line_nr--
 															s.error('comment not terminated')
 														}
-												scanner: speed up Scanner.skip_whitespace (~2-3% speed up of `-o x.c cmd/v`)

											
										
										
											2021-05-03 13:14:32 +02:00
+														if s.text[s.pos] == scanner.b_lf {
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+															s.inc_line_number()
 															continue
 														}
 														if s.expect('/*', s.pos) {
 															nest_count++
 															continue
 														}
 														if s.expect('*/', s.pos) {
 															nest_count--
 														}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+													}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													s.pos++
 													if s.should_parse_comment() {
-												fmt: do not move single line block comments to previous line (#8046)


											
										
										
											2021-01-13 03:30:24 +01:00
+														mut comment := s.text[start..(s.pos - 1)].trim(' ')
 														if !comment.contains('\n') {
 															comment = '\x01' + comment
 														}
-												scanner: fix typo (#8345)


											
										
										
											2021-01-27 13:53:41 +01:00
+														return s.new_multiline_token(.comment, comment, comment.len + 4,
-												fmt: keep single empty lines (#8189)


											
										
										
											2021-01-19 14:49:40 +01:00
+															start_line)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+													}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+													// Skip if not in fmt mode
 													continue
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+												}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+												return s.new_token(.div, '', 1)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+											}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+											else {}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										}
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+										$if windows {
 											if c == `\0` {
 												return s.end_of_file()
 											}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+										}
-												scanner: print multibyte char for invalid char error (#8804)


											
										
										
											2021-02-18 14:43:39 +01:00
+										s.invalid_character()
-												scanner: parse multiple comments and long strings without a stackoverflow

											
										
										
											2020-07-11 19:52:05 +02:00
+										break
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+									}
 									return s.end_of_file()
 								}
-												scanner: print multibyte char for invalid char error (#8804)


											
										
										
											2021-02-18 14:43:39 +01:00
// invalid_character reports the character starting at the current position
// through s.error. The whole sequence (as sized by utf8_char_len) is shown,
// so a multibyte character is not cut in the middle.
fn (mut s Scanner) invalid_character() {
	n_bytes := utf8_char_len(s.text[s.pos])
	// clamp to the text length, in case the sequence is truncated at the end of the input
	stop := mu.min(s.pos + n_bytes, s.text.len)
	c := s.text[s.pos..stop]
	s.error('invalid character `$c`')
}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
// current_column returns the offset of the current position from the position
// recorded for the last newline (s.last_nl_pos).
fn (s &Scanner) current_column() int {
	column := s.pos - s.last_nl_pos
	return column
}
-												scanner: cleanup, only v.parser now depends on v.scanner

											
										
										
											2020-04-27 15:08:04 +02:00
// count_symbol_before returns how many consecutive `sym` bytes end at index `p`
// (inclusive), scanning backwards towards the start of the text.
// It returns 0 when `p` is negative, or when s.text[p] is not `sym`.
fn (s &Scanner) count_symbol_before(p int, sym byte) int {
	mut i := p
	for i >= 0 && s.text[i] == sym {
		i--
	}
	// i stopped on the first index that does not hold `sym` (or on -1)
	return p - i
}
-												v.scanner: apply `[direct_array_access]` for key frequently called low level functions

											
										
										
											2021-05-05 22:45:23 +02:00
// ident_string scans a string literal, or the next literal chunk of an
// interpolated string, starting at s.pos. It returns the scanned text without
// the surrounding quotes.
//
// Side effects visible in this function:
//  - s.quote / s.inter_quote are updated, when s.pos is on a quote character;
//  - s.pos is advanced to the closing quote, or moved back so that the next
//    step sees the `$` that starts an interpolation;
//  - s.is_inside_string, s.is_enclosed_inter and s.is_inter_start are set, when
//    an interpolation (`${expr}` or `$var`) starts;
//  - line bookkeeping is updated through s.inc_line_number() for every `\n`;
//  - problems are reported through s.error(): unfinished literals, `\0` and
//    `\x00` in normal strings, incomplete `\x` / `\u` escapes.
//
// Since the function is marked with [direct_array_access], every index that
// looks ahead of, or behind s.pos, is guarded explicitly against the text bounds.
[direct_array_access]
fn (mut s Scanner) ident_string() string {
	q := s.text[s.pos]
	is_quote := q == scanner.single_quote || q == scanner.double_quote
	// r'...' and c'...' literals are recognised by the byte right before the quote
	is_raw := is_quote && s.pos > 0 && s.text[s.pos - 1] == `r` && !s.is_inside_string
	is_cstr := is_quote && s.pos > 0 && s.text[s.pos - 1] == `c` && !s.is_inside_string
	if is_quote {
		if s.is_inside_string || s.is_enclosed_inter || s.is_inter_start {
			s.inter_quote = q
		} else {
			s.quote = q
		}
	}
	mut n_cr_chars := 0
	mut start := s.pos
	start_char := s.text[start]
	if start_char == s.quote
		|| (start_char == s.inter_quote && (s.is_inter_start || s.is_enclosed_inter)) {
		// skip the opening quote
		start++
	} else if start_char == scanner.b_lf {
		s.inc_line_number()
	}
	s.is_inside_string = false
	mut u_escapes_pos := []int{} // pos list of \uXXXX
	slash := `\\`
	for {
		s.pos++
		if s.pos >= s.text.len {
			s.error('unfinished string literal')
			break
		}
		c := s.text[s.pos]
		prevc := s.text[s.pos - 1]
		// end of string
		// (s.pos - 2 is only read, when it is a valid index)
		if c == s.quote && (prevc != slash || (s.pos >= 2 && s.text[s.pos - 2] == slash)) {
			// handle '123\\'  slash at the end
			break
		}
		if c == s.inter_quote && (s.is_inter_start || s.is_enclosed_inter) {
			break
		}
		if c == scanner.b_cr {
			n_cr_chars++
		}
		if c == scanner.b_lf {
			s.inc_line_number()
		}
		// Don't allow \0
		if c == `0` && s.pos > 2 && prevc == slash {
			followed_by_digit := s.pos < s.text.len - 1 && s.text[s.pos + 1].is_digit()
			// an even run of slashes means that the slash before `0` is itself escaped
			slash_is_escaped := s.count_symbol_before(s.pos - 1, slash) % 2 == 0
			if !followed_by_digit && !slash_is_escaped && !is_cstr && !is_raw {
				s.error(r'cannot use `\0` (NULL character) in the string literal')
			}
		}
		// Don't allow \x00
		if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
			slash_is_escaped := s.count_symbol_before(s.pos - 3, slash) % 2 == 0
			if !slash_is_escaped && !is_cstr && !is_raw {
				s.error(r'cannot use `\x00` (NULL character) in the string literal')
			}
		}
		// Escape `\x` `\u`
		if prevc == slash && !is_raw && !is_cstr && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			// Escape `\x`
			// (a `\x` that is the very last byte of the input has no digits either)
			if c == `x` && (s.pos + 1 >= s.text.len || s.text[s.pos + 1] == s.quote
				|| !s.text[s.pos + 1].is_hex_digit()) {
				s.error(r'`\x` used with no following hex digits')
			}
			// Escape `\u`
			if c == `u` {
				// all 4 bytes after `\u` must exist, before they can be inspected
				if s.pos + 4 >= s.text.len || s.text[s.pos + 1] == s.quote
					|| s.text[s.pos + 2] == s.quote || s.text[s.pos + 3] == s.quote
					|| s.text[s.pos + 4] == s.quote || !s.text[s.pos + 1].is_hex_digit()
					|| !s.text[s.pos + 2].is_hex_digit() || !s.text[s.pos + 3].is_hex_digit()
					|| !s.text[s.pos + 4].is_hex_digit() {
					s.error(r'`\u` incomplete unicode character value')
				}
				// remember where the `\` of this escape is, for decode_u_escapes
				u_escapes_pos << s.pos - 1
			}
		}
		// ${var} (ignore in vfmt mode) (skip \$)
		if prevc == `$` && c == `{` && !is_raw && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.is_inside_string = true
			s.is_enclosed_inter = true
			// so that s.pos points to $ at the next step
			s.pos -= 2
			break
		}
		// $var
		if prevc == `$` && util.is_name_char(c) && !is_raw
			&& s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.is_inside_string = true
			s.is_inter_start = true
			s.pos -= 2
			break
		}
	}
	mut lit := ''
	mut end := s.pos
	if s.is_inside_string {
		// s.pos was moved back by 2 above; include the byte right before the `$`
		end++
	}
	if start <= s.pos {
		mut string_so_far := s.text[start..end]
		if !s.is_fmt && u_escapes_pos.len > 0 {
			string_so_far = decode_u_escapes(string_so_far, start, u_escapes_pos)
		}
		if n_cr_chars > 0 {
			string_so_far = string_so_far.replace('\r', '')
		}
		if string_so_far.contains('\\\n') {
			lit = trim_slash_line_break(string_so_far)
		} else {
			lit = string_so_far
		}
	}
	return lit
}
-												v.scanner: decode \uXXXX in scanner (#9298)


											
										
										
											2021-03-14 11:09:17 +01:00
// decode_u_escapes replaces every `\uXXXX` escape in `s` with the string
// produced by utf32_to_str for that hex value.
// `escapes_pos` holds the positions of the `\` of each escape, expressed in the
// coordinates of the whole scanned text; `start` is the position of s[0] in
// that text, so `pos - start` is the index inside `s`.
// `s` is returned unchanged, when there are no escapes.
fn decode_u_escapes(s string, start int, escapes_pos []int) string {
	if escapes_pos.len == 0 {
		return s
	}
	// at most: one leading chunk + (decoded char, following chunk) per escape
	mut parts := []string{cap: escapes_pos.len * 2 + 1}
	parts << s[..escapes_pos.first() - start]
	for n, abs_pos in escapes_pos {
		rel := abs_pos - start
		after := rel + 6 // "\uXXXX".len == 6
		code := strconv.parse_uint(s[rel + 2..after], 16, 32)
		parts << utf32_to_str(u32(code))
		// the literal text runs up to the next escape, or to the end of `s`
		chunk_end := if n + 1 < escapes_pos.len { escapes_pos[n + 1] - start } else { s.len }
		parts << s[after..chunk_end]
	}
	return parts.join('')
}
-												scanner: fix string slash line breaks error


											
										
										
											2020-05-04 10:19:23 +02:00
// trim_slash_line_break joins lines that were continued with a trailing `\`:
// every `\` + newline pair is removed, together with the whitespace
// (' ', '\n', '\t', '\v', '\f', '\r') that directly follows it.
fn trim_slash_line_break(s string) string {
	mut joined := s
	mut search_from := 0
	for {
		hit := joined.index_after('\\\n', search_from)
		if hit == -1 {
			break
		}
		head := joined[..hit]
		tail := joined[hit + 2..].trim_left(' \n\t\v\f\r')
		joined = head + tail
		// continue from the join point; the trimmed tail now starts there
		search_from = hit
	}
	return joined
}
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
// ident_char scans a backtick delimited character literal, starting with s.pos
// on the opening backtick, and returns the text between the backticks.
// s.pos is left on the closing backtick (or at the end of the text, when there
// is none). An error is reported, when the literal holds more than one
// character. A lone `'` is returned with a `\` in front of it.
fn (mut s Scanner) ident_char() string {
	open_pos := s.pos
	backslash := `\\`
	mut n_chars := 0
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		cur := s.text[s.pos]
		// backslashes are not counted
		if cur != backslash {
			n_chars++
		}
		after_double_slash := s.expect('\\\\', s.pos - 2)
		is_closing := cur == `\`` && (after_double_slash || s.text[s.pos - 1] != backslash)
		// ` // apostrophe balance comment. do not remove
		if is_closing {
			if after_double_slash {
				n_chars++
			}
			break
		}
	}
	// the loop above counted 1 byte too many
	n_chars--
	c := s.text[open_pos + 1..s.pos]
	// a single multibyte character is still a valid literal
	if n_chars != 1 && c.ustring().len != 1 {
		s.error('invalid character literal (more than one character)\n' +
			'use quotes for strings, backticks for characters')
	}
	// Escapes a `'` character
	if c == "'" {
		return '\\' + c
	}
	return c
}
-												v.scanner: apply `[direct_array_access]` for key frequently called low level functions

											
										
										
											2021-05-05 22:45:23 +02:00
// expect returns true, when the text contains `want` exactly at `start_pos`.
// It returns false, when the requested range does not lie inside the text
// (including a negative start, or an overflowed end).
[direct_array_access; inline]
fn (s &Scanner) expect(want string, start_pos int) bool {
	end_pos := start_pos + want.len
	if start_pos < 0 || end_pos < 0 || start_pos >= s.text.len || end_pos > s.text.len {
		return false
	}
	// the range was validated above, so the unchecked indexing below stays in bounds
	for offset in 0 .. want.len {
		if want[offset] != s.text[start_pos + offset] {
			return false
		}
	}
	return true
}
-												scanner: reduce memory, increase speed for long commented sections

											
										
										
											2020-07-11 20:27:39 +02:00
// ignore_line skips the rest of the current line: it moves s.pos forward
// with eat_to_end_of_line(), then records a new line with inc_line_number().
[inline]
fn (mut s Scanner) ignore_line() {
	s.eat_to_end_of_line()
	s.inc_line_number()
}
-												scanner: speed up Scanner.skip_whitespace (~2-3% speed up of `-o x.c cmd/v`)

											
										
										
											2021-05-03 13:14:32 +02:00
// eat_to_end_of_line advances s.pos, until it is on a `\n` byte,
// or until it reaches the end of the text.
[direct_array_access; inline]
fn (mut s Scanner) eat_to_end_of_line() {
	for {
		// the length is checked first, so the unchecked index is never out of range
		if s.pos >= s.text.len || s.text[s.pos] == scanner.b_lf {
			break
		}
		s.pos++
	}
}
-												scanner: reduce memory, increase speed for long commented sections

											
										
										
											2020-07-11 20:27:39 +02:00
+								[inline]
-												parser: check `(mut f Foo)` syntax


											
										
										
											2020-05-17 13:51:18 +02:00
+								fn (mut s Scanner) inc_line_number() {
-												scanner, token: add column information to tokens (#9407)


											
										
										
											2021-03-23 06:23:46 +01:00
+									s.last_nl_pos = mu.min(s.text.len - 1, s.pos)
 									if s.is_crlf {
 										s.last_nl_pos++
 									}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+									s.line_nr++
 									s.line_ends << s.pos
 									if s.line_nr > s.nr_lines {
 										s.nr_lines = s.line_nr
 									}
 								}
-												v: support compiler notices. Use them for `[deprecated_after: '2021-05-01']` tags

Compiler notices are like warnings, with these differences:
   a) notices use a different color.
   b) notices use a different label.
   c) notices do not prevent compilation with -prod.
   (warnings are converted to errors with -prod)

											
										
										
											2021-03-22 18:43:06 +01:00
+								pub fn (mut s Scanner) note(msg string) {
 									pos := token.Position{
 										line_nr: s.line_nr
 										pos: s.pos
 									}
 									if s.pref.output_mode == .stdout {
 										eprintln(util.formatted_error('notice:', msg, s.file_path, pos))
 									} else {
 										s.notices << errors.Notice{
 											file_path: s.file_path
 											pos: pos
 											reporter: .scanner
 											message: msg
 										}
 									}
 								}
-												scanner: store errors and warnings on silent mode (#6883)


											
										
										
											2020-11-20 10:44:19 +01:00
+								pub fn (mut s Scanner) warn(msg string) {
-												scanner: treat warnings as errors on -W too, simillar to the parser

											
										
										
											2020-12-04 14:11:35 +01:00
+									if s.pref.warns_are_errors {
 										s.error(msg)
 										return
 									}
-												scanner: warn about interpolation for `$f(expr)` (#6179)


											
										
										
											2020-08-22 05:48:06 +02:00
+									pos := token.Position{
 										line_nr: s.line_nr
 										pos: s.pos
-												scanner, token: add column information to tokens (#9407)


											
										
										
											2021-03-23 06:23:46 +01:00
+										col: s.current_column() - 1
-												scanner: warn about interpolation for `$f(expr)` (#6179)


											
										
										
											2020-08-22 05:48:06 +02:00
+									}
-												scanner: store errors and warnings on silent mode (#6883)


											
										
										
											2020-11-20 10:44:19 +01:00
+									if s.pref.output_mode == .stdout {
 										eprintln(util.formatted_error('warning:', msg, s.file_path, pos))
 									} else {
 										s.warnings << errors.Warning{
 											file_path: s.file_path
 											pos: pos
 											reporter: .scanner
 											message: msg
 										}
 									}
-												scanner: warn about interpolation for `$f(expr)` (#6179)


											
										
										
											2020-08-22 05:48:06 +02:00
+								}
-												scanner: store errors and warnings on silent mode (#6883)


											
										
										
											2020-11-20 10:44:19 +01:00
+								pub fn (mut s Scanner) error(msg string) {
-												scanner: use formatted errors for s.error too

											
										
										
											2020-04-06 18:58:21 +02:00
+									pos := token.Position{
 										line_nr: s.line_nr
 										pos: s.pos
-												scanner, token: add column information to tokens (#9407)


											
										
										
											2021-03-23 06:23:46 +01:00
+										col: s.current_column() - 1
-												checker: check if BranchStmt is in a loop 


											
										
										
											2020-04-07 12:29:11 +02:00
+									}
-												scanner: store errors and warnings on silent mode (#6883)


											
										
										
											2020-11-20 10:44:19 +01:00
+									if s.pref.output_mode == .stdout {
 										eprintln(util.formatted_error('error:', msg, s.file_path, pos))
 										exit(1)
 									} else {
-												parser: prepare for better VLS integration, more accurate parser errors (#7119)


											
										
										
											2020-12-04 19:34:05 +01:00
+										if s.pref.fatal_errors {
 											exit(1)
 										}
-												scanner: store errors and warnings on silent mode (#6883)


											
										
										
											2020-11-20 10:44:19 +01:00
+										s.errors << errors.Error{
 											file_path: s.file_path
 											pos: pos
 											reporter: .scanner
 											message: msg
 										}
 									}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								}
-												vvet: move to own subdir, prepare richer suggestions (#7989)


											
										
										
											2021-01-09 15:11:49 +01:00
+								fn (mut s Scanner) vet_error(msg string, fix vet.FixKind) {
 									ve := vet.Error{
 										message: msg
 										file_path: s.file_path
 										pos: token.Position{
 											line_nr: s.line_nr
-												scanner, token: add column information to tokens (#9407)


											
										
										
											2021-03-23 06:23:46 +01:00
+											col: s.current_column() - 1
-												vvet: move to own subdir, prepare richer suggestions (#7989)


											
										
										
											2021-01-09 15:11:49 +01:00
+										}
 										kind: .error
 										fix: fix
-												v vet: give an error for trailing whitespace (#9574)


											
										
										
											2021-04-09 12:22:14 +02:00
+										typ: .default
-												vvet: move to own subdir, prepare richer suggestions (#7989)


											
										
										
											2021-01-09 15:11:49 +01:00
+									}
 									s.vet_errors << ve
-												fmt,vet: handle `var in [single_value]` (#5882)


											
										
										
											2020-07-19 19:58:34 +02:00
+								}
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								pub fn verror(s string) {
-												v2: compile vfmt again; consistent colored error messages

											
										
										
											2020-04-03 17:38:41 +02:00
+									util.verror('scanner error', s)
-												new AST built with sum types

											
										
										
											2019-12-22 02:34:37 +01:00
+								}
-												parser: reimplement [flag] enum support. Add p.vcodegen too


											
										
										
											2020-05-25 23:00:48 +02:00
 								pub fn (mut s Scanner) codegen(newtext string) {
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+									$if debug_codegen ? {
 										eprintln('scanner.codegen:\n $newtext')
 									}
-												parser: reimplement [flag] enum support. Add p.vcodegen too


											
										
										
											2020-05-25 23:00:48 +02:00
+									// codegen makes sense only during normal compilation
 									// feeding code generated V code to vfmt or vdoc will
 									// cause them to output/document ephemeral stuff.
 									if s.comments_mode == .skip_comments {
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+										s.all_tokens.delete_last() // remove .eof from end of .all_tokens
-												parser: reimplement [flag] enum support. Add p.vcodegen too


											
										
										
											2020-05-25 23:00:48 +02:00
+										s.text += newtext
-												parser/scanner: replace p.peek_tok2/3 with p.peek_token(2/3) (#8946)


											
										
										
											2021-02-24 19:03:53 +01:00
+										old_tidx := s.tidx
 										s.tidx = s.all_tokens.len
 										s.scan_remaining_text()
 										s.tidx = old_tidx
-												parser: reimplement [flag] enum support. Add p.vcodegen too


											
										
										
											2020-05-25 23:00:48 +02:00
+									}
 								}
-												scanner: implement s.trace/2

											
										
										
											2021-01-28 14:21:19 +01:00
 								// trace prints `message` to stdout, tagged with `fbase`, but only when the
 								// scanner's `file_base` is equal to `fbase`; for any other file it does nothing.
 								fn (mut s Scanner) trace(fbase string, message string) {
 									if s.file_base != fbase {
 										return
 									}
 									println('> s.trace | ${fbase:-10s} | $message')
 								}