915 lines
21 KiB
V
915 lines
21 KiB
V
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
|
|
// Use of this source code is governed by an MIT license
|
|
// that can be found in the LICENSE file.
|
|
|
|
module main
|
|
|
|
import (
|
|
os
|
|
strings
|
|
term
|
|
)
|
|
|
|
const (
|
|
single_quote = `\'`
|
|
double_quote = `"`
|
|
error_context_before = 2 // how many lines of source context to print before the pointer line
|
|
error_context_after = 2 // ^^^ same, but after
|
|
)
|
|
|
|
struct Scanner {
|
|
mut:
|
|
file_path string
|
|
text string
|
|
pos int
|
|
line_nr int
|
|
last_nl_pos int // for calculating column
|
|
inside_string bool
|
|
inter_start bool // for hacky string interpolation TODO simplify
|
|
inter_end bool
|
|
debug bool
|
|
line_comment string
|
|
started bool
|
|
// vfmt fields
|
|
fmt_out strings.Builder
|
|
fmt_indent int
|
|
fmt_line_empty bool
|
|
prev_tok Token
|
|
fn_name string // needed for @FN
|
|
should_print_line_on_error bool
|
|
should_print_errors_in_color bool
|
|
quote byte // which quote is used to denote current string: ' or "
|
|
file_lines []string // filled *only on error* by rescanning the source till the error (and several lines more)
|
|
}
|
|
|
|
// new scanner from file.
|
|
fn new_scanner_file(file_path string) &Scanner {
|
|
if !os.file_exists(file_path) {
|
|
verror("$file_path doesn't exist")
|
|
}
|
|
|
|
mut raw_text := os.read_file(file_path) or {
|
|
verror('scanner: failed to open $file_path')
|
|
return 0
|
|
}
|
|
|
|
// BOM check
|
|
if raw_text.len >= 3 {
|
|
c_text := raw_text.str
|
|
|
|
if c_text[0] == 0xEF && c_text[1] == 0xBB && c_text[2] == 0xBF {
|
|
// skip three BOM bytes
|
|
offset_from_begin := 3
|
|
raw_text = tos(c_text[offset_from_begin], vstrlen(c_text) - offset_from_begin)
|
|
}
|
|
}
|
|
|
|
mut s := new_scanner(raw_text)
|
|
s.file_path = file_path
|
|
|
|
return s
|
|
}
|
|
|
|
// new scanner from string.
|
|
fn new_scanner(text string) &Scanner {
|
|
return &Scanner {
|
|
text: text
|
|
fmt_out: strings.new_builder(1000)
|
|
should_print_line_on_error: true
|
|
should_print_errors_in_color: true
|
|
}
|
|
}
|
|
|
|
|
|
// The goal of ScannerPos is to track the current scanning position,
|
|
// so that if there is an error found later, v could show a more accurate
|
|
// position about where the error initially was.
|
|
// NB: The fields of ScannerPos *should be kept synchronized* with the
|
|
// corresponding fields in Scanner.
|
|
struct ScannerPos {
|
|
mut:
|
|
pos int
|
|
line_nr int
|
|
last_nl_pos int
|
|
}
|
|
fn (s ScannerPos) str() string {
|
|
return 'ScannerPos{ ${s.pos:5d} , ${s.line_nr:5d} , ${s.last_nl_pos:5d} }'
|
|
}
|
|
fn (s &Scanner) get_scanner_pos() ScannerPos {
|
|
return ScannerPos{ pos: s.pos line_nr: s.line_nr last_nl_pos: s.last_nl_pos }
|
|
}
|
|
fn (s mut Scanner) goto_scanner_position(scp ScannerPos) {
|
|
s.pos = scp.pos
|
|
s.line_nr = scp.line_nr
|
|
s.last_nl_pos = scp.last_nl_pos
|
|
}
|
|
|
|
// get_scanner_pos_of_token rescans *the whole source* till it reaches {t.line_nr, t.col} .
|
|
fn (s mut Scanner) get_scanner_pos_of_token(t Tok) ScannerPos {
|
|
// This rescanning is done just once on error, so it is fine for now.
|
|
// Be careful for the performance implications, if you want to
|
|
// do it more frequently. The alternative would be to store
|
|
// the scanpos (12 bytes) for each token, and there are potentially many tokens.
|
|
tline := t.line_nr
|
|
tcol := if t.line_nr == 0 { t.col + 1 } else { t.col - 1 }
|
|
// save the current scanner position, it will be restored later
|
|
cpos := s.get_scanner_pos()
|
|
mut sptoken := ScannerPos{}
|
|
// Starting from the start, scan the source lines
|
|
// till the desired tline is reached, then
|
|
// s.pos + tcol would be the proper position
|
|
// of the token. Continue scanning for some more lines of context too.
|
|
s.goto_scanner_position(ScannerPos{})
|
|
s.file_lines = []string
|
|
mut prevlinepos := 0
|
|
for {
|
|
prevlinepos = s.pos
|
|
if s.pos >= s.text.len { break }
|
|
if s.line_nr > tline + 10 { break }
|
|
////////////////////////////////////////
|
|
if tline == s.line_nr {
|
|
sptoken = s.get_scanner_pos()
|
|
sptoken.pos += tcol
|
|
}
|
|
s.ignore_line() s.eat_single_newline()
|
|
sline := s.text.substr( prevlinepos, s.pos ).trim_right('\r\n')
|
|
s.file_lines << sline
|
|
}
|
|
//////////////////////////////////////////////////
|
|
s.goto_scanner_position(cpos)
|
|
return sptoken
|
|
}
|
|
fn (s mut Scanner) eat_single_newline(){
|
|
if s.pos >= s.text.len { return }
|
|
if s.expect('\r\n', s.pos) { s.pos += 2 return }
|
|
if s.text[ s.pos ] == `\n` { s.pos ++ return }
|
|
if s.text[ s.pos ] == `\r` { s.pos ++ return }
|
|
}
|
|
|
|
|
|
// TODO remove once multiple return values are implemented
|
|
struct ScanRes {
|
|
tok Token
|
|
lit string
|
|
}
|
|
|
|
fn scan_res(tok Token, lit string) ScanRes {
|
|
return ScanRes{tok, lit}
|
|
}
|
|
|
|
fn (s mut Scanner) ident_name() string {
|
|
start := s.pos
|
|
for {
|
|
s.pos++
|
|
if s.pos >= s.text.len {
|
|
break
|
|
}
|
|
c := s.text[s.pos]
|
|
if !is_name_char(c) && !c.is_digit() {
|
|
break
|
|
}
|
|
}
|
|
name := s.text.substr(start, s.pos)
|
|
s.pos--
|
|
return name
|
|
}
|
|
|
|
fn (s mut Scanner) ident_hex_number() string {
|
|
start_pos := s.pos
|
|
s.pos += 2 // skip '0x'
|
|
for {
|
|
if s.pos >= s.text.len {
|
|
break
|
|
}
|
|
c := s.text[s.pos]
|
|
if !c.is_hex_digit() {
|
|
break
|
|
}
|
|
s.pos++
|
|
}
|
|
number := s.text.substr(start_pos, s.pos)
|
|
s.pos--
|
|
return number
|
|
}
|
|
|
|
fn (s mut Scanner) ident_oct_number() string {
|
|
start_pos := s.pos
|
|
for {
|
|
if s.pos >= s.text.len {
|
|
break
|
|
}
|
|
c := s.text[s.pos]
|
|
if c.is_digit() {
|
|
if !c.is_oct_digit() {
|
|
s.error('malformed octal constant')
|
|
}
|
|
} else {
|
|
break
|
|
}
|
|
s.pos++
|
|
}
|
|
number := s.text.substr(start_pos, s.pos)
|
|
s.pos--
|
|
return number
|
|
}
|
|
|
|
fn (s mut Scanner) ident_dec_number() string {
|
|
start_pos := s.pos
|
|
|
|
// scan integer part
|
|
for s.pos < s.text.len && s.text[s.pos].is_digit() {
|
|
s.pos++
|
|
}
|
|
|
|
// e.g. 1..9
|
|
// we just return '1' and don't scan '..9'
|
|
if s.expect('..', s.pos) {
|
|
number := s.text.substr(start_pos, s.pos)
|
|
s.pos--
|
|
return number
|
|
}
|
|
|
|
// scan fractional part
|
|
if s.pos < s.text.len && s.text[s.pos] == `.` {
|
|
s.pos++
|
|
for s.pos < s.text.len && s.text[s.pos].is_digit() {
|
|
s.pos++
|
|
}
|
|
if !s.inside_string && s.pos < s.text.len && s.text[s.pos] == `f` {
|
|
s.error('no `f` is needed for floats')
|
|
}
|
|
}
|
|
|
|
// scan exponential part
|
|
mut has_exponential_part := false
|
|
if s.expect('e+', s.pos) || s.expect('e-', s.pos) {
|
|
exp_start_pos := s.pos += 2
|
|
for s.pos < s.text.len && s.text[s.pos].is_digit() {
|
|
s.pos++
|
|
}
|
|
if exp_start_pos == s.pos {
|
|
s.error('exponent has no digits')
|
|
}
|
|
has_exponential_part = true
|
|
}
|
|
|
|
// error check: 1.23.4, 123.e+3.4
|
|
if s.pos < s.text.len && s.text[s.pos] == `.` {
|
|
if has_exponential_part {
|
|
s.error('exponential part should be integer')
|
|
}
|
|
else {
|
|
s.error('too many decimal points in number')
|
|
}
|
|
}
|
|
|
|
number := s.text.substr(start_pos, s.pos)
|
|
s.pos--
|
|
return number
|
|
}
|
|
|
|
fn (s mut Scanner) ident_number() string {
|
|
if s.expect('0x', s.pos) {
|
|
return s.ident_hex_number()
|
|
}
|
|
|
|
if s.expect('0.', s.pos) || s.expect('0e', s.pos) {
|
|
return s.ident_dec_number()
|
|
}
|
|
|
|
if s.text[s.pos] == `0` {
|
|
return s.ident_oct_number()
|
|
}
|
|
|
|
return s.ident_dec_number()
|
|
}
|
|
|
|
fn (s mut Scanner) skip_whitespace() {
|
|
for s.pos < s.text.len && s.text[s.pos].is_white() {
|
|
// Count \r\n as one line
|
|
if is_nl(s.text[s.pos]) && !s.expect('\r\n', s.pos-1) {
|
|
s.inc_line_number()
|
|
}
|
|
s.pos++
|
|
}
|
|
}
|
|
|
|
fn (s mut Scanner) scan() ScanRes {
|
|
if s.line_comment != '' {
|
|
//s.fgenln('// LOL "$s.line_comment"')
|
|
//s.line_comment = ''
|
|
}
|
|
if s.started {
|
|
s.pos++
|
|
}
|
|
s.started = true
|
|
if s.pos >= s.text.len {
|
|
return scan_res(.eof, '')
|
|
}
|
|
// skip whitespace
|
|
if !s.inside_string {
|
|
s.skip_whitespace()
|
|
}
|
|
// End of $var, start next string
|
|
if s.inter_end {
|
|
if s.text[s.pos] == `\'` {
|
|
s.inter_end = false
|
|
return scan_res(.str, '')
|
|
}
|
|
s.inter_end = false
|
|
return scan_res(.str, s.ident_string())
|
|
}
|
|
s.skip_whitespace()
|
|
// end of file
|
|
if s.pos >= s.text.len {
|
|
return scan_res(.eof, '')
|
|
}
|
|
// handle each char
|
|
c := s.text[s.pos]
|
|
mut nextc := `\0`
|
|
if s.pos + 1 < s.text.len {
|
|
nextc = s.text[s.pos + 1]
|
|
}
|
|
// name or keyword
|
|
if is_name_char(c) {
|
|
name := s.ident_name()
|
|
// tmp hack to detect . in ${}
|
|
// Check if not .eof to prevent panic
|
|
next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
|
|
if is_key(name) {
|
|
return scan_res(key_to_token(name), '')
|
|
}
|
|
// 'asdf $b' => "b" is the last name in the string, dont start parsing string
|
|
// at the next ', skip it
|
|
if s.inside_string {
|
|
if next_char == `\'` {
|
|
s.inter_end = true
|
|
s.inter_start = false
|
|
s.inside_string = false
|
|
}
|
|
}
|
|
if s.inter_start && next_char != `.` {
|
|
s.inter_end = true
|
|
s.inter_start = false
|
|
}
|
|
if s.pos == 0 && next_char == ` ` {
|
|
s.pos++
|
|
//If a single letter name at the start of the file, increment
|
|
//Otherwise the scanner would be stuck at s.pos = 0
|
|
}
|
|
return scan_res(.name, name)
|
|
}
|
|
// `123`, `.123`
|
|
else if c.is_digit() || (c == `.` && nextc.is_digit()) {
|
|
num := s.ident_number()
|
|
return scan_res(.number, num)
|
|
}
|
|
// all other tokens
|
|
switch c {
|
|
case `+`:
|
|
if nextc == `+` {
|
|
s.pos++
|
|
return scan_res(.inc, '')
|
|
}
|
|
else if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.plus_assign, '')
|
|
}
|
|
return scan_res(.plus, '')
|
|
case `-`:
|
|
if nextc == `-` {
|
|
s.pos++
|
|
return scan_res(.dec, '')
|
|
}
|
|
else if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.minus_assign, '')
|
|
}
|
|
return scan_res(.minus, '')
|
|
case `*`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.mult_assign, '')
|
|
}
|
|
return scan_res(.mul, '')
|
|
case `^`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.xor_assign, '')
|
|
}
|
|
return scan_res(.xor, '')
|
|
case `%`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.mod_assign, '')
|
|
}
|
|
return scan_res(.mod, '')
|
|
case `?`:
|
|
return scan_res(.question, '')
|
|
case single_quote, double_quote:
|
|
return scan_res(.str, s.ident_string())
|
|
case `\``: // ` // apostrophe balance comment. do not remove
|
|
return scan_res(.chartoken, s.ident_char())
|
|
case `(`:
|
|
return scan_res(.lpar, '')
|
|
case `)`:
|
|
return scan_res(.rpar, '')
|
|
case `[`:
|
|
return scan_res(.lsbr, '')
|
|
case `]`:
|
|
return scan_res(.rsbr, '')
|
|
case `{`:
|
|
// Skip { in ${ in strings
|
|
if s.inside_string {
|
|
return s.scan()
|
|
}
|
|
return scan_res(.lcbr, '')
|
|
case `$`:
|
|
return scan_res(.dollar, '')
|
|
case `}`:
|
|
// s = `hello $name !`
|
|
// s = `hello ${name} !`
|
|
if s.inside_string {
|
|
s.pos++
|
|
// TODO UNNEEDED?
|
|
if s.text[s.pos] == `\'` {
|
|
s.inside_string = false
|
|
return scan_res(.str, '')
|
|
}
|
|
return scan_res(.str, s.ident_string())
|
|
}
|
|
else {
|
|
return scan_res(.rcbr, '')
|
|
}
|
|
case `&`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.and_assign, '')
|
|
}
|
|
if nextc == `&` {
|
|
s.pos++
|
|
return scan_res(.and, '')
|
|
}
|
|
return scan_res(.amp, '')
|
|
case `|`:
|
|
if nextc == `|` {
|
|
s.pos++
|
|
return scan_res(.logical_or, '')
|
|
}
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.or_assign, '')
|
|
}
|
|
return scan_res(.pipe, '')
|
|
case `,`:
|
|
return scan_res(.comma, '')
|
|
case `@`:
|
|
s.pos++
|
|
name := s.ident_name()
|
|
// @FN => will be substituted with the name of the current V function
|
|
// @FILE => will be substituted with the path of the V source file
|
|
// @LINE => will be substituted with the V line number where it appears (as a string).
|
|
// @COLUMN => will be substituted with the column where it appears (as a string).
|
|
// @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string).
|
|
// This allows things like this:
|
|
// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
|
|
// ... which is useful while debugging/tracing
|
|
if name == 'FN' { return scan_res(.str, s.fn_name) }
|
|
if name == 'FILE' { return scan_res(.str, os.realpath(s.file_path).replace('\\', '\\\\')) } // escape \
|
|
if name == 'LINE' { return scan_res(.str, (s.line_nr+1).str()) }
|
|
if name == 'COLUMN' { return scan_res(.str, (s.current_column()).str()) }
|
|
if name == 'VHASH' { return scan_res(.str, vhash()) }
|
|
if !is_key(name) {
|
|
s.error('@ must be used before keywords (e.g. `@type string`)')
|
|
}
|
|
return scan_res(.name, name)
|
|
case `\r`:
|
|
if nextc == `\n` {
|
|
s.pos++
|
|
s.last_nl_pos = s.pos
|
|
return scan_res(.nl, '')
|
|
}
|
|
case `\n`:
|
|
s.last_nl_pos = s.pos
|
|
return scan_res(.nl, '')
|
|
case `.`:
|
|
if nextc == `.` {
|
|
s.pos++
|
|
return scan_res(.dotdot, '')
|
|
}
|
|
return scan_res(.dot, '')
|
|
case `#`:
|
|
start := s.pos + 1
|
|
s.ignore_line()
|
|
if nextc == `!` {
|
|
// treat shebang line (#!) as a comment
|
|
s.line_comment = s.text.substr(start + 1, s.pos).trim_space()
|
|
s.fgenln('// shebang line "$s.line_comment"')
|
|
return s.scan()
|
|
}
|
|
hash := s.text.substr(start, s.pos)
|
|
return scan_res(.hash, hash.trim_space())
|
|
case `>`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.ge, '')
|
|
}
|
|
else if nextc == `>` {
|
|
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
|
|
s.pos += 2
|
|
return scan_res(.righ_shift_assign, '')
|
|
}
|
|
s.pos++
|
|
return scan_res(.righ_shift, '')
|
|
}
|
|
else {
|
|
return scan_res(.gt, '')
|
|
}
|
|
case 0xE2:
|
|
//case `≠`:
|
|
if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
|
|
s.pos += 2
|
|
return scan_res(.ne, '')
|
|
}
|
|
// ⩽
|
|
else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
|
|
s.pos += 2
|
|
return scan_res(.le, '')
|
|
}
|
|
// ⩾
|
|
else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
|
|
s.pos += 2
|
|
return scan_res(.ge, '')
|
|
}
|
|
case `<`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.le, '')
|
|
}
|
|
else if nextc == `<` {
|
|
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
|
|
s.pos += 2
|
|
return scan_res(.left_shift_assign, '')
|
|
}
|
|
s.pos++
|
|
return scan_res(.left_shift, '')
|
|
}
|
|
else {
|
|
return scan_res(.lt, '')
|
|
}
|
|
case `=`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.eq, '')
|
|
}
|
|
else if nextc == `>` {
|
|
s.pos++
|
|
return scan_res(.arrow, '')
|
|
}
|
|
else {
|
|
return scan_res(.assign, '')
|
|
}
|
|
case `:`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.decl_assign, '')
|
|
}
|
|
else {
|
|
return scan_res(.colon, '')
|
|
}
|
|
case `;`:
|
|
return scan_res(.semicolon, '')
|
|
case `!`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.ne, '')
|
|
}
|
|
else {
|
|
return scan_res(.not, '')
|
|
}
|
|
case `~`:
|
|
return scan_res(.bit_not, '')
|
|
case `/`:
|
|
if nextc == `=` {
|
|
s.pos++
|
|
return scan_res(.div_assign, '')
|
|
}
|
|
if nextc == `/` {
|
|
start := s.pos + 1
|
|
s.ignore_line()
|
|
s.line_comment = s.text.substr(start + 1, s.pos)
|
|
s.line_comment = s.line_comment.trim_space()
|
|
s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
|
|
// Skip the comment (return the next token)
|
|
return s.scan()
|
|
}
|
|
// Multiline comments
|
|
if nextc == `*` {
|
|
start := s.pos
|
|
mut nest_count := 1
|
|
// Skip comment
|
|
for nest_count > 0 {
|
|
s.pos++
|
|
if s.pos >= s.text.len {
|
|
s.line_nr--
|
|
s.error('comment not terminated')
|
|
}
|
|
if s.text[s.pos] == `\n` {
|
|
s.inc_line_number()
|
|
continue
|
|
}
|
|
if s.expect('/*', s.pos) {
|
|
nest_count++
|
|
continue
|
|
}
|
|
if s.expect('*/', s.pos) {
|
|
nest_count--
|
|
}
|
|
}
|
|
s.pos++
|
|
end := s.pos + 1
|
|
comm := s.text.substr(start, end)
|
|
s.fgenln(comm)
|
|
// Skip if not in fmt mode
|
|
return s.scan()
|
|
}
|
|
return scan_res(.div, '')
|
|
}
|
|
$if windows {
|
|
if c == `\0` {
|
|
return scan_res(.eof, '')
|
|
}
|
|
}
|
|
mut msg := 'invalid character `${c.str()}`'
|
|
if c == `"` {
|
|
msg += ', use \' to denote strings'
|
|
}
|
|
s.error(msg)
|
|
return scan_res(.eof, '')
|
|
}
|
|
|
|
fn (s &Scanner) current_column() int {
|
|
return s.pos - s.last_nl_pos
|
|
}
|
|
|
|
fn imax(a,b int) int { return if a > b { a } else { b } }
|
|
fn imin(a,b int) int { return if a < b { a } else { b } }
|
|
fn (s &Scanner) error(msg string) {
|
|
s.error_with_col(msg, 0)
|
|
}
|
|
|
|
fn (s &Scanner) error_with_col(msg string, col int) {
|
|
fullpath := os.realpath( s.file_path )
|
|
color_on := s.should_print_errors_in_color && term.can_show_color()
|
|
final_message := if color_on { term.red( term.bold( msg ) ) } else { msg }
|
|
// The filepath:line:col: format is the default C compiler
|
|
// error output format. It allows editors and IDE's like
|
|
// emacs to quickly find the errors in the output
|
|
// and jump to their source with a keyboard shortcut.
|
|
// Using only the filename leads to inability of IDE/editors
|
|
// to find the source file, when it is in another folder.
|
|
eprintln('${fullpath}:${s.line_nr + 1}:${col}: $final_message')
|
|
|
|
if s.should_print_line_on_error && s.file_lines.len > 0 {
|
|
context_start_line := imax(0, (s.line_nr - error_context_before + 1 ))
|
|
context_end_line := imin(s.file_lines.len, (s.line_nr + error_context_after + 1 ))
|
|
for cline := context_start_line; cline < context_end_line; cline++ {
|
|
line := '${(cline+1):5d}| ' + s.file_lines[ cline ]
|
|
coloredline := if cline == s.line_nr && color_on { term.red(line) } else { line }
|
|
println( coloredline )
|
|
if cline != s.line_nr { continue }
|
|
// The pointerline should have the same spaces/tabs as the offending
|
|
// line, so that it prints the ^ character exactly on the *same spot*
|
|
// where it is needed. That is the reason we can not just
|
|
// use strings.repeat(` `, col) to form it.
|
|
mut pointerline := []string
|
|
for i , c in line {
|
|
if i < col {
|
|
x := if c.is_space() { c } else { ` ` }
|
|
pointerline << x.str()
|
|
continue
|
|
}
|
|
pointerline << if color_on { term.bold( term.blue('^') ) } else { '^' }
|
|
break
|
|
}
|
|
println( ' ' + pointerline.join('') )
|
|
}
|
|
}
|
|
exit(1)
|
|
}
|
|
|
|
fn (s Scanner) count_symbol_before(p int, sym byte) int {
|
|
mut count := 0
|
|
for i:=p; i>=0; i-- {
|
|
if s.text[i] != sym {
|
|
break
|
|
}
|
|
count++
|
|
}
|
|
return count
|
|
}
|
|
|
|
// println('array out of bounds $idx len=$a.len')
|
|
// This is really bad. It needs a major clean up
|
|
fn (s mut Scanner) ident_string() string {
|
|
q := s.text[s.pos]
|
|
if (q == single_quote || q == double_quote) && !s.inside_string{
|
|
s.quote = q
|
|
}
|
|
//if s.file_path.contains('string_test') {
|
|
//println('\nident_string() at char=${s.text[s.pos].str()}')
|
|
//println('linenr=$s.line_nr quote= $qquote ${qquote.str()}')
|
|
//}
|
|
mut start := s.pos
|
|
s.inside_string = false
|
|
slash := `\\`
|
|
for {
|
|
s.pos++
|
|
if s.pos >= s.text.len {
|
|
break
|
|
}
|
|
c := s.text[s.pos]
|
|
prevc := s.text[s.pos - 1]
|
|
// end of string
|
|
if c == s.quote && (prevc != slash || (prevc == slash && s.text[s.pos - 2] == slash)) {
|
|
// handle '123\\' slash at the end
|
|
break
|
|
}
|
|
if c == `\n` {
|
|
s.inc_line_number()
|
|
}
|
|
// Don't allow \0
|
|
if c == `0` && s.pos > 2 && s.text[s.pos - 1] == `\\` {
|
|
s.error('0 character in a string literal')
|
|
}
|
|
// Don't allow \x00
|
|
if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
|
|
s.error('0 character in a string literal')
|
|
}
|
|
// ${var}
|
|
if c == `{` && prevc == `$` && s.count_symbol_before(s.pos-2, `\\`) % 2 == 0 {
|
|
s.inside_string = true
|
|
// so that s.pos points to $ at the next step
|
|
s.pos -= 2
|
|
break
|
|
}
|
|
// $var
|
|
if (c.is_letter() || c == `_`) && prevc == `$` && s.count_symbol_before(s.pos-2, `\\`) % 2 == 0 {
|
|
s.inside_string = true
|
|
s.inter_start = true
|
|
s.pos -= 2
|
|
break
|
|
}
|
|
}
|
|
mut lit := ''
|
|
if s.text[start] == s.quote {
|
|
start++
|
|
}
|
|
mut end := s.pos
|
|
if s.inside_string {
|
|
end++
|
|
}
|
|
if start > s.pos{}
|
|
else {
|
|
lit = s.text.substr(start, end)
|
|
}
|
|
return lit
|
|
}
|
|
|
|
fn (s mut Scanner) ident_char() string {
|
|
start := s.pos
|
|
slash := `\\`
|
|
mut len := 0
|
|
for {
|
|
s.pos++
|
|
if s.pos >= s.text.len {
|
|
break
|
|
}
|
|
if s.text[s.pos] != slash {
|
|
len++
|
|
}
|
|
double_slash := s.expect('\\\\', s.pos - 2)
|
|
if s.text[s.pos] == `\`` && (s.text[s.pos - 1] != slash || double_slash) { // ` // apostrophe balance comment. do not remove
|
|
if double_slash {
|
|
len++
|
|
}
|
|
break
|
|
}
|
|
}
|
|
len--
|
|
c := s.text.substr(start + 1, s.pos)
|
|
if len != 1 {
|
|
u := c.ustring()
|
|
if u.len != 1 {
|
|
s.error('invalid character literal (more than one character: $len)')
|
|
}
|
|
}
|
|
if c == '\\`' {
|
|
return '`'
|
|
}
|
|
// Escapes a `'` character
|
|
return if c == '\'' { '\\' + c } else { c }
|
|
}
|
|
|
|
fn (s &Scanner) expect(want string, start_pos int) bool {
|
|
end_pos := start_pos + want.len
|
|
if start_pos < 0 || start_pos >= s.text.len {
|
|
return false
|
|
}
|
|
if end_pos < 0 || end_pos > s.text.len {
|
|
return false
|
|
}
|
|
for pos in start_pos..end_pos {
|
|
if s.text[pos] != want[pos-start_pos] {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
fn (s mut Scanner) debug_tokens() {
|
|
s.pos = 0
|
|
s.debug = true
|
|
|
|
fname := s.file_path.all_after('/')
|
|
println('\n===DEBUG TOKENS $fname===')
|
|
|
|
for {
|
|
res := s.scan()
|
|
tok := res.tok
|
|
lit := res.lit
|
|
print(tok.str())
|
|
if lit != '' {
|
|
println(' `$lit`')
|
|
}
|
|
else {
|
|
println('')
|
|
}
|
|
if tok == .eof {
|
|
println('============ END OF DEBUG TOKENS ==================')
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
fn (s mut Scanner) ignore_line() {
|
|
s.eat_to_end_of_line()
|
|
s.inc_line_number()
|
|
}
|
|
|
|
fn (s mut Scanner) eat_to_end_of_line(){
|
|
for s.pos < s.text.len && s.text[s.pos] != `\n` {
|
|
s.pos++
|
|
}
|
|
}
|
|
|
|
fn (s mut Scanner) inc_line_number() {
|
|
s.last_nl_pos = s.pos
|
|
s.line_nr++
|
|
}
|
|
|
|
fn is_name_char(c byte) bool {
|
|
return c.is_letter() || c == `_`
|
|
}
|
|
|
|
fn is_nl(c byte) bool {
|
|
return c == `\r` || c == `\n`
|
|
}
|
|
|
|
fn contains_capital(s string) bool {
|
|
for c in s {
|
|
if c >= `A` && c <= `Z` {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// HTTPRequest bad
|
|
// HttpRequest good
|
|
fn good_type_name(s string) bool {
|
|
if s.len < 4 {
|
|
return true
|
|
}
|
|
for i in 2 .. s.len {
|
|
if s[i].is_capital() && s[i-1].is_capital() && s[i-2].is_capital() {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// registration_date good
|
|
// registrationdate bad
|
|
fn (s &Scanner) validate_var_name(name string) {
|
|
if name.len > 11 && !name.contains('_') {
|
|
s.error('bad variable name `$name`\n' +
|
|
'looks like you have a multi-word name without separating them with `_`' +
|
|
'\nfor example, use `registration_date` instead of `registrationdate` ')
|
|
|
|
}
|
|
}
|
|
|
|
|