v/vlib/compiler/scanner.v

1013 lines
22 KiB
V
Raw Normal View History

2020-01-23 21:04:46 +01:00
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
2019-06-23 04:21:30 +02:00
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module compiler
2019-06-22 20:20:28 +02:00
2019-09-21 00:23:53 +02:00
import (
os
2020-02-17 20:31:23 +01:00
filepath
2019-12-18 06:13:31 +01:00
// strings
2019-09-21 00:23:53 +02:00
)
// Characters that open a string literal in scan(), and the amount of source
// context printed around the offending line of an error.
const (
single_quote = `\'`
double_quote = `"`
error_context_before = 2 // how many lines of source context to print before the pointer line
2019-12-18 06:13:31 +01:00
error_context_after = 2 // how many lines of source context to print after the pointer line
2019-09-21 00:23:53 +02:00
)
2019-07-12 07:37:54 +02:00
2019-12-22 02:34:37 +01:00
// Scanner holds the source text together with all the cursor and mode state
// that scan() needs to produce one token per call.
pub struct Scanner {
mut:
	file_path string // assigned by new_scanner_file(); read for @FILE and debug_tokens()
	text string // the source text being scanned
	pos int // index of the current byte in `text`
	line_nr int // bumped by inc_line_number()
	last_nl_pos int // position recorded at the last line increment; used to compute the column
	inside_string bool // true while scanning an interpolation inside a string literal
	inter_start bool // for hacky string interpolation TODO simplify
	inter_end bool // set when a `$var` interpolation ended and the string continues
	debug bool // switched on by debug_tokens()
	line_comment string // text of the last line comment, shebang line, or (fmt mode) block comment
	started bool // false until scan() has been called once
	// vfmt fields TODO move to a separate struct
	// fmt_out strings.Builder
	fmt_lines []string
	// fmt_line string
	fmt_indent int
	fmt_line_empty bool
	// fmt_needs_nl bool
	prev_tok TokenKind
	fn_name string // needed for @FN
	print_line_on_error bool
	print_colored_error bool
	print_rel_paths_on_error bool
	quote byte // which quote is used to denote current string: ' or "
	line_ends []int // the positions of source lines ends (i.e. \n signs)
	nlines int // total number of lines in the source file that were scanned
	is_vh bool // Keep newlines
	is_fmt bool // Used only for skipping ${} in strings, since we need literal
	// string values when generating formatted code.
}
// new_scanner_file builds a scanner for the source file at `file_path`.
// It reports an error through verror() when the file is missing or cannot be
// read, and strips a leading UTF-8 byte order mark (EF BB BF) before scanning.
fn new_scanner_file(file_path string) &Scanner {
	if !os.exists(file_path) {
		verror("$file_path doesn't exist")
	}
	mut raw_text := os.read_file(file_path) or {
		verror('scanner: failed to open $file_path')
		return 0
	}
	// BOM check
	bom_len := 3
	if raw_text.len >= bom_len {
		bytes := raw_text.str
		has_bom := bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF
		if has_bom {
			// skip three BOM bytes
			raw_text = tos(bytes[bom_len], vstrlen(bytes) - bom_len)
		}
	}
	mut sc := new_scanner(raw_text)
	sc.init_fmt()
	sc.file_path = file_path
	return sc
}
// new_scanner builds a scanner over an in-memory source string.
// All three error-printing options start out enabled.
fn new_scanner(text string) &Scanner {
	sc := &Scanner{
		text: text
		print_line_on_error: true
		print_colored_error: true
		print_rel_paths_on_error: true
	}
	return sc
}
// ScanRes is what scan() returns: the token kind plus its literal text
// (empty for tokens that carry no literal).
// TODO remove once multiple return values are implemented
struct ScanRes {
	tok TokenKind
	lit string
}
2019-12-19 22:29:37 +01:00
// scan_res packs a token kind and its literal into a ScanRes.
fn scan_res(tok TokenKind, lit string) ScanRes {
	res := ScanRes{
		tok: tok
		lit: lit
	}
	return res
}
// ident_name reads a name starting at the current position and returns it.
// The first byte is consumed unconditionally; after it, name characters and
// digits are accepted. On return `s.pos` points at the last byte of the name.
fn (s mut Scanner) ident_name() string {
	first := s.pos
	s.pos++
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if !is_name_char(ch) && !ch.is_digit() {
			break
		}
		s.pos++
	}
	ident := s.text[first..s.pos]
	// step back so that the next scan() call, which advances first, lands after the name
	s.pos--
	return ident
}
// Separator accepted between the digits of a number literal; it is removed
// from the literal text by filter_num_sep().
const(
num_sep = `_` // char used as number separator
)
// filter_num_sep copies the bytes txt[start..end] into a freshly allocated,
// 0-terminated string, leaving out every `num_sep` and every `o` byte.
// NOTE(review): dropping `o` presumably turns the `0o` octal prefix into a
// plain leading `0` for the generated code - confirm against the backend.
fn filter_num_sep(txt byteptr, start int, end int) string {
	unsafe {
		// one extra byte for the terminating 0
		mut buf := malloc(end - start + 1)
		mut n := 0
		for src := start; src < end; src++ {
			ch := txt[src]
			if ch == num_sep || ch == `o` {
				continue
			}
			buf[n] = ch
			n++
		}
		buf[n] = 0 // C string compatibility
		return string{str:buf len:n}
	}
}
// ident_bin_number scans a `0b...` literal starting at the current position
// and returns its text with separators filtered out. It reports an error when
// no digits follow the prefix or when a non-binary digit or letter is found.
// On return `s.pos` points at the last byte of the literal.
fn (s mut Scanner) ident_bin_number() string {
	begin := s.pos
	mut bad_seen := false
	mut bad_digit := `\0`
	s.pos += 2 // skip '0b'
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		accepted := ch.is_bin_digit() || ch == num_sep
		if !accepted {
			// anything that is neither a digit nor a letter terminates the literal
			terminates := !(ch.is_digit() || ch.is_letter()) || s.inside_string
			if terminates {
				break
			}
			if !bad_seen {
				bad_seen = true
				bad_digit = ch
			}
		}
		s.pos++
	}
	if s.pos == begin + 2 {
		s.error('number part of this binary is not provided')
	}
	else if bad_seen {
		s.error('this binary number has unsuitable digit `${bad_digit.str()}`')
	}
	literal := filter_num_sep(s.text.str, begin, s.pos)
	s.pos--
	return literal
}
2019-07-24 00:06:48 +02:00
// ident_hex_number scans a `0x...` literal starting at the current position
// and returns its text with separators filtered out. It reports an error when
// no digits follow the prefix or when a letter that is not a hex digit is found.
// On return `s.pos` points at the last byte of the literal.
fn (s mut Scanner) ident_hex_number() string {
	begin := s.pos
	mut bad_seen := false
	mut bad_digit := `\0`
	s.pos += 2 // skip '0x'
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		accepted := ch.is_hex_digit() || ch == num_sep
		if !accepted {
			// a non-letter terminates the literal
			terminates := !ch.is_letter() || s.inside_string
			if terminates {
				break
			}
			if !bad_seen {
				bad_seen = true
				bad_digit = ch
			}
		}
		s.pos++
	}
	if s.pos == begin + 2 {
		s.error('number part of this hexadecimal is not provided')
	}
	else if bad_seen {
		s.error('this hexadecimal number has unsuitable digit `${bad_digit.str()}`')
	}
	literal := filter_num_sep(s.text.str, begin, s.pos)
	s.pos--
	return literal
}
// ident_oct_number scans a `0o...` literal starting at the current position
// and returns its text with separators filtered out. It reports an error when
// no digits follow the prefix or when a non-octal digit or letter is found.
// On return `s.pos` points at the last byte of the literal.
fn (s mut Scanner) ident_oct_number() string {
	begin := s.pos
	mut bad_seen := false
	mut bad_digit := `\0`
	s.pos += 2 // skip '0o'
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		accepted := ch.is_oct_digit() || ch == num_sep
		if !accepted {
			// anything that is neither a digit nor a letter terminates the literal
			terminates := !(ch.is_digit() || ch.is_letter()) || s.inside_string
			if terminates {
				break
			}
			if !bad_seen {
				bad_seen = true
				bad_digit = ch
			}
		}
		s.pos++
	}
	if s.pos == begin + 2 {
		s.error('number part of this octal is not provided')
	}
	else if bad_seen {
		s.error('this octal number has unsuitable digit `${bad_digit.str()}`')
	}
	literal := filter_num_sep(s.text.str, begin, s.pos)
	s.pos--
	return literal
}
// ident_dec_number scans a decimal literal: an integer part, an optional
// fractional part and an optional exponent. It returns the literal text with
// separators filtered out and leaves `s.pos` on the last byte of the literal.
// Errors are reported for stray letters, an exponent without digits, and a
// second decimal point.
fn (s mut Scanner) ident_dec_number() string {
	begin := s.pos
	mut bad_seen := false
	mut bad_digit := `\0`
	// integer part (separators allowed)
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if !(ch.is_digit() || ch == num_sep) {
			is_exp_marker := ch == `e` || ch == `E`
			if !ch.is_letter() || is_exp_marker || s.inside_string {
				break
			}
			if !bad_seen {
				bad_seen = true
				bad_digit = ch
			}
		}
		s.pos++
	}
	// e.g. 1..9
	// we just return '1' and don't scan '..9'
	if s.expect('..', s.pos) {
		int_part := filter_num_sep(s.text.str, begin, s.pos)
		s.pos--
		return int_part
	}
	// fractional part (no separators here)
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		s.pos++
		for s.pos < s.text.len {
			ch := s.text[s.pos]
			if !ch.is_digit() {
				is_exp_marker := ch == `e` || ch == `E`
				if !ch.is_letter() || is_exp_marker || s.inside_string {
					break
				}
				if !bad_seen {
					bad_seen = true
					bad_digit = ch
				}
			}
			s.pos++
		}
	}
	// exponential part
	mut has_exponent := false
	if s.pos < s.text.len && (s.text[s.pos] == `e` || s.text[s.pos] == `E`) {
		s.pos++
		exp_begin := s.pos
		// optional sign
		if s.pos < s.text.len && (s.text[s.pos] == `-` || s.text[s.pos] == `+`) {
			s.pos++
		}
		for s.pos < s.text.len {
			ch := s.text[s.pos]
			if !ch.is_digit() {
				if !ch.is_letter() || s.inside_string {
					break
				}
				if !bad_seen {
					bad_seen = true
					bad_digit = ch
				}
			}
			s.pos++
		}
		if s.pos == exp_begin {
			s.error('exponent has no digits')
		}
		has_exponent = true
	}
	// error check: 1.23.4, 123.e+3.4
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		msg := if has_exponent { 'exponential part should be integer' } else { 'too many decimal points in number' }
		s.error(msg)
	}
	if bad_seen {
		s.error('this number has unsuitable digit `${bad_digit.str()}`')
	}
	literal := filter_num_sep(s.text.str, begin, s.pos)
	s.pos--
	return literal
}
2019-07-24 00:06:48 +02:00
// ident_number dispatches on the literal prefix at the current position:
// `0b` binary, `0x` hexadecimal, `0o` octal, anything else decimal.
fn (s mut Scanner) ident_number() string {
	if s.expect('0b', s.pos) {
		return s.ident_bin_number()
	}
	if s.expect('0x', s.pos) {
		return s.ident_hex_number()
	}
	if s.expect('0o', s.pos) {
		return s.ident_oct_number()
	}
	return s.ident_dec_number()
}
2019-06-22 20:20:28 +02:00
// skip_whitespace advances past whitespace, counting the lines it crosses.
// When `is_vh` is set it stops at the first newline character instead of
// consuming it.
fn (s mut Scanner) skip_whitespace() {
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if !ch.is_space() {
			break
		}
		if is_nl(ch) {
			if s.is_vh {
				return
			}
			// Count \r\n as one line: the \n right after a \r is not counted again
			if !s.expect('\r\n', s.pos - 1) {
				s.inc_line_number()
			}
		}
		s.pos++
	}
}
// end_of_file moves the cursor to the end of the text, records a final line
// end and returns the `.eof` token.
fn (s mut Scanner) end_of_file() ScanRes {
	eof_res := scan_res(.eof, '')
	s.pos = s.text.len
	s.inc_line_number()
	return eof_res
}
2019-07-06 15:33:15 +02:00
// scan returns the next token of the source text.
//
// Every call after the first one advances `s.pos` by one before looking at the
// text, so each branch leaves `s.pos` on the LAST byte of the token it returns.
// Comments are skipped (scan() recurses for the next token) unless `is_fmt` is
// set, in which case they are returned as `.line_comment` / `.mline_comment`.
// String interpolation is driven by the `inside_string`, `inter_start` and
// `inter_end` flags, which are shared with ident_string().
// At the end of the text, or after an invalid character, `.eof` is returned.
fn (s mut Scanner) scan() ScanRes {
	if s.started {
		s.pos++
	}
	s.started = true
	if s.pos >= s.text.len {
		return s.end_of_file()
	}
	if !s.inside_string {
		s.skip_whitespace()
	}
	// End of $var, start next string
	if s.inter_end {
		if s.text[s.pos] == s.quote {
			s.inter_end = false
			return scan_res(.str, '')
		}
		s.inter_end = false
		return scan_res(.str, s.ident_string())
	}
	s.skip_whitespace()
	// end of file
	if s.pos >= s.text.len {
		return s.end_of_file()
	}
	// handle each char
	c := s.text[s.pos]
	mut nextc := `\0`
	if s.pos + 1 < s.text.len {
		nextc = s.text[s.pos + 1]
	}
	// name or keyword
	if is_name_char(c) {
		name := s.ident_name()
		// tmp hack to detect . in ${}
		// Check if not .eof to prevent panic
		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
		if is_key(name) {
			return scan_res(key_to_token(name), '')
		}
		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
		// at the next ', skip it
		if s.inside_string {
			if next_char == s.quote {
				s.inter_end = true
				s.inter_start = false
				s.inside_string = false
			}
		}
		// end of `$expr`
		// allow `'$a.b'` and `'$a.c()'`
		if s.inter_start && next_char != `.` && next_char != `(` {
			s.inter_end = true
			s.inter_start = false
		}
		if s.pos == 0 && next_char == ` ` {
			// If a single letter name at the start of the file, increment
			// Otherwise the scanner would be stuck at s.pos = 0
			s.pos++
		}
		return scan_res(.name, name)
	}
	// `123`, `.123`
	else if c.is_digit() || (c == `.` && nextc.is_digit()) {
		if !s.inside_string {
			// In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed.
			mut start_pos := s.pos
			for start_pos < s.text.len && s.text[start_pos] == `0` {
				start_pos++
			}
			mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
			// for 0b, 0o, 0x the heading zero shouldn't be jumped
			if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
				prefix_zero_num--
			}
			s.pos += prefix_zero_num // jump these zeros
		}
		num := s.ident_number()
		return scan_res(.number, num)
	}
	// Handle `'$fn()'`
	if c == `)` && s.inter_start {
		s.inter_end = true
		s.inter_start = false
		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
		if next_char == s.quote {
			s.inside_string = false
		}
		return scan_res(.rpar, '')
	}
	// all other tokens
	match c {
		`+` {
			if nextc == `+` {
				s.pos++
				return scan_res(.inc, '')
			}
			else if nextc == `=` {
				s.pos++
				return scan_res(.plus_assign, '')
			}
			return scan_res(.plus, '')
		}
		`-` {
			if nextc == `-` {
				s.pos++
				return scan_res(.dec, '')
			}
			else if nextc == `=` {
				s.pos++
				return scan_res(.minus_assign, '')
			}
			return scan_res(.minus, '')
		}
		`*` {
			if nextc == `=` {
				s.pos++
				return scan_res(.mult_assign, '')
			}
			return scan_res(.mul, '')
		}
		`^` {
			if nextc == `=` {
				s.pos++
				return scan_res(.xor_assign, '')
			}
			return scan_res(.xor, '')
		}
		`%` {
			if nextc == `=` {
				s.pos++
				return scan_res(.mod_assign, '')
			}
			return scan_res(.mod, '')
		}
		`?` {
			return scan_res(.question, '')
		}
		single_quote, double_quote {
			return scan_res(.str, s.ident_string())
		}
		`\`` {
			// ` // apostrophe balance comment. do not remove
			return scan_res(.chartoken, s.ident_char())
		}
		`(` {
			return scan_res(.lpar, '')
		}
		`)` {
			return scan_res(.rpar, '')
		}
		`[` {
			return scan_res(.lsbr, '')
		}
		`]` {
			return scan_res(.rsbr, '')
		}
		`{` {
			// Skip { in `${` in strings
			if s.inside_string {
				return s.scan()
			}
			return scan_res(.lcbr, '')
		}
		`$` {
			if s.inside_string {
				return scan_res(.str_dollar, '')
			}
			else {
				return scan_res(.dollar, '')
			}
		}
		`}` {
			// s = `hello $name !`
			// s = `hello ${name} !`
			if s.inside_string {
				s.pos++
				// NOTE(review): no bounds check here - a source that ends right after
				// the `}` of an interpolation indexes past the text. Left unchanged
				// because the right recovery (error vs. eof) needs a decision.
				if s.text[s.pos] == s.quote {
					s.inside_string = false
					return scan_res(.str, '')
				}
				return scan_res(.str, s.ident_string())
			}
			else {
				return scan_res(.rcbr, '')
			}
		}
		`&` {
			if nextc == `=` {
				s.pos++
				return scan_res(.and_assign, '')
			}
			if nextc == `&` {
				s.pos++
				return scan_res(.and, '')
			}
			return scan_res(.amp, '')
		}
		`|` {
			if nextc == `|` {
				s.pos++
				return scan_res(.logical_or, '')
			}
			if nextc == `=` {
				s.pos++
				return scan_res(.or_assign, '')
			}
			return scan_res(.pipe, '')
		}
		`,` {
			return scan_res(.comma, '')
		}
		`@` {
			s.pos++
			name := s.ident_name()
			// @FN => will be substituted with the name of the current V function
			// @FILE => will be substituted with the path of the V source file
			// @LINE => will be substituted with the V line number where it appears (as a string).
			// @COLUMN => will be substituted with the column where it appears (as a string).
			// @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string).
			// This allows things like this:
			// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
			// ... which is useful while debugging/tracing
			if name == 'FN' {
				return scan_res(.str, s.fn_name)
			}
			if name == 'FILE' {
				return scan_res(.str, cescaped_path(os.realpath(s.file_path)))
			}
			if name == 'LINE' {
				return scan_res(.str, (s.line_nr + 1).str())
			}
			if name == 'COLUMN' {
				return scan_res(.str, (s.current_column()).str())
			}
			if name == 'VHASH' {
				return scan_res(.str, vhash())
			}
			if !is_key(name) {
				s.error('@ must be used before keywords (e.g. `@type string`)')
			}
			return scan_res(.name, name)
		}
		`.` {
			if nextc == `.` {
				s.pos++
				// only look for the third dot when there is a byte left to look at
				if s.pos + 1 < s.text.len && s.text[s.pos + 1] == `.` {
					s.pos++
					return scan_res(.ellipsis, '')
				}
				return scan_res(.dotdot, '')
			}
			return scan_res(.dot, '')
		}
		`#` {
			start := s.pos + 1
			s.ignore_line()
			if nextc == `!` {
				// treat shebang line (#!) as a comment
				s.line_comment = s.text[start + 1..s.pos].trim_space()
				// s.fgenln('// shebang line "$s.line_comment"')
				return s.scan()
			}
			hash := s.text[start..s.pos]
			return scan_res(.hash, hash.trim_space())
		}
		`>` {
			if nextc == `=` {
				s.pos++
				return scan_res(.ge, '')
			}
			else if nextc == `>` {
				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
					s.pos += 2
					return scan_res(.righ_shift_assign, '')
				}
				s.pos++
				return scan_res(.righ_shift, '')
			}
			else {
				return scan_res(.gt, '')
			}
		}
		0xE2 {
			// Three-byte UTF-8 operators. The third byte is only read when it
			// exists; otherwise the byte falls through to the invalid character error.
			if s.pos + 2 < s.text.len {
				third := s.text[s.pos + 2]
				// ≠ (U+2260) is encoded as E2 89 A0
				if nextc == 0x89 && third == 0xA0 {
					s.pos += 2
					return scan_res(.ne, '')
				}
				// ⩽ (U+2A7D) is encoded as E2 A9 BD
				else if nextc == 0xA9 && third == 0xBD {
					s.pos += 2
					return scan_res(.le, '')
				}
				// ⩾ (U+2A7E) is encoded as E2 A9 BE
				else if nextc == 0xA9 && third == 0xBE {
					s.pos += 2
					return scan_res(.ge, '')
				}
			}
		}
		`<` {
			if nextc == `=` {
				s.pos++
				return scan_res(.le, '')
			}
			else if nextc == `<` {
				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
					s.pos += 2
					return scan_res(.left_shift_assign, '')
				}
				s.pos++
				return scan_res(.left_shift, '')
			}
			else if nextc == `-` {
				s.pos++
				return scan_res(.left_arrow, '')
			}
			else {
				return scan_res(.lt, '')
			}
		}
		`=` {
			if nextc == `=` {
				s.pos++
				return scan_res(.eq, '')
			}
			else if nextc == `>` {
				s.pos++
				return scan_res(.arrow, '')
			}
			else {
				return scan_res(.assign, '')
			}
		}
		`:` {
			if nextc == `=` {
				s.pos++
				return scan_res(.decl_assign, '')
			}
			else {
				return scan_res(.colon, '')
			}
		}
		`;` {
			return scan_res(.semicolon, '')
		}
		`!` {
			if nextc == `=` {
				s.pos++
				return scan_res(.ne, '')
			}
			else {
				return scan_res(.not, '')
			}
		}
		`~` {
			return scan_res(.bit_not, '')
		}
		`/` {
			if nextc == `=` {
				s.pos++
				return scan_res(.div_assign, '')
			}
			if nextc == `/` {
				start := s.pos + 1
				s.ignore_line()
				s.line_comment = s.text[start + 1..s.pos]
				s.line_comment = s.line_comment.trim_space()
				if s.is_fmt {
					s.pos-- // fix line_nr, \n was read, and the comment is marked on the next line
					s.line_nr--
					return scan_res(.line_comment, s.line_comment)
				}
				// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
				// Skip the comment (return the next token)
				return s.scan()
			}
			// Multiline comments
			if nextc == `*` {
				start := s.pos
				mut nest_count := 1
				// Skip comment
				for nest_count > 0 {
					s.pos++
					if s.pos >= s.text.len {
						s.line_nr--
						s.error('comment not terminated')
					}
					if s.text[s.pos] == `\n` {
						s.inc_line_number()
						continue
					}
					if s.expect('/*', s.pos) {
						nest_count++
						continue
					}
					if s.expect('*/', s.pos) {
						nest_count--
					}
				}
				s.pos++
				end := s.pos + 1
				comment := s.text[start..end]
				if s.is_fmt {
					s.line_comment = comment
					return scan_res(.mline_comment, s.line_comment)
				}
				// Skip if not in fmt mode
				return s.scan()
			}
			return scan_res(.div, '')
		}
		else {
		}}
	$if windows {
		if c == `\0` {
			return s.end_of_file()
		}
	}
	s.error('invalid character `${c.str()}`')
	return s.end_of_file()
}
2019-09-07 12:44:41 +02:00
// current_column returns the distance between the current position and the
// position recorded at the last line increment.
fn (s &Scanner) current_column() int {
	col := s.pos - s.last_nl_pos
	return col
}
2019-12-19 22:29:37 +01:00
// count_symbol_before returns how many consecutive `sym` bytes end at index
// `p` of the text, walking backwards. A negative `p` yields 0.
fn (s Scanner) count_symbol_before(p int, sym byte) int {
	mut idx := p
	for idx >= 0 && s.text[idx] == sym {
		idx--
	}
	// idx stopped on the first byte that is not `sym` (or at -1)
	return p - idx
}
2019-06-22 20:20:28 +02:00
// ident_string scans one piece of a string literal and returns its text
// without the opening quote.
//
// The piece ends at the closing quote, at the end of the text, or right before
// a `$name` / `${` interpolation. In the interpolation case `inside_string` is
// set (and `inter_start` for `$name`) and `s.pos` is moved back so that the
// next scan() call lands on the `$`.
// A quote closes the string only when it is preceded by an even number of
// backslashes, so `\\\'` (escaped backslash followed by an escaped quote) does
// not end the literal.
// Interpolation is ignored in raw strings (`r'...'`) and in fmt mode.
fn (s mut Scanner) ident_string() string {
	q := s.text[s.pos]
	is_quote := q == single_quote || q == double_quote
	// `s.pos > 0` guards the look-behind when the quote is the very first byte
	is_raw := is_quote && s.pos > 0 && s.text[s.pos - 1] == `r`
	if is_quote && !s.inside_string {
		s.quote = q
	}
	mut start := s.pos
	s.inside_string = false
	slash := `\\`
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		c := s.text[s.pos]
		prevc := s.text[s.pos - 1]
		// end of string: the quote is not escaped when the run of backslashes
		// right before it has even length (this also handles '123\\')
		if c == s.quote && s.count_symbol_before(s.pos - 1, slash) % 2 == 0 {
			break
		}
		if c == `\n` {
			s.inc_line_number()
		}
		// Don't allow \0 (but `\0` followed by another digit is accepted)
		if c == `0` && s.pos > 2 && prevc == slash {
			digit_follows := s.pos < s.text.len - 1 && s.text[s.pos + 1].is_digit()
			if !digit_follows {
				s.error('0 character in a string literal')
			}
		}
		// Don't allow \x00
		if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
			s.error('0 character in a string literal')
		}
		// ${var} (ignore in vfmt mode)
		if c == `{` && prevc == `$` && !is_raw && !s.is_fmt && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.inside_string = true
			// so that s.pos points to $ at the next step
			s.pos -= 2
			break
		}
		// $var
		if is_name_char(c) && prevc == `$` && !s.is_fmt && !is_raw && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.inside_string = true
			s.inter_start = true
			s.pos -= 2
			break
		}
	}
	mut lit := ''
	// drop the opening quote from the returned text
	if s.text[start] == s.quote {
		start++
	}
	mut end := s.pos
	// s.pos was moved back by 2 for an interpolation; include the byte before the `$`
	if s.inside_string {
		end++
	}
	if start <= s.pos {
		lit = s.text[start..end]
	}
	return lit
}
// ident_char scans a backtick-delimited character literal starting at the
// opening backtick and returns its text. It reports an error when the literal
// holds more than one character. An escaped backtick is returned as a plain
// backtick, and a single quote is returned with a backslash in front of it.
fn (s mut Scanner) ident_char() string {
	open_pos := s.pos
	slash := `\\`
	mut n := 0
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		cur := s.text[s.pos]
		if cur != slash {
			n++
		}
		if cur != `\`` {
			// ` // apostrophe balance comment. do not remove
			continue
		}
		// a backtick closes the literal unless it is escaped by a single backslash
		if s.expect('\\\\', s.pos - 2) {
			n++
			break
		}
		if s.text[s.pos - 1] != slash {
			break
		}
	}
	n--
	c := s.text[open_pos + 1..s.pos]
	if n != 1 {
		// the byte count is off; accept it anyway when it is one unicode character
		u := c.ustring()
		if u.len != 1 {
			s.error('invalid character literal (more than one character)\n' + 'use quotes for strings, backticks for characters')
		}
	}
	if c == '\\`' {
		return '`'
	}
	// Escapes a `'` character
	if c == "\'" {
		return '\\' + c
	}
	return c
}
2019-12-19 22:29:37 +01:00
// expect reports whether the text contains `want` starting at `start_pos`.
// Out-of-range positions (including negative ones) yield false.
fn (s &Scanner) expect(want string, start_pos int) bool {
	end_pos := start_pos + want.len
	start_ok := start_pos >= 0 && start_pos < s.text.len
	end_ok := end_pos >= 0 && end_pos <= s.text.len
	if !start_ok || !end_ok {
		return false
	}
	for i := 0; i < want.len; i++ {
		if s.text[start_pos + i] != want[i] {
			return false
		}
	}
	return true
}
2019-06-22 20:20:28 +02:00
// debug_tokens rewinds the scanner and prints every token of the text, one
// per line, until `.eof` is reached.
fn (s mut Scanner) debug_tokens() {
	s.pos = 0
	s.started = false
	s.debug = true
	fname := s.file_path.all_after(filepath.separator)
	println('\n===DEBUG TOKENS $fname===')
	for {
		res := s.scan()
		tok := res.tok
		lit := res.lit
		print(tok.str())
		if lit == '' {
			println('')
		}
		else {
			println(' `$lit`')
		}
		if tok != .eof {
			continue
		}
		println('============ END OF DEBUG TOKENS ==================')
		break
	}
}
// ignore_line moves the cursor to the next `\n` (or to the end of the text)
// and counts the line.
fn (s mut Scanner) ignore_line() {
	for s.pos < s.text.len && s.text[s.pos] != `\n` {
		s.pos++
	}
	s.inc_line_number()
}
2019-12-18 06:13:31 +01:00
// eat_to_end_of_line moves the cursor forward until it sits on a `\n` or has
// reached the end of the text.
fn (s mut Scanner) eat_to_end_of_line() {
	for s.pos < s.text.len {
		if s.text[s.pos] == `\n` {
			break
		}
		s.pos++
	}
}
// inc_line_number records the current position as a line end, bumps the
// line counter and keeps `nlines` at the highest line number seen so far.
fn (s mut Scanner) inc_line_number() {
	nl_pos := s.pos
	s.last_nl_pos = nl_pos
	s.line_ends << nl_pos
	s.line_nr++
	if s.nlines < s.line_nr {
		s.nlines = s.line_nr
	}
}
// line returns the text of the n-th (0-based) recorded source line with
// leading and trailing `\r` / `\n` characters removed. An index outside the
// recorded line ends yields an empty string.
fn (s Scanner) line(n int) string {
	if n < 0 || n >= s.line_ends.len {
		return ''
	}
	// a line starts where the previous one ended; the first one starts at 0
	from := if n == 0 { 0 } else { s.line_ends[n - 1] }
	to := s.line_ends[n]
	mut text := ''
	if from <= to {
		text = s.text[from..to]
	}
	return text.trim_right('\r\n').trim_left('\r\n')
}
2019-06-22 20:20:28 +02:00
// is_name_char reports whether `c` is a letter or an underscore.
fn is_name_char(c byte) bool {
	if c == `_` {
		return true
	}
	return c.is_letter()
}
2019-10-23 07:18:44 +02:00
// is_nl reports whether `c` is a carriage return or a line feed.
[inline]
fn is_nl(c byte) bool {
	return c in [`\r`, `\n`]
}
2019-08-04 09:36:21 +02:00
// contains_capital reports whether `s` holds at least one ASCII capital
// letter (`A` to `Z`).
fn contains_capital(s string) bool {
	for i := 0; i < s.len; i++ {
		ch := s[i]
		if ch < `A` || ch > `Z` {
			continue
		}
		return true
	}
	return false
}
// good_type_name rejects type names that contain three capitals in a row.
// HTTPRequest bad
// HttpRequest good
// Names shorter than 4 characters are always accepted.
fn good_type_name(s string) bool {
	if s.len < 4 {
		return true
	}
	// length of the run of capitals ending at the current character
	mut run := 0
	for i := 0; i < s.len; i++ {
		if s[i].is_capital() {
			run++
		}
		else {
			run = 0
		}
		if run >= 3 {
			return false
		}
	}
	return true
}
// validate_var_name reports an error for names longer than 15 characters
// that contain no underscore.
// registration_date good
// registrationdate bad
fn (s &Scanner) validate_var_name(name string) {
	if name.len <= 15 || name.contains('_') {
		return
	}
	s.error('bad variable name `$name`\n' + 'looks like you have a multi-word name without separating them with `_`' + '\nfor example, use `registration_date` instead of `registrationdate` ')
}