v/vlib/v/scanner/scanner.v

1256 lines
30 KiB
V
Raw Normal View History

2020-01-23 21:04:46 +01:00
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
2019-12-22 02:34:37 +01:00
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module scanner
import os
import v.token
import v.pref
import v.util
2020-05-26 22:39:15 +02:00
import v.vmod
2019-12-22 02:34:37 +01:00
const (
	// separator that may appear between the digits of a number literal
	num_sep      = `_`
	// the two characters that can delimit a string literal
	single_quote = `\'`
	double_quote = `"`
)
// Scanner holds the source text together with all of the state that
// scan() needs to turn that text into tokens one call at a time.
pub struct Scanner {
pub mut:
	file_path                   string // set by new_scanner_file; used in error messages and @FILE
	text                        string // the source being scanned
	pos                         int // index into `text` of the current byte
	line_nr                     int // zero based; tokens are created with line_nr + 1
	last_nl_pos                 int // value of `pos` at the last counted newline (column calculation)
	is_inside_string            bool // true while scanning the inside of an interpolated string
	is_inter_start              bool // for hacky string interpolation TODO simplify
	is_inter_end                bool
	is_debug                    bool // switched on by debug_tokens()
	line_comment                string // text of the last `//` or `#!` line that was read
	// prev_tok TokenKind
	is_started                  bool // false until scan() has been called once
	fn_name                     string // needed for @FN
	mod_name                    string // needed for @MOD
	struct_name                 string // needed for @STRUCT
	vmod_file_content           string // needed for @VMOD_FILE, contents of the file, *NOT its path*
	is_print_line_on_error      bool
	is_print_colored_error      bool
	is_print_rel_paths_on_error bool
	quote                       byte // which quote is used to denote current string: ' or "
	line_ends                   []int // the positions of source lines ends (i.e. \n signs)
	nr_lines                    int // total number of lines in the source file that were scanned
	is_vh                       bool // Keep newlines
	// Used only for skipping ${} in strings, since we need literal
	// string values when generating formatted code.
	is_fmt                      bool
	comments_mode               CommentsMode
	eofs                        int // number of times end_of_file() has been reached
}
2020-02-29 17:51:35 +01:00
// CommentsMode selects what scan() does when it meets a comment.
pub enum CommentsMode {
	skip_comments // comments are dropped and the following token is returned
	parse_comments // comments are returned as `.comment` tokens
}
2019-12-22 02:34:37 +01:00
// new_scanner_file creates a scanner for the contents of the file at
// `file_path`. A missing or unreadable file is reported through verror().
pub fn new_scanner_file(file_path string, comments_mode CommentsMode) &Scanner {
	if !os.exists(file_path) {
		verror("$file_path doesn't exist")
	}
	raw_text := util.read_file(file_path) or {
		verror(err)
		return voidptr(0)
	}
	mut scanner := new_scanner(raw_text, comments_mode)
	scanner.file_path = file_path
	return scanner
}
// new_scanner creates a scanner over `text`. All error printing options
// start out enabled; `is_fmt` is taken from util.is_fmt().
pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
	return &Scanner{
		text: text
		comments_mode: comments_mode
		is_fmt: util.is_fmt()
		is_print_line_on_error: true
		is_print_colored_error: true
		is_print_rel_paths_on_error: true
	}
}
2020-04-10 00:09:34 +02:00
// new_token builds a token of `len` bytes that ends at the current
// scanner position. The reported line number is one based.
fn (s &Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
	first_byte := s.pos - len + 1
	human_line := s.line_nr + 1
	return token.Token{
		kind: tok_kind
		lit: lit
		line_nr: human_line
		pos: first_byte
		len: len
	}
}
2020-05-17 13:51:18 +02:00
// ident_name reads the name that starts at the current position. The
// first byte is always taken; after that name characters and digits are
// consumed. On return `s.pos` is on the last byte of the name.
fn (mut s Scanner) ident_name() string {
	first := s.pos
	mut past_end := first + 1
	for past_end < s.text.len {
		ch := s.text[past_end]
		if !util.is_name_char(ch) && !ch.is_digit() {
			break
		}
		past_end++
	}
	s.pos = past_end - 1
	return s.text[first..past_end]
}
// ident_fn_name looks ahead from a `fn` keyword and returns the name of the
// function being declared, or an empty string when no name can be found.
// The scanner position is not moved. When the column check below fails,
// the previously stored `s.fn_name` is returned unchanged.
fn (mut s Scanner) ident_fn_name() string {
	anchor := s.pos
	if s.current_column() - 2 != 0 {
		return s.fn_name
	}
	text_len := s.text.len
	mut cur := anchor + 1
	if s.struct_name != '' {
		// a struct name was found, so there is a receiver: step past its `(`
		for cur < text_len && s.text[cur] != `(` {
			cur++
		}
		if cur >= text_len {
			return ''
		}
		cur++
	}
	// find the `(` that opens the parameter list
	for cur < text_len && s.text[cur] != `(` {
		cur++
	}
	if cur >= text_len {
		return ''
	}
	cur--
	// walk back over whitespace between the name and `(`
	for cur > anchor && s.text[cur].is_space() {
		cur--
	}
	if cur < anchor {
		return ''
	}
	name_end := cur + 1
	cur--
	// walk back to the first character of the name
	for cur > anchor && util.is_func_char(s.text[cur]) {
		cur--
	}
	cur++
	name_start := cur
	if cur <= anchor || cur >= text_len {
		return ''
	}
	if s.text[name_start].is_digit() || name_end > text_len || name_end <= name_start || name_end <= anchor || name_start < anchor {
		return ''
	}
	return s.text[name_start..name_end]
}
2020-05-25 18:33:41 +02:00
// ident_mod_name looks ahead from a `module` keyword and returns the name of
// the module this file belongs to, or an empty string when there is none.
// The scanner position is not moved.
fn (mut s Scanner) ident_mod_name() string {
	start := s.pos
	mut pos := s.pos + 1
	// Eat whitespaces
	for pos < s.text.len && s.text[pos].is_space() {
		pos++
	}
	if pos >= s.text.len {
		return ''
	}
	start_pos := pos
	// Search for next occurrence of a whitespace or newline.
	// Running into the end of the text is fine too: a file may end right
	// after the module name, without a trailing newline.
	for pos < s.text.len && !s.text[pos].is_space() && !util.is_nl(s.text[pos]) {
		pos++
	}
	end_pos := pos
	if end_pos > s.text.len || end_pos <= start_pos || end_pos <= start || start_pos <= start {
		return ''
	}
	return s.text[start_pos..end_pos]
}
// ident_struct_name looks ahead from a `fn` keyword and returns the last
// name inside the receiver parentheses `fn (recv Name) ...`, or an empty
// string when the keyword is not followed by `(`. The scanner position is
// not moved.
fn (mut s Scanner) ident_struct_name() string {
	anchor := s.pos
	// Keep the last known struct_name when the column check fails, to avoid
	// picking up high order/anonymous function definitions
	if s.current_column() - 2 != 0 {
		return s.struct_name
	}
	text_len := s.text.len
	mut cur := anchor + 1
	// skip whitespace after `fn`
	for cur < text_len && s.text[cur].is_space() {
		cur++
	}
	if cur >= text_len {
		return ''
	}
	// no receiver when `(` is not the first character after `fn`
	if s.text[cur] != `(` {
		return ''
	}
	// find the closing parenthesis of the receiver
	for cur < text_len && s.text[cur] != `)` {
		cur++
	}
	if cur >= text_len {
		return ''
	}
	cur--
	// walk back over whitespace in front of `)`
	for cur > anchor && s.text[cur].is_space() {
		cur--
	}
	if cur < anchor {
		return ''
	}
	name_end := cur + 1
	// walk back while there is a name character or a digit
	for cur > anchor && (util.is_name_char(s.text[cur]) || s.text[cur].is_digit()) {
		cur--
	}
	if cur < anchor {
		return ''
	}
	name_start := cur + 1
	if s.text[name_start].is_digit() || name_end > text_len || name_end <= name_start || name_end <= anchor || name_start <= anchor {
		return ''
	}
	return s.text[name_start..name_end]
}
// filter_num_sep copies the bytes txt[start..end] into a freshly allocated
// string, leaving out every `num_sep` byte.
fn filter_num_sep(txt byteptr, start int, end int) string {
	unsafe{
		// one extra byte for the terminating 0
		mut buf := malloc(end - start + 1)
		mut written := 0
		for idx in start .. end {
			if txt[idx] != num_sep {
				buf[written] = txt[idx]
				written++
			}
		}
		buf[written] = 0 // C string compatibility
		return string(buf,written)
	}
}
2020-05-17 13:51:18 +02:00
// ident_bin_number reads a `0b...` literal starting at the current position
// and returns it with the separators removed. A missing number part or a
// digit/letter that is not a binary digit is reported through s.error().
// On return `s.pos` is on the last byte of the literal.
fn (mut s Scanner) ident_bin_number() string {
	begin := s.pos
	mut saw_bad := false
	mut bad_pos := 0
	mut first_wrong_digit := `\0`
	s.pos += 2 // step over '0b'
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		accepted := ch.is_bin_digit() || ch == num_sep
		if !accepted {
			is_alnum := ch.is_digit() || ch.is_letter()
			if !is_alnum || s.is_inside_string {
				break
			}
			if !saw_bad {
				saw_bad = true
				bad_pos = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	if s.pos == begin + 2 {
		s.pos-- // adjust error position
		s.error('number part of this binary is not provided')
	} else if saw_bad {
		s.pos = bad_pos // adjust error position
		s.error('this binary number has unsuitable digit `${first_wrong_digit.str()}`')
	}
	digits := filter_num_sep(s.text.str, begin, s.pos)
	s.pos--
	return digits
}
2020-05-17 13:51:18 +02:00
// ident_hex_number reads a `0x...` literal starting at the current position
// and returns it with the separators removed. A missing number part or a
// letter that is not a hexadecimal digit is reported through s.error().
// On return `s.pos` is on the last byte of the literal.
fn (mut s Scanner) ident_hex_number() string {
	begin := s.pos
	mut saw_bad := false
	mut bad_pos := 0
	mut first_wrong_digit := `\0`
	s.pos += 2 // step over '0x'
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		accepted := ch.is_hex_digit() || ch == num_sep
		if !accepted {
			if !ch.is_letter() || s.is_inside_string {
				break
			}
			if !saw_bad {
				saw_bad = true
				bad_pos = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	if s.pos == begin + 2 {
		s.pos-- // adjust error position
		s.error('number part of this hexadecimal is not provided')
	} else if saw_bad {
		s.pos = bad_pos // adjust error position
		s.error('this hexadecimal number has unsuitable digit `${first_wrong_digit.str()}`')
	}
	digits := filter_num_sep(s.text.str, begin, s.pos)
	s.pos--
	return digits
}
2020-05-17 13:51:18 +02:00
// ident_oct_number reads a `0o...` literal starting at the current position
// and returns it with the separators removed. A missing number part or a
// digit/letter that is not an octal digit is reported through s.error().
// On return `s.pos` is on the last byte of the literal.
fn (mut s Scanner) ident_oct_number() string {
	begin := s.pos
	mut saw_bad := false
	mut bad_pos := 0
	mut first_wrong_digit := `\0`
	s.pos += 2 // step over '0o'
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		accepted := ch.is_oct_digit() || ch == num_sep
		if !accepted {
			is_alnum := ch.is_digit() || ch.is_letter()
			if !is_alnum || s.is_inside_string {
				break
			}
			if !saw_bad {
				saw_bad = true
				bad_pos = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	if s.pos == begin + 2 {
		s.pos-- // adjust error position
		s.error('number part of this octal is not provided')
	} else if saw_bad {
		s.pos = bad_pos // adjust error position
		s.error('this octal number has unsuitable digit `${first_wrong_digit.str()}`')
	}
	digits := filter_num_sep(s.text.str, begin, s.pos)
	s.pos--
	return digits
}
2020-05-17 13:51:18 +02:00
// ident_dec_number reads a decimal literal (integer part, optional
// fraction, optional exponent) starting at the current position and returns
// it with the separators removed. Malformed literals are reported through
// s.error(). On return `s.pos` is on the last byte of the literal.
fn (mut s Scanner) ident_dec_number() string {
	mut saw_bad := false
	mut bad_pos := 0
	mut first_wrong_digit := `\0`
	begin := s.pos
	// integer part
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if !ch.is_digit() && ch != num_sep {
			if !ch.is_letter() || ch in [`e`, `E`] || s.is_inside_string {
				break
			}
			if !saw_bad {
				saw_bad = true
				bad_pos = s.pos
				first_wrong_digit = ch
			}
		}
		s.pos++
	}
	mut call_method := false // true for, e.g., 5.str(), 5.5.str(), 5e5.str()
	mut is_range := false // true for, e.g., 5..10
	mut is_float_without_fraction := false // true for, e.g. 5.
	// fractional part
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		s.pos++
		if s.pos < s.text.len {
			after_dot := s.text[s.pos]
			if after_dot.is_digit() {
				// 5.5, 5.5.str()
				for s.pos < s.text.len {
					ch := s.text[s.pos]
					if !ch.is_digit() {
						if !ch.is_letter() || ch in [`e`, `E`] || s.is_inside_string {
							// 5.5.str()
							if ch == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1].is_letter() {
								call_method = true
							}
							break
						}
						if !saw_bad {
							saw_bad = true
							bad_pos = s.pos
							first_wrong_digit = ch
						}
					}
					s.pos++
				}
			} else if after_dot == `.` {
				// 5.. (a range)
				is_range = true
				s.pos--
			} else if after_dot in [`e`, `E`] {
				// 5.e5
			} else if after_dot.is_letter() {
				// 5.str()
				call_method = true
				s.pos--
			} else if after_dot != `)` {
				// 5.
				is_float_without_fraction = true
				s.pos--
			}
		}
	}
	// exponential part
	mut has_exp := false
	if s.pos < s.text.len && s.text[s.pos] in [`e`, `E`] {
		has_exp = true
		s.pos++
		if s.pos < s.text.len && s.text[s.pos] in [`-`, `+`] {
			s.pos++
		}
		for s.pos < s.text.len {
			ch := s.text[s.pos]
			if !ch.is_digit() {
				if !ch.is_letter() || s.is_inside_string {
					// 5e5.str()
					if ch == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1].is_letter() {
						call_method = true
					}
					break
				}
				if !saw_bad {
					saw_bad = true
					bad_pos = s.pos
					first_wrong_digit = ch
				}
			}
			s.pos++
		}
	}
	if saw_bad {
		// error check: wrong digit
		s.pos = bad_pos // adjust error position
		s.error('this number has unsuitable digit `${first_wrong_digit.str()}`')
	} else if s.text[s.pos - 1] in [`e`, `E`] {
		// error check: 5e
		s.pos-- // adjust error position
		s.error('exponent has no digits')
	} else if s.pos < s.text.len && s.text[s.pos] == `.` && !is_range && !is_float_without_fraction && !call_method {
		// error check: 1.23.4, 123.e+3.4
		if has_exp {
			s.error('exponential part should be integer')
		} else {
			s.error('too many decimal points in number')
		}
	}
	digits := filter_num_sep(s.text.str, begin, s.pos)
	s.pos--
	return digits
}
2020-05-17 13:51:18 +02:00
// ident_number picks the number reader that matches the prefix found at
// the current position (`0b`, `0x`, `0o`, otherwise decimal).
fn (mut s Scanner) ident_number() string {
	if s.expect('0b', s.pos) {
		return s.ident_bin_number()
	}
	if s.expect('0x', s.pos) {
		return s.ident_hex_number()
	}
	if s.expect('0o', s.pos) {
		return s.ident_oct_number()
	}
	return s.ident_dec_number()
}
2020-05-17 13:51:18 +02:00
// skip_whitespace advances `s.pos` over whitespace and counts the lines it
// passes. When `is_vh` is set it stops in front of the first newline.
fn (mut s Scanner) skip_whitespace() {
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if !ch.is_space() {
			break
		}
		if util.is_nl(ch) {
			if s.is_vh {
				return
			}
			// Count \r\n as one line
			if !s.expect('\r\n', s.pos - 1) {
				s.inc_line_number()
			}
		}
		s.pos++
	}
}
2020-05-17 13:51:18 +02:00
// end_of_file moves the scanner to the end of the text and returns an
// `.eof` token. It counts its own calls and aborts with an error once it
// has been called more than 50 times.
fn (mut s Scanner) end_of_file() token.Token {
	s.eofs++
	if s.eofs > 50 {
		s.line_nr--
		msg := 'the end of file `$s.file_path` has been reached 50 times already, the v parser is probably stuck.\n' +
			'This should not happen. Please report the bug here, and include the last 2-3 lines of your source code:\n' +
			'https://github.com/vlang/v/issues/new?labels=Bug&template=bug_report.md'
		s.error(msg)
	}
	is_first_eof := s.eofs == 1
	if s.pos != s.text.len && is_first_eof {
		s.inc_line_number()
	}
	s.pos = s.text.len
	return s.new_token(.eof, '', 1)
}
2020-05-17 13:51:18 +02:00
// scan returns the next token of the source text and leaves `s.pos` on the
// last byte of that token. At the end of the text it returns `.eof`.
// Comments are either skipped or returned as `.comment` tokens, depending
// on `s.comments_mode`. Unknown characters are reported through s.error().
//
// Every look-ahead past `nextc` is bounds checked, so truncated input such
// as a trailing `..`, `!i`, `@`, `}` or a cut-off UTF-8 sequence is handled
// without indexing beyond the end of `s.text`.
pub fn (mut s Scanner) scan() token.Token {
	if s.is_started {
		s.pos++
	}
	s.is_started = true
	if s.pos >= s.text.len {
		return s.end_of_file()
	}
	if !s.is_inside_string {
		s.skip_whitespace()
	}
	// End of $var, start next string
	if s.is_inter_end {
		// skip_whitespace() above may have consumed the rest of the text
		if s.pos >= s.text.len {
			return s.end_of_file()
		}
		if s.text[s.pos] == s.quote {
			s.is_inter_end = false
			return s.new_token(.string, '', 1)
		}
		s.is_inter_end = false
		ident_string := s.ident_string()
		return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
	}
	s.skip_whitespace()
	// end of file
	if s.pos >= s.text.len {
		return s.end_of_file()
	}
	// handle each char
	c := s.text[s.pos]
	nextc := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
	// name or keyword
	if util.is_name_char(c) {
		name := s.ident_name()
		// tmp hack to detect . in ${}
		// Check if not .eof to prevent panic
		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
		kind := token.keywords[name]
		if kind != .unknown {
			if kind == .key_fn {
				// the struct name has to be known first: ident_fn_name() reads it
				s.struct_name = s.ident_struct_name()
				s.fn_name = s.ident_fn_name()
			} else if kind == .key_module {
				s.mod_name = s.ident_mod_name()
			}
			return s.new_token(kind, name, name.len)
		}
		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
		// at the next ', skip it
		if s.is_inside_string {
			if next_char == s.quote {
				s.is_inter_end = true
				s.is_inter_start = false
				s.is_inside_string = false
			}
		}
		// end of `$expr`
		// allow `'$a.b'` and `'$a.c()'`
		if s.is_inter_start && next_char != `.` && next_char != `(` {
			s.is_inter_end = true
			s.is_inter_start = false
		}
		if s.pos == 0 && next_char == ` ` {
			// If a single letter name at the start of the file, increment
			// Otherwise the scanner would be stuck at s.pos = 0
			s.pos++
		}
		return s.new_token(.name, name, name.len)
	} else if c.is_digit() || (c == `.` && nextc.is_digit()) {
		// `123`, `.123`
		if !s.is_inside_string {
			// In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed.
			mut start_pos := s.pos
			for start_pos < s.text.len && s.text[start_pos] == `0` {
				start_pos++
			}
			mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
			// for 0b, 0o, 0x the heading zero shouldn't be jumped
			if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
				prefix_zero_num--
			}
			s.pos += prefix_zero_num // jump these zeros
		}
		num := s.ident_number()
		return s.new_token(.number, num, num.len)
	}
	// Handle `'$fn()'`
	if c == `)` && s.is_inter_start {
		s.is_inter_end = true
		s.is_inter_start = false
		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
		if next_char == s.quote {
			s.is_inside_string = false
		}
		return s.new_token(.rpar, '', 1)
	}
	// all other tokens
	match c {
		`+` {
			if nextc == `+` {
				s.pos++
				return s.new_token(.inc, '', 2)
			} else if nextc == `=` {
				s.pos++
				return s.new_token(.plus_assign, '', 2)
			}
			return s.new_token(.plus, '', 1)
		}
		`-` {
			if nextc == `-` {
				s.pos++
				return s.new_token(.dec, '', 2)
			} else if nextc == `=` {
				s.pos++
				return s.new_token(.minus_assign, '', 2)
			}
			return s.new_token(.minus, '', 1)
		}
		`*` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.mult_assign, '', 2)
			}
			return s.new_token(.mul, '', 1)
		}
		`^` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.xor_assign, '', 2)
			}
			return s.new_token(.xor, '', 1)
		}
		`%` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.mod_assign, '', 2)
			}
			return s.new_token(.mod, '', 1)
		}
		`?` {
			return s.new_token(.question, '', 1)
		}
		single_quote, double_quote {
			ident_string := s.ident_string()
			return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
		}
		`\`` {
			// ` // apostrophe balance comment. do not remove
			ident_char := s.ident_char()
			return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
		}
		`(` {
			return s.new_token(.lpar, '', 1)
		}
		`)` {
			return s.new_token(.rpar, '', 1)
		}
		`[` {
			return s.new_token(.lsbr, '', 1)
		}
		`]` {
			return s.new_token(.rsbr, '', 1)
		}
		`{` {
			// Skip { in `${` in strings
			if s.is_inside_string {
				return s.scan()
			}
			return s.new_token(.lcbr, '', 1)
		}
		`$` {
			if s.is_inside_string {
				return s.new_token(.str_dollar, '', 1)
			} else {
				return s.new_token(.dollar, '', 1)
			}
		}
		`}` {
			// s = `hello $name !`
			// s = `hello ${name} !`
			if s.is_inside_string {
				s.pos++
				if s.pos >= s.text.len {
					// the text ended right after `}` while still inside a string
					s.error('unfinished string literal')
					return s.end_of_file()
				}
				if s.text[s.pos] == s.quote {
					s.is_inside_string = false
					return s.new_token(.string, '', 1)
				}
				ident_string := s.ident_string()
				return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
			} else {
				return s.new_token(.rcbr, '', 1)
			}
		}
		`&` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.and_assign, '', 2)
			}
			if nextc == `&` {
				s.pos++
				return s.new_token(.and, '', 2)
			}
			return s.new_token(.amp, '', 1)
		}
		`|` {
			if nextc == `|` {
				s.pos++
				return s.new_token(.logical_or, '', 2)
			}
			if nextc == `=` {
				s.pos++
				return s.new_token(.or_assign, '', 2)
			}
			return s.new_token(.pipe, '', 1)
		}
		`,` {
			return s.new_token(.comma, '', 1)
		}
		`@` {
			s.pos++
			if s.pos >= s.text.len {
				// `@` is the last byte of the text: there is no name to read
				s.pos--
				s.error('@ must be used before keywords (e.g. `@type string`)')
				return s.end_of_file()
			}
			name := s.ident_name()
			// @FN => will be substituted with the name of the current V function
			// @MOD => will be substituted with the name of the current V module
			// @STRUCT => will be substituted with the name of the current V struct
			// @VEXE => will be substituted with the path to the V compiler
			// @FILE => will be substituted with the path of the V source file
			// @LINE => will be substituted with the V line number where it appears (as a string).
			// @COLUMN => will be substituted with the column where it appears (as a string).
			// @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string).
			// @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string).
			// This allows things like this:
			// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN)
			// ... which is useful while debugging/tracing
			if name == 'FN' {
				return s.new_token(.string, s.fn_name, 3)
			}
			if name == 'MOD' {
				return s.new_token(.string, s.mod_name, 4)
			}
			if name == 'STRUCT' {
				return s.new_token(.string, s.struct_name, 7)
			}
			if name == 'VEXE' {
				vexe := pref.vexe_path()
				return s.new_token(.string, util.cescaped_path(vexe), 5)
			}
			if name == 'FILE' {
				return s.new_token(.string, util.cescaped_path(os.real_path(s.file_path)), 5)
			}
			if name == 'LINE' {
				return s.new_token(.string, (s.line_nr + 1).str(), 5)
			}
			if name == 'COLUMN' {
				return s.new_token(.string, s.current_column().str(), 7)
			}
			if name == 'VHASH' {
				return s.new_token(.string, util.vhash(), 6)
			}
			if name == 'VMOD_FILE' {
				// the file is read once and cached in the scanner
				if s.vmod_file_content.len == 0 {
					mcache := vmod.get_cache()
					vmod_file_location := mcache.get_by_file( s.file_path )
					if vmod_file_location.vmod_file.len == 0 {
						s.error('@VMOD_FILE can be used only in projects, that have v.mod file')
					}
					vmod_content := os.read_file(vmod_file_location.vmod_file) or {''}
					$if windows {
						s.vmod_file_content = vmod_content.replace('\r\n', '\n')
					} $else {
						s.vmod_file_content = vmod_content
					}
				}
				return s.new_token(.string, s.vmod_file_content, 10)
			}
			if !token.is_key(name) {
				s.error('@ must be used before keywords (e.g. `@type string`)')
			}
			return s.new_token(.name, name, name.len)
		}
		`.` {
			if nextc == `.` {
				s.pos++
				// a third dot only exists if there is still text left
				if s.pos + 1 < s.text.len && s.text[s.pos + 1] == `.` {
					s.pos++
					return s.new_token(.ellipsis, '', 3)
				}
				return s.new_token(.dotdot, '', 2)
			}
			return s.new_token(.dot, '', 1)
		}
		`#` {
			start := s.pos + 1
			s.ignore_line()
			if nextc == `!` {
				// treat shebang line (#!) as a comment
				s.line_comment = s.text[start + 1..s.pos].trim_space()
				return s.scan()
			}
			hash := s.text[start..s.pos].trim_space()
			return s.new_token(.hash, hash, hash.len)
		}
		`>` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.ge, '', 2)
			} else if nextc == `>` {
				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
					s.pos += 2
					return s.new_token(.right_shift_assign, '', 3)
				}
				s.pos++
				return s.new_token(.right_shift, '', 2)
			} else {
				return s.new_token(.gt, '', 1)
			}
		}
		0xE2 {
			// first byte of a three byte UTF-8 sequence; the third byte is
			// read as 0 when the text is cut off
			third := if s.pos + 2 < s.text.len { s.text[s.pos + 2] } else { `\0` }
			if nextc == 0x89 && third == 0xA0 {
				// case `≠`:
				s.pos += 2
				return s.new_token(.ne, '', 3)
			} else if nextc == 0x89 && third == 0xBD {
				s.pos += 2
				return s.new_token(.le, '', 3)
			} else if nextc == 0xA9 && third == 0xBE {
				s.pos += 2
				return s.new_token(.ge, '', 3)
			}
		}
		`<` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.le, '', 2)
			} else if nextc == `<` {
				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
					s.pos += 2
					return s.new_token(.left_shift_assign, '', 3)
				}
				s.pos++
				return s.new_token(.left_shift, '', 2)
			} else {
				return s.new_token(.lt, '', 1)
			}
		}
		`=` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.eq, '', 2)
			} else if nextc == `>` {
				s.pos++
				return s.new_token(.arrow, '', 2)
			} else {
				return s.new_token(.assign, '', 1)
			}
		}
		`:` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.decl_assign, '', 2)
			} else {
				return s.new_token(.colon, '', 1)
			}
		}
		`;` {
			return s.new_token(.semicolon, '', 1)
		}
		`!` {
			// `!in` / `!is` need two more bytes plus a following space,
			// so s.pos + 3 has to be inside the text
			has_three_more := s.pos + 3 < s.text.len
			if nextc == `=` {
				s.pos++
				return s.new_token(.ne, '', 2)
			} else if nextc == `i` && has_three_more && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() {
				s.pos += 2
				return s.new_token(.not_in, '', 3)
			} else if nextc == `i` && has_three_more && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() {
				s.pos += 2
				return s.new_token(.not_is, '', 3)
			} else {
				return s.new_token(.not, '', 1)
			}
		}
		`~` {
			return s.new_token(.bit_not, '', 1)
		}
		`/` {
			if nextc == `=` {
				s.pos++
				return s.new_token(.div_assign, '', 2)
			}
			if nextc == `/` {
				start := s.pos + 1
				s.ignore_line()
				s.line_comment = s.text[start + 1..s.pos]
				mut comment := s.line_comment.trim_space()
				s.pos--
				// fix line_nr, \n was read, and the comment is marked
				// on the next line
				s.line_nr--
				if s.comments_mode == .parse_comments {
					// Find out if this comment is on its own line (for vfmt)
					mut is_separate_line_comment := true
					for j := start-2; j >= 0 && s.text[j] != `\n`; j-- {
						if s.text[j] !in [`\t`, ` `] {
							is_separate_line_comment = false
						}
					}
					if is_separate_line_comment {
						comment = '|' + comment
					}
					return s.new_token(.comment, comment, comment.len + 2)
				}
				// Skip the comment (return the next token)
				return s.scan()
			}
			// Multiline comments
			if nextc == `*` {
				start := s.pos + 2
				mut nest_count := 1
				// Skip comment
				for nest_count > 0 {
					s.pos++
					if s.pos >= s.text.len {
						s.line_nr--
						s.error('comment not terminated')
					}
					if s.text[s.pos] == `\n` {
						s.inc_line_number()
						continue
					}
					if s.expect('/*', s.pos) {
						nest_count++
						continue
					}
					if s.expect('*/', s.pos) {
						nest_count--
					}
				}
				s.pos++
				if s.comments_mode == .parse_comments {
					comment := s.text[start..(s.pos - 1)].trim_space()
					return s.new_token(.comment, comment, comment.len + 4)
				}
				// Skip if not in fmt mode
				return s.scan()
			}
			return s.new_token(.div, '', 1)
		}
		else {}
	}
	$if windows {
		if c == `\0` {
			return s.end_of_file()
		}
	}
	s.error('invalid character `${c.str()}`')
	return s.end_of_file()
}
// current_column returns the distance between the current position and the
// position recorded for the last counted newline.
fn (s &Scanner) current_column() int {
	column := s.pos - s.last_nl_pos
	return column
}
// count_symbol_before returns how many consecutive `sym` bytes end at
// index `p` (inclusive), walking towards the start of the text.
fn (s &Scanner) count_symbol_before(p int, sym byte) int {
	mut idx := p
	for idx >= 0 && s.text[idx] == sym {
		idx--
	}
	// every step taken backwards was one matching byte
	return p - idx
}
2020-05-17 13:51:18 +02:00
// ident_string reads string contents starting at the current position and
// returns them without the surrounding quotes. Reading stops at the closing
// quote, or in front of a `$name` / `${` interpolation, in which case
// `is_inside_string` (and for `$name` also `is_inter_start`) is set so that
// scan() continues with the interpolated expression.
// A literal that runs into the end of the text is reported as an error
// instead of silently returning the rest of the file.
fn (mut s Scanner) ident_string() string {
	q := s.text[s.pos]
	is_quote := q == single_quote || q == double_quote
	is_raw := is_quote && s.pos > 0 && s.text[s.pos - 1] == `r`
	if is_quote && !s.is_inside_string {
		s.quote = q
	}
	mut start := s.pos
	s.is_inside_string = false
	slash := `\\`
	for {
		s.pos++
		if s.pos >= s.text.len {
			s.error('unfinished string literal')
			break
		}
		c := s.text[s.pos]
		prevc := s.text[s.pos - 1]
		// end of string; a quote after `\\` also ends it (handle '123\\' slash at the end).
		// s.pos - 2 is only read when it is a valid index.
		if c == s.quote && (prevc != slash || (s.pos >= 2 && s.text[s.pos - 2] == slash)) {
			break
		}
		if c == `\n` {
			s.inc_line_number()
		}
		// Don't allow \0
		if c == `0` && s.pos > 2 && s.text[s.pos - 1] == slash {
			if s.pos < s.text.len - 1 && s.text[s.pos + 1].is_digit() {}
			else {
				s.error('0 character in a string literal')
			}
		}
		// Don't allow \x00
		if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
			s.error('0 character in a string literal')
		}
		// ${var} (ignore in vfmt mode)
		if c == `{` && prevc == `$` && !is_raw && !s.is_fmt && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.is_inside_string = true
			// so that s.pos points to $ at the next step
			s.pos -= 2
			break
		}
		// $var
		if util.is_name_char(c) && prevc == `$` && !s.is_fmt && !is_raw && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.is_inside_string = true
			s.is_inter_start = true
			s.pos -= 2
			break
		}
	}
	mut lit := ''
	if s.text[start] == s.quote {
		start++
	}
	mut end := s.pos
	if s.is_inside_string {
		end++
	}
	if start <= s.pos {
		raw := s.text[start..end]
		// a backslash directly before a newline continues the line
		if raw.contains('\\\n') {
			lit = trim_slash_line_break(raw)
		} else {
			lit = raw
		}
	}
	return lit
}
// trim_slash_line_break removes every backslash + newline pair from `s`,
// together with the whitespace that directly follows the pair.
fn trim_slash_line_break(s string) string {
	mut joined := s
	mut search_from := 0
	for {
		hit := joined.index_after('\\\n', search_from)
		if hit == -1 {
			break
		}
		head := joined[..hit]
		tail := joined[hit+2..].trim_left(' \n\t\v\f\r')
		joined = head + tail
		search_from = hit
	}
	return joined
}
2020-05-17 13:51:18 +02:00
// ident_char reads a backtick delimited character literal starting at the
// opening backtick and returns the text between the backticks. A literal
// that holds more than one character is reported through s.error().
fn (mut s Scanner) ident_char() string {
	open_pos := s.pos
	slash := `\\`
	mut counted := 0
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		cur := s.text[s.pos]
		if cur != slash {
			counted++
		}
		double_slash := s.expect('\\\\', s.pos - 2)
		if cur == `\`` && (s.text[s.pos - 1] != slash || double_slash) {
			// ` // apostrophe balance comment. do not remove
			if double_slash {
				counted++
			}
			break
		}
	}
	// the closing backtick was counted as well
	counted--
	c := s.text[open_pos + 1..s.pos]
	if counted != 1 {
		u := c.ustring()
		if u.len != 1 {
			s.error('invalid character literal (more than one character)\n' + 'use quotes for strings, backticks for characters')
		}
	}
	// Escapes a `'` character
	if c == "\'" {
		return '\\' + c
	}
	return c
}
// expect reports whether the text contains `want` at index `start_pos`.
// Positions outside of the text never match.
fn (s &Scanner) expect(want string, start_pos int) bool {
	text_len := s.text.len
	if start_pos < 0 || start_pos >= text_len {
		return false
	}
	end_pos := start_pos + want.len
	if end_pos < 0 || end_pos > text_len {
		return false
	}
	for offset in 0 .. want.len {
		if want[offset] != s.text[start_pos + offset] {
			return false
		}
	}
	return true
}
2020-05-17 13:51:18 +02:00
// debug_tokens rewinds the scanner and prints every token up to and
// including `.eof`, one per line, followed by its literal when it has one.
fn (mut s Scanner) debug_tokens() {
	s.pos = 0
	s.is_started = false
	s.is_debug = true
	fname := s.file_path.all_after_last(os.path_separator)
	println('\n===DEBUG TOKENS $fname===')
	for {
		tok := s.scan()
		lit := tok.lit
		print(tok.kind.str())
		if lit == '' {
			println('')
		} else {
			println(' `$lit`')
		}
		if tok.kind == .eof {
			println('============ END OF DEBUG TOKENS ==================')
			break
		}
	}
}
2020-05-17 13:51:18 +02:00
// ignore_line moves to the end of the current line and counts that line.
fn (mut s Scanner) ignore_line() {
	for s.pos < s.text.len && s.text[s.pos] != `\n` {
		s.pos++
	}
	s.inc_line_number()
}
2020-05-17 13:51:18 +02:00
// eat_to_end_of_line advances `s.pos` to the next `\n`, or to the end of
// the text when there is none.
fn (mut s Scanner) eat_to_end_of_line() {
	for s.pos < s.text.len {
		if s.text[s.pos] == `\n` {
			break
		}
		s.pos++
	}
}
2020-05-17 13:51:18 +02:00
// inc_line_number records the current position as a line end and moves on
// to the next line, keeping `nr_lines` at the highest line number reached.
fn (mut s Scanner) inc_line_number() {
	here := s.pos
	s.last_nl_pos = here
	s.line_nr++
	s.line_ends << here
	if s.nr_lines < s.line_nr {
		s.nr_lines = s.line_nr
	}
}
// error prints `msg` for the current scanner position to stderr and then
// terminates the program with exit code 1.
pub fn (s &Scanner) error(msg string) {
	where := token.Position{
		line_nr: s.line_nr
		pos: s.pos
	}
	report := util.formatted_error('error:', msg, s.file_path, where)
	eprintln(report)
	exit(1)
}
// verror forwards `s` to util.verror(), labelled as a scanner error.
pub fn verror(s string) {
	label := 'scanner error'
	util.verror(label, s)
}
// codegen appends generated V code to the text that is being scanned.
// codegen makes sense only during normal compilation
// feeding code generated V code to vfmt or vdoc will
// cause them to output/document ephemeral stuff.
pub fn (mut s Scanner) codegen(newtext string) {
	if s.comments_mode != .skip_comments {
		return
	}
	s.text += newtext
	$if debug_codegen ? {
		eprintln('scanner.codegen:\n $newtext')
	}
}