// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module scanner

import os
import v.token
import v.pref
import v.util
import v.vmod

const (
	single_quote = `\'`
	double_quote = `"`
	// char used as number separator
	num_sep = `_`
)

pub struct Scanner {
pub mut:
	file_path string
	text string
	pos int
	line_nr int
	last_nl_pos int // for calculating column
	is_inside_string bool
	is_inter_start bool // for hacky string interpolation TODO simplify
	is_inter_end bool
	is_debug bool
	line_comment string
	// prev_tok TokenKind
	is_started bool
	fn_name string // needed for @FN
	mod_name string // needed for @MOD
	struct_name string // needed for @STRUCT
	vmod_file_content string // needed for @VMOD_FILE, contents of the file, *NOT its path*
	is_print_line_on_error bool
	is_print_colored_error bool
	is_print_rel_paths_on_error bool
	quote byte // which quote is used to denote current string: ' or "
	line_ends []int // the positions of source lines ends (i.e. \n signs)
	nr_lines int // total number of lines in the source file that were scanned
	is_vh bool // Keep newlines
	is_fmt bool // Used only for skipping ${} in strings, since we need literal
	// string values when generating formatted code.
	comments_mode CommentsMode
	is_inside_toplvl_statement bool = false // *only* used in comments_mode: .toplevel_comments, toggled by parser
	all_tokens []token.Token // *only* used in comments_mode: .toplevel_comments, contains all tokens
	tidx int
	eofs int
	pref &pref.Preferences
	vet_errors &[]string
}

/*
How the .toplevel_comments mode works:

In this mode, the scanner scans *everything* at once, before parsing starts,
including all the comments, and stores the results in a buffer s.all_tokens.

Then .scan() just returns s.all_tokens[ s.tidx++ ] *ignoring* the
comment tokens. In other words, by default in this mode, the parser
*will not see any comments* inside top level statements, so it has
no reason to complain about them.

When the parser determines that it is outside of a top level statement,
it tells the scanner to backtrack s.tidx to the current p.tok index,
then it changes .is_inside_toplvl_statement to false, and refills its
lookahead buffer (i.e. p.peek_tok, p.peek_tok2, p.peek_tok3) from the
scanner.

In effect, from the parser's point of view, the next tokens that it will
receive with p.next() will be the same as if comments are not ignored
anymore *between* top level statements.

When the parser determines that it is going again inside a top level
statement, it does the same, this time setting .is_inside_toplvl_statement
to true, again refilling the lookahead buffer => calling .next() in this
mode will again ignore all the comment tokens, till the top level statement
is finished.
*/
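// A minimal sketch of the parser side of this contract (hypothetical caller code,
// shown here only for illustration, not part of this module):
//   p.scanner.set_is_inside_toplevel_statement(false) // leaving a top level statement
//   p.scanner.set_current_tidx(p.tok.tidx)            // backtrack the scanner's token index
//   p.next() // refill p.tok/p.peek_tok*, comments are delivered again
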
// The different kinds of scanner modes:
//
// .skip_comments - simplest/fastest, just ignores all comments early.
// This mode is used by the compiler itself.
//
// .parse_comments is used by vfmt. Ideally it should handle inline /* */
// comments too, i.e. it returns every kind of comment as a new token.
//
// .toplevel_comments is used by vdoc, parses *only* top level ones
// that are *outside* structs/enums/fns.
pub enum CommentsMode {
	skip_comments
	parse_comments
	toplevel_comments
}
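// Mode selection sketch (assumes a caller that already has `prefs &pref.Preferences`):
//   compiler: s := new_scanner_file(path, .skip_comments, prefs)
//   vfmt:     s := new_scanner(text, .parse_comments, prefs)
//   vdoc:     s := new_scanner_file(path, .toplevel_comments, prefs)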

// new scanner from file.
pub fn new_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
	return new_vet_scanner_file(file_path, comments_mode, pref, voidptr(0))
}

pub fn new_vet_scanner_file(file_path string, comments_mode CommentsMode, pref &pref.Preferences, vet_errors &[]string) &Scanner {
	if !os.exists(file_path) {
		verror("$file_path doesn't exist")
	}
	raw_text := util.read_file(file_path) or {
		verror(err)
		return voidptr(0)
	}
	mut s := new_vet_scanner(raw_text, comments_mode, pref, vet_errors)
	s.file_path = file_path
	return s
}

// new scanner from string.
pub fn new_scanner(text string, comments_mode CommentsMode, pref &pref.Preferences) &Scanner {
	return new_vet_scanner(text, comments_mode, pref, voidptr(0))
}

pub fn new_vet_scanner(text string, comments_mode CommentsMode, pref &pref.Preferences, vet_errors &[]string) &Scanner {
	is_fmt := pref.is_fmt
	mut s := &Scanner{
		pref: pref
		text: text
		is_print_line_on_error: true
		is_print_colored_error: true
		is_print_rel_paths_on_error: true
		is_fmt: is_fmt
		comments_mode: comments_mode
		vet_errors: vet_errors
	}
	s.file_path = 'internal_memory'
	return s
}

[inline]
fn (s &Scanner) should_parse_comment() bool {
	res := (s.comments_mode == .parse_comments) ||
		(s.comments_mode == .toplevel_comments && !s.is_inside_toplvl_statement)
	return res
}

// NB: this is called by v's parser
pub fn (mut s Scanner) set_is_inside_toplevel_statement(newstate bool) {
	s.is_inside_toplvl_statement = newstate
}

pub fn (mut s Scanner) set_current_tidx(cidx int) {
	mut tidx := if cidx < 0 { 0 } else { cidx }
	tidx = if tidx > s.all_tokens.len { s.all_tokens.len } else { tidx }
	s.tidx = tidx
}

[inline]
fn (mut s Scanner) new_token(tok_kind token.Kind, lit string, len int) token.Token {
	cidx := s.tidx
	s.tidx++
	return token.Token{
		kind: tok_kind
		lit: lit
		line_nr: s.line_nr + 1
		pos: s.pos - len + 1
		len: len
		tidx: cidx
	}
}

[inline]
fn (mut s Scanner) ident_name() string {
	start := s.pos
	s.pos++
	for s.pos < s.text.len && (util.is_name_char(s.text[s.pos]) || s.text[s.pos].is_digit()) {
		s.pos++
	}
	name := s.text[start..s.pos]
	s.pos--
	return name
}
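// For example, with s.pos at the `f` of `foo_bar2 :=`, ident_name() returns
// 'foo_bar2' and leaves s.pos on its last character.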

// ident_fn_name looks ahead and returns the name of the function if possible, otherwise an empty string
fn (mut s Scanner) ident_fn_name() string {
	start := s.pos
	mut pos := s.pos
	pos++
	if s.current_column() - 2 != 0 {
		return s.fn_name
	}
	has_struct_name := s.struct_name != ''
	if has_struct_name {
		for pos < s.text.len && s.text[pos] != `(` {
			pos++
		}
		if pos >= s.text.len {
			return ''
		}
		pos++
	}
	for pos < s.text.len && s.text[pos] != `(` {
		pos++
	}
	if pos >= s.text.len {
		return ''
	}
	pos--
	// Eat whitespaces
	for pos > start && s.text[pos].is_space() {
		pos--
	}
	if pos < start {
		return ''
	}
	end_pos := pos + 1
	pos--
	// Search for the start position
	for pos > start && util.is_func_char(s.text[pos]) {
		pos--
	}
	pos++
	start_pos := pos
	if pos <= start || pos >= s.text.len {
		return ''
	}
	if s.text[start_pos].is_digit() || end_pos > s.text.len ||
		end_pos <= start_pos || end_pos <= start ||
		start_pos < start {
		return ''
	}
	fn_name := s.text[start_pos..end_pos]
	return fn_name
}

// ident_mod_name looks ahead and returns the name of the module this file belongs to if possible, otherwise an empty string
fn (mut s Scanner) ident_mod_name() string {
	start := s.pos
	mut pos := s.pos
	pos++
	// Eat whitespaces
	for pos < s.text.len && s.text[pos].is_space() {
		pos++
	}
	if pos >= s.text.len {
		return ''
	}
	start_pos := pos
	// Search for next occurrence of a whitespace or newline
	for pos < s.text.len && !s.text[pos].is_space() && !util.is_nl(s.text[pos]) {
		pos++
	}
	if pos >= s.text.len {
		return ''
	}
	end_pos := pos
	if end_pos > s.text.len || end_pos <= start_pos || end_pos <= start || start_pos <= start {
		return ''
	}
	mod_name := s.text[start_pos..end_pos]
	return mod_name
}

// ident_struct_name looks ahead and returns the name of the last encountered struct if possible, otherwise an empty string
fn (mut s Scanner) ident_struct_name() string {
	start := s.pos
	mut pos := s.pos
	// Return the last known struct_name encountered, to avoid using higher order/anonymous function definitions
	if s.current_column() - 2 != 0 {
		return s.struct_name
	}
	pos++
	// Eat whitespaces
	for pos < s.text.len && s.text[pos].is_space() {
		pos++
	}
	if pos >= s.text.len {
		return ''
	}
	// Return if `(` is not the first character after "fn ..."
	if s.text[pos] != `(` {
		return ''
	}
	// Search for closing parenthesis
	for pos < s.text.len && s.text[pos] != `)` {
		pos++
	}
	if pos >= s.text.len {
		return ''
	}
	pos--
	// Search backwards for end position of struct name
	// Eat whitespaces
	for pos > start && s.text[pos].is_space() {
		pos--
	}
	if pos < start {
		return ''
	}
	end_pos := pos + 1
	// Go back while we have a name character or digit
	for pos > start && (util.is_name_char(s.text[pos]) || s.text[pos].is_digit()) {
		pos--
	}
	if pos < start {
		return ''
	}
	start_pos := pos + 1
	if s.text[start_pos].is_digit() || end_pos > s.text.len ||
		end_pos <= start_pos || end_pos <= start ||
		start_pos <= start {
		return ''
	}
	struct_name := s.text[start_pos..end_pos]
	return struct_name
}
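// For example, while scanning `fn (mut s Scanner) scan() token.Token {`, the look
// ahead above yields 'Scanner'; for a plain `fn foo() {` it returns ''.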

fn filter_num_sep(txt byteptr, start, end int) string {
	unsafe {
		mut b := malloc(end - start + 1) // add a byte for the endstring 0
		mut i1 := 0
		for i := start; i < end; i++ {
			if txt[i] != num_sep {
				b[i1] = txt[i]
				i1++
			}
		}
		b[i1] = 0 // C string compatibility
		return b.vstring_with_len(i1)
	}
}
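// For example, filter_num_sep('1_000_000'.str, 0, 9) returns '1000000';
// the separator byte (num_sep, i.e. `_`) is simply dropped.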

fn (mut s Scanner) ident_bin_number() string {
	mut has_wrong_digit := false
	mut first_wrong_digit_pos := 0
	mut first_wrong_digit := `\0`
	start_pos := s.pos
	s.pos += 2 // skip '0b'
	if s.text[s.pos] == num_sep {
		s.error('separator `_` is only valid between digits in a numeric literal')
	}
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if c == num_sep && s.text[s.pos + 1] == num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !c.is_bin_digit() && c != num_sep {
			if (!c.is_digit() && !c.is_letter()) || s.is_inside_string {
				break
			} else if !has_wrong_digit {
				has_wrong_digit = true
				first_wrong_digit_pos = s.pos
				first_wrong_digit = c
			}
		}
		s.pos++
	}
	if s.text[s.pos - 1] == num_sep {
		s.error('cannot use `_` at the end of a numeric literal')
	} else if start_pos + 2 == s.pos {
		s.pos-- // adjust error position
		s.error('number part of this binary is not provided')
	} else if has_wrong_digit {
		s.pos = first_wrong_digit_pos // adjust error position
		s.error('this binary number has unsuitable digit `$first_wrong_digit.str()`')
	}
	number := filter_num_sep(s.text.str, start_pos, s.pos)
	s.pos--
	return number
}

fn (mut s Scanner) ident_hex_number() string {
	mut has_wrong_digit := false
	mut first_wrong_digit_pos := 0
	mut first_wrong_digit := `\0`
	start_pos := s.pos
	s.pos += 2 // skip '0x'
	if s.text[s.pos] == num_sep {
		s.error('separator `_` is only valid between digits in a numeric literal')
	}
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if c == num_sep && s.text[s.pos + 1] == num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !c.is_hex_digit() && c != num_sep {
			if !c.is_letter() || s.is_inside_string {
				break
			} else if !has_wrong_digit {
				has_wrong_digit = true
				first_wrong_digit_pos = s.pos
				first_wrong_digit = c
			}
		}
		s.pos++
	}
	if s.text[s.pos - 1] == num_sep {
		s.error('cannot use `_` at the end of a numeric literal')
	} else if start_pos + 2 == s.pos {
		s.pos-- // adjust error position
		s.error('number part of this hexadecimal is not provided')
	} else if has_wrong_digit {
		s.pos = first_wrong_digit_pos // adjust error position
		s.error('this hexadecimal number has unsuitable digit `$first_wrong_digit.str()`')
	}
	number := filter_num_sep(s.text.str, start_pos, s.pos)
	s.pos--
	return number
}

fn (mut s Scanner) ident_oct_number() string {
	mut has_wrong_digit := false
	mut first_wrong_digit_pos := 0
	mut first_wrong_digit := `\0`
	start_pos := s.pos
	s.pos += 2 // skip '0o'
	if s.text[s.pos] == num_sep {
		s.error('separator `_` is only valid between digits in a numeric literal')
	}
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if c == num_sep && s.text[s.pos + 1] == num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !c.is_oct_digit() && c != num_sep {
			if (!c.is_digit() && !c.is_letter()) || s.is_inside_string {
				break
			} else if !has_wrong_digit {
				has_wrong_digit = true
				first_wrong_digit_pos = s.pos
				first_wrong_digit = c
			}
		}
		s.pos++
	}
	if s.text[s.pos - 1] == num_sep {
		s.error('cannot use `_` at the end of a numeric literal')
	} else if start_pos + 2 == s.pos {
		s.pos-- // adjust error position
		s.error('number part of this octal is not provided')
	} else if has_wrong_digit {
		s.pos = first_wrong_digit_pos // adjust error position
		s.error('this octal number has unsuitable digit `$first_wrong_digit.str()`')
	}
	number := filter_num_sep(s.text.str, start_pos, s.pos)
	s.pos--
	return number
}

fn (mut s Scanner) ident_dec_number() string {
	mut has_wrong_digit := false
	mut first_wrong_digit_pos := 0
	mut first_wrong_digit := `\0`
	start_pos := s.pos
	// scan integer part
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if c == num_sep && s.text[s.pos + 1] == num_sep {
			s.error('cannot use `_` consecutively')
		}
		if !c.is_digit() && c != num_sep {
			if !c.is_letter() || c in [`e`, `E`] || s.is_inside_string {
				break
			} else if !has_wrong_digit {
				has_wrong_digit = true
				first_wrong_digit_pos = s.pos
				first_wrong_digit = c
			}
		}
		s.pos++
	}
	if s.text[s.pos - 1] == num_sep {
		s.error('cannot use `_` at the end of a numeric literal')
	}
	mut call_method := false // true for, e.g., 5.str(), 5.5.str(), 5e5.str()
	mut is_range := false // true for, e.g., 5..10
	// scan fractional part
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		s.pos++
		if s.pos < s.text.len {
			// 5.5, 5.5.str()
			if s.text[s.pos].is_digit() {
				for s.pos < s.text.len {
					c := s.text[s.pos]
					if !c.is_digit() {
						if !c.is_letter() || c in [`e`, `E`] || s.is_inside_string {
							// 5.5.str()
							if c == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1].is_letter() {
								call_method = true
							}
							break
						} else if !has_wrong_digit {
							has_wrong_digit = true
							first_wrong_digit_pos = s.pos
							first_wrong_digit = c
						}
					}
					s.pos++
				}
			} else if s.text[s.pos] == `.` {
				// 5.. (a range)
				is_range = true
				s.pos--
			} else if s.text[s.pos] in [`e`, `E`] {
				// 5.e5
			} else if s.text[s.pos].is_letter() {
				// 5.str()
				call_method = true
				s.pos--
			} else {
				// 5.
			}
		}
	}
	// scan exponential part
	mut has_exp := false
	if s.pos < s.text.len && s.text[s.pos] in [`e`, `E`] {
		has_exp = true
		s.pos++
		if s.pos < s.text.len && s.text[s.pos] in [`-`, `+`] {
			s.pos++
		}
		for s.pos < s.text.len {
			c := s.text[s.pos]
			if !c.is_digit() {
				if !c.is_letter() || s.is_inside_string {
					// 5e5.str()
					if c == `.` && s.pos + 1 < s.text.len && s.text[s.pos + 1].is_letter() {
						call_method = true
					}
					break
				} else if !has_wrong_digit {
					has_wrong_digit = true
					first_wrong_digit_pos = s.pos
					first_wrong_digit = c
				}
			}
			s.pos++
		}
	}
	if has_wrong_digit {
		// error check: wrong digit
		s.pos = first_wrong_digit_pos // adjust error position
		s.error('this number has unsuitable digit `$first_wrong_digit.str()`')
	} else if s.text[s.pos - 1] in [`e`, `E`] {
		// error check: 5e
		s.pos-- // adjust error position
		s.error('exponent has no digits')
	} else if s.pos < s.text.len &&
		s.text[s.pos] == `.` && !is_range && !call_method {
		// error check: 1.23.4, 123.e+3.4
		if has_exp {
			s.error('exponential part should be integer')
		} else {
			s.error('too many decimal points in number')
		}
	}
	number := filter_num_sep(s.text.str, start_pos, s.pos)
	s.pos--
	return number
}

fn (mut s Scanner) ident_number() string {
	if s.expect('0b', s.pos) {
		return s.ident_bin_number()
	} else if s.expect('0x', s.pos) {
		return s.ident_hex_number()
	} else if s.expect('0o', s.pos) {
		return s.ident_oct_number()
	} else {
		return s.ident_dec_number()
	}
}
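// Dispatch examples: `0b1010` -> ident_bin_number(), `0xFF` -> ident_hex_number(),
// `0o755` -> ident_oct_number(), anything else (`123`, `1_000`, `2.5e-3`) -> ident_dec_number().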

[inline]
fn (mut s Scanner) skip_whitespace() {
	// if s.is_vh { println('vh') return }
	for s.pos < s.text.len && s.text[s.pos].is_space() {
		if util.is_nl(s.text[s.pos]) && s.is_vh {
			return
		}
		// Count \r\n as one line
		if util.is_nl(s.text[s.pos]) && !s.expect('\r\n', s.pos - 1) {
			s.inc_line_number()
		}
		s.pos++
	}
}

fn (mut s Scanner) end_of_file() token.Token {
	s.eofs++
	if s.eofs > 50 {
		s.line_nr--
		s.error('the end of file `$s.file_path` has been reached 50 times already, the v parser is probably stuck.\n' +
			'This should not happen. Please report the bug here, and include the last 2-3 lines of your source code:\n' +
			'https://github.com/vlang/v/issues/new?labels=Bug&template=bug_report.md')
	}
	if s.pos != s.text.len && s.eofs == 1 {
		s.inc_line_number()
	}
	s.pos = s.text.len
	return s.new_token(.eof, '', 1)
}

pub fn (mut s Scanner) scan_all_tokens_in_buffer() {
	// s.scan_all_tokens_in_buffer is used mainly by vdoc,
	// in order to implement the .toplevel_comments mode.
	cmode := s.comments_mode
	s.comments_mode = .parse_comments
	for {
		mut t := s.text_scan()
		s.all_tokens << t
		if t.kind == .eof {
			break
		}
	}
	s.comments_mode = cmode
	s.tidx = 0
	$if debugscanner ? {
		for t in s.all_tokens {
			eprintln('> tidx:${t.tidx:-5} | kind: ${t.kind:-10} | lit: $t.lit')
		}
	}
}
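// Rough usage sketch for a vdoc-like consumer (the caller's variable names are assumed):
//   mut s := new_scanner_file(path, .toplevel_comments, prefs)
//   s.scan_all_tokens_in_buffer()
//   // from now on s.scan() serves tokens from s.all_tokens via buffer_scan()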

pub fn (mut s Scanner) scan() token.Token {
	if s.comments_mode == .toplevel_comments {
		return s.buffer_scan()
	}
	return s.text_scan()
}

pub fn (mut s Scanner) buffer_scan() token.Token {
	for {
		cidx := s.tidx
		s.tidx++
		if cidx >= s.all_tokens.len {
			return s.end_of_file()
		}
		if s.all_tokens[cidx].kind == .comment {
			if !s.should_parse_comment() {
				continue
			}
		}
		return s.all_tokens[cidx]
	}
}

[inline]
fn (s Scanner) look_ahead(n int) byte {
	if s.pos + n < s.text.len {
		return s.text[s.pos + n]
	} else {
		return `\0`
	}
}

fn (mut s Scanner) text_scan() token.Token {
	// The for loop here is so that instead of doing
	// `return s.scan()` (which will use a new call stack frame),
	// text_scan can just do continue, keeping
	// memory & stack usage low.
	// That optimization mostly matters for long sections
	// of comments and string literals.
	for {
		// if s.comments_mode == .parse_comments {
		// println('\nscan()')
		// }
		// if s.line_comment != '' {
		// s.fgenln('// LC "$s.line_comment"')
		// s.line_comment = ''
		// }
		if s.is_started {
			s.pos++
		}
		s.is_started = true
		if s.pos >= s.text.len {
			return s.end_of_file()
		}
		if !s.is_inside_string {
			s.skip_whitespace()
		}
		// End of $var, start next string
		if s.is_inter_end {
			if s.text[s.pos] == s.quote {
				s.is_inter_end = false
				return s.new_token(.string, '', 1)
			}
			s.is_inter_end = false
			ident_string := s.ident_string()
			return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
		}
		s.skip_whitespace()
		// end of file
		if s.pos >= s.text.len {
			return s.end_of_file()
		}
		// handle each char
		c := s.text[s.pos]
		nextc := s.look_ahead(1)
		// name or keyword
		if util.is_name_char(c) {
			name := s.ident_name()
			// tmp hack to detect . in ${}
			// Check if not .eof to prevent panic
			next_char := s.look_ahead(1)
			kind := token.keywords[name]
			if kind != .unknown {
				if kind == .key_fn {
					s.struct_name = s.ident_struct_name()
					s.fn_name = s.ident_fn_name()
				} else if kind == .key_module {
					s.mod_name = s.ident_mod_name()
				}
				return s.new_token(kind, name, name.len)
			}
			// 'asdf $b' => "b" is the last name in the string, don't start parsing string
			// at the next ', skip it
			if s.is_inside_string {
				if next_char == s.quote {
					s.is_inter_end = true
					s.is_inter_start = false
					s.is_inside_string = false
				}
			}
			// end of `$expr`
			// allow `'$a.b'` and `'$a.c()'`
			if s.is_inter_start && next_char == `(` {
				if s.look_ahead(2) != `)` {
					s.warn('use e.g. `\${f(expr)}` or `\$name\\(` instead of `\$f(expr)`')
				}
			} else if s.is_inter_start && next_char != `.` {
				s.is_inter_end = true
				s.is_inter_start = false
			}
			if s.pos == 0 && next_char == ` ` {
				// If a single letter name at the start of the file, increment
				// Otherwise the scanner would be stuck at s.pos = 0
				s.pos++
			}
			return s.new_token(.name, name, name.len)
		} else if c.is_digit() || (c == `.` && nextc.is_digit()) {
			// `123`, `.123`
			if !s.is_inside_string {
				// In C ints with `0` prefix are octal (in V they're decimal), so discarding leading zeros is needed.
				mut start_pos := s.pos
				for start_pos < s.text.len && s.text[start_pos] == `0` {
					start_pos++
				}
				mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
				// for 0b, 0o, 0x the leading zero shouldn't be jumped
				if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
					prefix_zero_num--
				}
				s.pos += prefix_zero_num // jump these zeros
			}
			num := s.ident_number()
			return s.new_token(.number, num, num.len)
		}
		// Handle `'$fn()'`
		if c == `)` && s.is_inter_start {
			next_char := s.look_ahead(1)
			if next_char != `.` {
				s.is_inter_end = true
				s.is_inter_start = false
				if next_char == s.quote {
					s.is_inside_string = false
				}
				return s.new_token(.rpar, '', 1)
			}
		}
		// all other tokens
		match c {
			`+` {
				if nextc == `+` {
					s.pos++
					return s.new_token(.inc, '', 2)
				} else if nextc == `=` {
					s.pos++
					return s.new_token(.plus_assign, '', 2)
				}
				return s.new_token(.plus, '', 1)
			}
			`-` {
				if nextc == `-` {
					s.pos++
					return s.new_token(.dec, '', 2)
				} else if nextc == `=` {
					s.pos++
					return s.new_token(.minus_assign, '', 2)
				}
				return s.new_token(.minus, '', 1)
			}
			`*` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.mult_assign, '', 2)
				}
				return s.new_token(.mul, '', 1)
			}
			`^` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.xor_assign, '', 2)
				}
				return s.new_token(.xor, '', 1)
			}
			`%` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.mod_assign, '', 2)
				}
				return s.new_token(.mod, '', 1)
			}
			`?` {
				return s.new_token(.question, '', 1)
			}
			single_quote, double_quote {
				ident_string := s.ident_string()
				return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
			}
			`\`` {
				// ` // apostrophe balance comment. do not remove
				ident_char := s.ident_char()
				return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
			}
			`(` {
				// TODO `$if vet {` for performance
				if s.pref.is_vet && s.text[s.pos + 1] == ` ` {
					s.vet_error('Looks like you are adding a space after `(`')
				}
				return s.new_token(.lpar, '', 1)
			}
			`)` {
				// TODO `$if vet {` for performance
				if s.pref.is_vet && s.text[s.pos - 1] == ` ` {
					s.vet_error('Looks like you are adding a space before `)`')
				}
				return s.new_token(.rpar, '', 1)
			}
			`[` {
				return s.new_token(.lsbr, '', 1)
			}
			`]` {
				return s.new_token(.rsbr, '', 1)
			}
			`{` {
				// Skip { in `${` in strings
				if s.is_inside_string {
					continue
				}
				return s.new_token(.lcbr, '', 1)
			}
			`$` {
				if s.is_inside_string {
					return s.new_token(.str_dollar, '', 1)
				} else {
					return s.new_token(.dollar, '', 1)
				}
			}
			`}` {
				// s = `hello $name !`
				// s = `hello ${name} !`
				if s.is_inside_string {
					s.pos++
					if s.text[s.pos] == s.quote {
						s.is_inside_string = false
						return s.new_token(.string, '', 1)
					}
					ident_string := s.ident_string()
					return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
				} else {
					return s.new_token(.rcbr, '', 1)
				}
			}
			`&` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.and_assign, '', 2)
				}
				afternextc := s.look_ahead(2)
				if nextc == `&` && afternextc.is_space() {
					s.pos++
					return s.new_token(.and, '', 2)
				}
				return s.new_token(.amp, '', 1)
			}
			`|` {
				if nextc == `|` {
					s.pos++
					return s.new_token(.logical_or, '', 2)
				}
				if nextc == `=` {
					s.pos++
					return s.new_token(.or_assign, '', 2)
				}
				return s.new_token(.pipe, '', 1)
			}
			`,` {
				return s.new_token(.comma, '', 1)
			}
			`@` {
				s.pos++
				name := s.ident_name()
				if s.is_fmt {
					return s.new_token(.name, '@' + name, name.len + 1)
				}
				// @FN => will be substituted with the name of the current V function
				// @MOD => will be substituted with the name of the current V module
				// @STRUCT => will be substituted with the name of the current V struct
				// @VEXE => will be substituted with the path to the V compiler
				// @FILE => will be substituted with the path of the V source file
				// @LINE => will be substituted with the V line number where it appears (as a string).
				// @COLUMN => will be substituted with the column where it appears (as a string).
				// @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string).
				// @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string).
				// This allows things like this:
				// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN)
				// ... which is useful while debugging/tracing
				if name == 'FN' {
					return s.new_token(.string, s.fn_name, 3)
				}
				if name == 'MOD' {
					return s.new_token(.string, s.mod_name, 4)
				}
				if name == 'STRUCT' {
					return s.new_token(.string, s.struct_name, 7)
				}
				if name == 'VEXE' {
					vexe := pref.vexe_path()
					return s.new_token(.string, util.cescaped_path(vexe), 5)
				}
				if name == 'FILE' {
					fpath := os.real_path(s.file_path)
					return s.new_token(.string, util.cescaped_path(fpath), 5)
				}
				if name == 'LINE' {
					return s.new_token(.string, (s.line_nr + 1).str(), 5)
				}
				if name == 'COLUMN' {
					return s.new_token(.string, s.current_column().str(), 7)
				}
				if name == 'VHASH' {
					return s.new_token(.string, util.vhash(), 6)
				}
				if name == 'VMOD_FILE' {
					if s.vmod_file_content.len == 0 {
						mut mcache := vmod.get_cache()
						vmod_file_location := mcache.get_by_file(s.file_path)
						if vmod_file_location.vmod_file.len == 0 {
							s.error('@VMOD_FILE can be used only in projects that have a v.mod file')
						}
						vmod_content := os.read_file(vmod_file_location.vmod_file) or {
							''
						}
						$if windows {
							s.vmod_file_content = vmod_content.replace('\r\n', '\n')
						} $else {
							s.vmod_file_content = vmod_content
						}
					}
					return s.new_token(.string, s.vmod_file_content, 10)
				}
				if !token.is_key(name) {
					s.error('@ must be used before keywords (e.g. `@type string`)')
				}
				return s.new_token(.name, name, name.len)
			}
			/*
			case `\r`:
				if nextc == `\n` {
					s.pos++
					s.last_nl_pos = s.pos
					return s.new_token(.nl, '')
				}
			}
			case `\n`:
				s.last_nl_pos = s.pos
				return s.new_token(.nl, '')
			}
			*/
			`.` {
				if nextc == `.` {
					s.pos++
					if s.text[s.pos + 1] == `.` {
						s.pos++
						return s.new_token(.ellipsis, '', 3)
					}
					return s.new_token(.dotdot, '', 2)
				}
				return s.new_token(.dot, '', 1)
			}
			`#` {
				start := s.pos + 1
				s.ignore_line()
				if nextc == `!` {
					// treat shebang line (#!) as a comment
					s.line_comment = s.text[start + 1..s.pos].trim_space()
					// s.fgenln('// shebang line "$s.line_comment"')
					continue
				}
				hash := s.text[start..s.pos].trim_space()
				return s.new_token(.hash, hash, hash.len)
			}
			`>` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.ge, '', 2)
				} else if nextc == `>` {
					if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
						s.pos += 2
						return s.new_token(.right_shift_assign, '', 3)
					}
					s.pos++
					return s.new_token(.right_shift, '', 2)
				} else {
					return s.new_token(.gt, '', 1)
				}
			}
			0xE2 {
				if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
					// case `≠`:
					s.pos += 2
					return s.new_token(.ne, '', 3)
				} else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
					s.pos += 2
					return s.new_token(.le, '', 3)
				} else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
					s.pos += 2
					return s.new_token(.ge, '', 3)
				}
			}
			`<` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.le, '', 2)
				} else if nextc == `<` {
					if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
						s.pos += 2
						return s.new_token(.left_shift_assign, '', 3)
					}
					s.pos++
					return s.new_token(.left_shift, '', 2)
				} else if nextc == `-` {
					s.pos++
					return s.new_token(.arrow, '', 2)
				} else {
					return s.new_token(.lt, '', 1)
				}
			}
			`=` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.eq, '', 2)
				} else {
					return s.new_token(.assign, '', 1)
				}
			}
			`:` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.decl_assign, '', 2)
				} else {
					return s.new_token(.colon, '', 1)
				}
			}
			`;` {
				return s.new_token(.semicolon, '', 1)
			}
			`!` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.ne, '', 2)
				} else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() {
					s.pos += 2
					return s.new_token(.not_in, '', 3)
				} else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() {
					s.pos += 2
					return s.new_token(.not_is, '', 3)
				} else {
					return s.new_token(.not, '', 1)
				}
			}
			`~` {
				return s.new_token(.bit_not, '', 1)
			}
			`/` {
				if nextc == `=` {
					s.pos++
					return s.new_token(.div_assign, '', 2)
				}
				if nextc == `/` {
					start := s.pos + 1
					s.ignore_line()
					mut comment_line_end := s.pos
					if s.text[s.pos - 1] == `\r` {
						comment_line_end--
					} else {
						// fix line_nr, \n was read; the comment is marked on the next line
						s.pos--
						s.line_nr--
					}
					if s.should_parse_comment() {
						s.line_comment = s.text[start + 1..comment_line_end]
						mut comment := s.line_comment.trim_space()
						// Find out if this comment is on its own line (for vfmt)
						mut is_separate_line_comment := true
						for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- {
							if s.text[j] !in [`\t`, ` `] {
								is_separate_line_comment = false
							}
						}
						if is_separate_line_comment {
							comment = '|' + comment
						}
						return s.new_token(.comment, comment, comment.len + 2)
					}
					// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
					// Skip the comment (return the next token)
					continue
				}
				// Multiline comments
				if nextc == `*` {
					start := s.pos + 2
					mut nest_count := 1
					// Skip comment
					for nest_count > 0 {
						s.pos++
						if s.pos >= s.text.len {
							s.line_nr--
							s.error('comment not terminated')
						}
						if s.text[s.pos] == `\n` {
							s.inc_line_number()
							continue
						}
						if s.expect('/*', s.pos) {
							nest_count++
							continue
						}
						if s.expect('*/', s.pos) {
							nest_count--
						}
					}
					s.pos++
					if s.should_parse_comment() {
						comment := s.text[start..(s.pos - 1)].trim_space()
						return s.new_token(.comment, comment, comment.len + 4)
					}
					// Skip if not in fmt mode
					continue
				}
				return s.new_token(.div, '', 1)
			}
			else {}
		}
		$if windows {
			if c == `\0` {
				return s.end_of_file()
			}
		}
		s.error('invalid character `$c.str()`')
		break
	}
	return s.end_of_file()
}

fn (s &Scanner) current_column() int {
	return s.pos - s.last_nl_pos
}

fn (s &Scanner) count_symbol_before(p int, sym byte) int {
	mut count := 0
	for i := p; i >= 0; i-- {
		if s.text[i] != sym {
			break
		}
		count++
	}
	return count
}

fn (mut s Scanner) ident_string() string {
	q := s.text[s.pos]
	is_quote := q == single_quote || q == double_quote
	is_raw := is_quote && s.pos > 0 && s.text[s.pos - 1] == `r`
	is_cstr := is_quote && s.pos > 0 && s.text[s.pos - 1] == `c`
	if is_quote && !s.is_inside_string {
		s.quote = q
	}
	// if s.file_path.contains('string_test') {
	// println('\nident_string() at char=${s.text[s.pos].str()}')
	// println('linenr=$s.line_nr quote= $qquote ${qquote.str()}')
	// }
	mut n_cr_chars := 0
	mut start := s.pos
	s.is_inside_string = false
	slash := `\\`
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		c := s.text[s.pos]
		prevc := s.text[s.pos - 1]
		// end of string
		if c == s.quote && (prevc != slash || (prevc == slash && s.text[s.pos - 2] == slash)) {
			// handle '123\\' slash at the end
			break
		}
		if c == `\r` {
			n_cr_chars++
		}
		if c == `\n` {
			s.inc_line_number()
		}
		// Don't allow \0
		if c == `0` && s.pos > 2 && s.text[s.pos - 1] == slash {
			if s.pos < s.text.len - 1 && s.text[s.pos + 1].is_digit() {
			} else if !is_cstr {
				s.error('0 character in a string literal')
			}
		}
		// Don't allow \x00
		if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
			if !is_cstr {
				s.error('0 character in a string literal')
			}
		}
		// ${var} (ignore in vfmt mode)
		if prevc == `$` && c == `{` && !is_raw && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.is_inside_string = true
			// so that s.pos points to $ at the next step
			s.pos -= 2
			break
		}
		// $var
		if prevc == `$` && util.is_name_char(c) && !is_raw &&
			s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
			s.is_inside_string = true
			s.is_inter_start = true
			s.pos -= 2
			break
		}
	}
	mut lit := ''
	if s.text[start] == s.quote {
		start++
	}
	mut end := s.pos
	if s.is_inside_string {
		end++
	}
	if start <= s.pos {
		mut string_so_far := s.text[start..end]
		if n_cr_chars > 0 {
			string_so_far = string_so_far.replace('\r', '')
		}
		if string_so_far.contains('\\\n') {
			lit = trim_slash_line_break(string_so_far)
		} else {
			lit = string_so_far
		}
	}
	return lit
}
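// Interpolation sketch (derived from the logic above): for 'hello $name' the scanner
// roughly emits .string('hello '), .str_dollar, .name('name'), then a closing (possibly
// empty) .string token; the `{`/`}` of `${expr}` are skipped or folded back into string
// parts by text_scan().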

fn trim_slash_line_break(s string) string {
	mut start := 0
	mut ret_str := s
	for {
		idx := ret_str.index_after('\\\n', start)
		if idx != -1 {
			ret_str = ret_str[..idx] + ret_str[idx + 2..].trim_left(' \n\t\v\f\r')
			start = idx
		} else {
			break
		}
	}
	return ret_str
}
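// For example, trim_slash_line_break('abc\\\n    def') returns 'abcdef':
// the backslash-newline pair and the following indentation are removed.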

fn (mut s Scanner) ident_char() string {
	start := s.pos
	slash := `\\`
	mut len := 0
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		if s.text[s.pos] != slash {
			len++
		}
		double_slash := s.expect('\\\\', s.pos - 2)
		if s.text[s.pos] == `\`` && (s.text[s.pos - 1] != slash || double_slash) {
			// ` // apostrophe balance comment. do not remove
			if double_slash {
				len++
			}
			break
		}
	}
	len--
	c := s.text[start + 1..s.pos]
	if len != 1 {
		u := c.ustring()
		if u.len != 1 {
			s.error('invalid character literal (more than one character)\n' + 'use quotes for strings, backticks for characters')
		}
	}
	// Escapes a `'` character
	return if c == "\'" {
		'\\' + c
	} else {
		c
	}
}

[inline]
fn (s &Scanner) expect(want string, start_pos int) bool {
	end_pos := start_pos + want.len
	if start_pos < 0 || start_pos >= s.text.len {
		return false
	}
	if end_pos < 0 || end_pos > s.text.len {
		return false
	}
	for pos in start_pos .. end_pos {
		if s.text[pos] != want[pos - start_pos] {
			return false
		}
	}
	return true
}
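// For example, s.expect('0x', i) reports whether the two bytes at s.text[i] and
// s.text[i + 1] are exactly `0` and `x`, without advancing s.pos.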

fn (mut s Scanner) debug_tokens() {
	s.pos = 0
	s.is_started = false
	s.is_debug = true
	fname := s.file_path.all_after_last(os.path_separator)
	println('\n===DEBUG TOKENS $fname===')
	for {
		tok := s.scan()
		tok_kind := tok.kind
		lit := tok.lit
		print(tok_kind.str())
		if lit != '' {
			println(' `$lit`')
		} else {
			println('')
		}
		if tok_kind == .eof {
			println('============ END OF DEBUG TOKENS ==================')
			break
		}
	}
}

[inline]
fn (mut s Scanner) ignore_line() {
	s.eat_to_end_of_line()
	s.inc_line_number()
}

[inline]
fn (mut s Scanner) eat_to_end_of_line() {
	for s.pos < s.text.len && s.text[s.pos] != `\n` {
		s.pos++
	}
}

[inline]
fn (mut s Scanner) inc_line_number() {
	s.last_nl_pos = s.pos
	s.line_nr++
	s.line_ends << s.pos
	if s.line_nr > s.nr_lines {
		s.nr_lines = s.line_nr
	}
}

pub fn (s &Scanner) warn(msg string) {
	pos := token.Position{
		line_nr: s.line_nr
		pos: s.pos
	}
	eprintln(util.formatted_error('warning:', msg, s.file_path, pos))
}

pub fn (s &Scanner) error(msg string) {
	pos := token.Position{
		line_nr: s.line_nr
		pos: s.pos
	}
	eprintln(util.formatted_error('error:', msg, s.file_path, pos))
	exit(1)
}

fn (mut s Scanner) vet_error(msg string) {
	eline := '$s.file_path:$s.line_nr: $msg'
	if s.vet_errors == 0 {
		eprintln(eline)
		return
	}
	s.vet_errors << eline
}

pub fn verror(s string) {
	util.verror('scanner error', s)
}

pub fn (mut s Scanner) codegen(newtext string) {
	// codegen makes sense only during normal compilation
	// feeding code-generated V code to vfmt or vdoc will
	// cause them to output/document ephemeral stuff.
	if s.comments_mode == .skip_comments {
		s.text += newtext
		$if debug_codegen ? {
			eprintln('scanner.codegen:\n $newtext')
		}
	}
}