scanner: minor refactoring

pull/4189/head
yuyi 2020-04-02 15:27:00 +08:00 committed by GitHub
parent dac304195e
commit 24499aa6de
1 changed file with 79 additions and 84 deletions

@@ -14,6 +14,8 @@ const (
 	double_quote = `"`
 	error_context_before = 2 // how many lines of source context to print before the pointer line
 	error_context_after = 2 // ^^^ same, but after
+	is_fmt = os.getenv('VEXE').contains('vfmt')
+	num_sep = `_` // char used as number separator
 )
 
 pub struct Scanner {
@@ -72,9 +74,6 @@ pub fn new_scanner_file(file_path string, comments_mode CommentsMode) &Scanner {
 	return s
 }
 
-const (
-	is_fmt = os.getenv('VEXE').contains('vfmt')
-)
 // new scanner from string.
 pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
 	return &Scanner{
@@ -87,7 +86,7 @@ pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
 	}
 }
 
-fn (s &Scanner) scan_res(tok_kind token.Kind, lit string) token.Token {
+fn (s &Scanner) new_token(tok_kind token.Kind, lit string) token.Token {
 	return token.Token{
 		kind: tok_kind
 		lit: lit
@@ -107,10 +106,6 @@ fn (s mut Scanner) ident_name() string {
 	return name
 }
 
-const (
-	num_sep = `_` // char used as number separator
-)
-
 fn filter_num_sep(txt byteptr, start int, end int) string {
 	unsafe{
 		mut b := malloc(end - start + 1) // add a byte for the endstring 0
@@ -347,7 +342,7 @@ fn (s mut Scanner) skip_whitespace() {
 fn (s mut Scanner) end_of_file() token.Token {
 	s.pos = s.text.len
 	s.inc_line_number()
-	return s.scan_res(.eof, '')
+	return s.new_token(.eof, '')
 }
 
 pub fn (s mut Scanner) scan() token.Token {
@@ -372,10 +367,10 @@ pub fn (s mut Scanner) scan() token.Token {
 	if s.inter_end {
 		if s.text[s.pos] == s.quote {
 			s.inter_end = false
-			return s.scan_res(.string, '')
+			return s.new_token(.string, '')
 		}
 		s.inter_end = false
-		return s.scan_res(.string, s.ident_string())
+		return s.new_token(.string, s.ident_string())
 	}
 	s.skip_whitespace()
 	// end of file
@@ -395,7 +390,7 @@ pub fn (s mut Scanner) scan() token.Token {
 		// Check if not .eof to prevent panic
 		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
 		if token.is_key(name) {
-			return s.scan_res(token.key_to_token(name), '')
+			return s.new_token(token.key_to_token(name), '')
 		}
 		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 		// at the next ', skip it
@@ -417,7 +412,7 @@ pub fn (s mut Scanner) scan() token.Token {
 			// Otherwise the scanner would be stuck at s.pos = 0
 			s.pos++
 		}
-		return s.scan_res(.name, name)
+		return s.new_token(.name, name)
 	}
 	// `123`, `.123`
 	else if c.is_digit() || (c == `.` && nextc.is_digit()) {
@@ -435,7 +430,7 @@ pub fn (s mut Scanner) scan() token.Token {
 			s.pos += prefix_zero_num // jump these zeros
 		}
 		num := s.ident_number()
-		return s.scan_res(.number, num)
+		return s.new_token(.number, num)
 	}
 	// Handle `'$fn()'`
 	if c == `)` && s.inter_start {
@@ -445,88 +440,88 @@ pub fn (s mut Scanner) scan() token.Token {
 		if next_char == s.quote {
 			s.inside_string = false
 		}
-		return s.scan_res(.rpar, '')
+		return s.new_token(.rpar, '')
 	}
 	// all other tokens
 	match c {
 		`+` {
 			if nextc == `+` {
 				s.pos++
-				return s.scan_res(.inc, '')
+				return s.new_token(.inc, '')
 			}
 			else if nextc == `=` {
 				s.pos++
-				return s.scan_res(.plus_assign, '')
+				return s.new_token(.plus_assign, '')
 			}
-			return s.scan_res(.plus, '')
+			return s.new_token(.plus, '')
 		}
 		`-` {
 			if nextc == `-` {
 				s.pos++
-				return s.scan_res(.dec, '')
+				return s.new_token(.dec, '')
 			}
 			else if nextc == `=` {
 				s.pos++
-				return s.scan_res(.minus_assign, '')
+				return s.new_token(.minus_assign, '')
 			}
-			return s.scan_res(.minus, '')
+			return s.new_token(.minus, '')
 		}
 		`*` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.mult_assign, '')
+				return s.new_token(.mult_assign, '')
 			}
-			return s.scan_res(.mul, '')
+			return s.new_token(.mul, '')
 		}
 		`^` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.xor_assign, '')
+				return s.new_token(.xor_assign, '')
 			}
-			return s.scan_res(.xor, '')
+			return s.new_token(.xor, '')
 		}
 		`%` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.mod_assign, '')
+				return s.new_token(.mod_assign, '')
 			}
-			return s.scan_res(.mod, '')
+			return s.new_token(.mod, '')
 		}
 		`?` {
-			return s.scan_res(.question, '')
+			return s.new_token(.question, '')
 		}
 		single_quote, double_quote {
-			return s.scan_res(.string, s.ident_string())
+			return s.new_token(.string, s.ident_string())
 		}
 		`\`` {
 			// ` // apostrophe balance comment. do not remove
-			return s.scan_res(.chartoken, s.ident_char())
+			return s.new_token(.chartoken, s.ident_char())
 		}
 		`(` {
-			return s.scan_res(.lpar, '')
+			return s.new_token(.lpar, '')
 		}
 		`)` {
-			return s.scan_res(.rpar, '')
+			return s.new_token(.rpar, '')
 		}
 		`[` {
-			return s.scan_res(.lsbr, '')
+			return s.new_token(.lsbr, '')
 		}
 		`]` {
-			return s.scan_res(.rsbr, '')
+			return s.new_token(.rsbr, '')
 		}
 		`{` {
 			// Skip { in `${` in strings
 			if s.inside_string {
 				return s.scan()
 			}
-			return s.scan_res(.lcbr, '')
+			return s.new_token(.lcbr, '')
 		}
 		`$` {
 			if s.inside_string {
-				return s.scan_res(.str_dollar, '')
+				return s.new_token(.str_dollar, '')
 			}
 			else {
-				return s.scan_res(.dollar, '')
+				return s.new_token(.dollar, '')
 			}
 		}
 		`}` {
@@ -536,38 +531,38 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.text[s.pos] == s.quote {
 					s.inside_string = false
-					return s.scan_res(.string, '')
+					return s.new_token(.string, '')
 				}
-				return s.scan_res(.string, s.ident_string())
+				return s.new_token(.string, s.ident_string())
 			}
 			else {
-				return s.scan_res(.rcbr, '')
+				return s.new_token(.rcbr, '')
 			}
 		}
 		`&` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.and_assign, '')
+				return s.new_token(.and_assign, '')
 			}
 			if nextc == `&` {
 				s.pos++
-				return s.scan_res(.and, '')
+				return s.new_token(.and, '')
 			}
-			return s.scan_res(.amp, '')
+			return s.new_token(.amp, '')
 		}
 		`|` {
 			if nextc == `|` {
 				s.pos++
-				return s.scan_res(.logical_or, '')
+				return s.new_token(.logical_or, '')
 			}
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.or_assign, '')
+				return s.new_token(.or_assign, '')
 			}
-			return s.scan_res(.pipe, '')
+			return s.new_token(.pipe, '')
 		}
 		`,` {
-			return s.scan_res(.comma, '')
+			return s.new_token(.comma, '')
 		}
 		`@` {
 			s.pos++
@@ -582,40 +577,40 @@ pub fn (s mut Scanner) scan() token.Token {
 			// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
 			// ... which is useful while debugging/tracing
 			if name == 'FN' {
-				return s.scan_res(.string, s.fn_name)
+				return s.new_token(.string, s.fn_name)
 			}
 			if name == 'VEXE' {
 				vexe := pref.vexe_path()
-				return s.scan_res(.string, cescaped_path(vexe))
+				return s.new_token(.string, cescaped_path(vexe))
 			}
 			if name == 'FILE' {
-				return s.scan_res(.string, cescaped_path(os.real_path(s.file_path)))
+				return s.new_token(.string, cescaped_path(os.real_path(s.file_path)))
 			}
 			if name == 'LINE' {
-				return s.scan_res(.string, (s.line_nr + 1).str())
+				return s.new_token(.string, (s.line_nr + 1).str())
 			}
 			if name == 'COLUMN' {
-				return s.scan_res(.string, (s.current_column()).str())
+				return s.new_token(.string, (s.current_column()).str())
 			}
 			if name == 'VHASH' {
-				return s.scan_res(.string, vhash())
+				return s.new_token(.string, vhash())
 			}
 			if !token.is_key(name) {
 				s.error('@ must be used before keywords (e.g. `@type string`)')
 			}
-			return s.scan_res(.name, name)
+			return s.new_token(.name, name)
 		}
 
 		/*
 		case `\r`:
 			if nextc == `\n` {
 				s.pos++
 				s.last_nl_pos = s.pos
-				return s.scan_res(.nl, '')
+				return s.new_token(.nl, '')
 			}
 		}
 		case `\n`:
 			s.last_nl_pos = s.pos
-			return s.scan_res(.nl, '')
+			return s.new_token(.nl, '')
 		}
 		*/
@@ -624,11 +619,11 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.text[s.pos + 1] == `.` {
 					s.pos++
-					return s.scan_res(.ellipsis, '')
+					return s.new_token(.ellipsis, '')
 				}
-				return s.scan_res(.dotdot, '')
+				return s.new_token(.dotdot, '')
 			}
-			return s.scan_res(.dot, '')
+			return s.new_token(.dot, '')
 		}
 		`#` {
 			start := s.pos + 1
@@ -640,100 +635,100 @@ pub fn (s mut Scanner) scan() token.Token {
 				return s.scan()
 			}
 			hash := s.text[start..s.pos]
-			return s.scan_res(.hash, hash.trim_space())
+			return s.new_token(.hash, hash.trim_space())
 		}
 		`>` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.ge, '')
+				return s.new_token(.ge, '')
 			}
 			else if nextc == `>` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
-					return s.scan_res(.right_shift_assign, '')
+					return s.new_token(.right_shift_assign, '')
 				}
 				s.pos++
-				return s.scan_res(.right_shift, '')
+				return s.new_token(.right_shift, '')
 			}
 			else {
-				return s.scan_res(.gt, '')
+				return s.new_token(.gt, '')
 			}
 		}
 		0xE2 {
 			// case `≠`:
 			if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
 				s.pos += 2
-				return s.scan_res(.ne, '')
+				return s.new_token(.ne, '')
 			}
 			// ⩽
 			else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
 				s.pos += 2
-				return s.scan_res(.le, '')
+				return s.new_token(.le, '')
 			}
 			// ⩾
 			else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
 				s.pos += 2
-				return s.scan_res(.ge, '')
+				return s.new_token(.ge, '')
 			}
 		}
 		`<` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.le, '')
+				return s.new_token(.le, '')
 			}
 			else if nextc == `<` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
-					return s.scan_res(.left_shift_assign, '')
+					return s.new_token(.left_shift_assign, '')
 				}
 				s.pos++
-				return s.scan_res(.left_shift, '')
+				return s.new_token(.left_shift, '')
 			}
 			else {
-				return s.scan_res(.lt, '')
+				return s.new_token(.lt, '')
 			}
 		}
 		`=` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.eq, '')
+				return s.new_token(.eq, '')
 			}
 			else if nextc == `>` {
 				s.pos++
-				return s.scan_res(.arrow, '')
+				return s.new_token(.arrow, '')
 			}
 			else {
-				return s.scan_res(.assign, '')
+				return s.new_token(.assign, '')
 			}
 		}
 		`:` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.decl_assign, '')
+				return s.new_token(.decl_assign, '')
 			}
 			else {
-				return s.scan_res(.colon, '')
+				return s.new_token(.colon, '')
 			}
 		}
 		`;` {
-			return s.scan_res(.semicolon, '')
+			return s.new_token(.semicolon, '')
 		}
 		`!` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.ne, '')
+				return s.new_token(.ne, '')
 			}
 			else {
-				return s.scan_res(.not, '')
+				return s.new_token(.not, '')
 			}
 		}
 		`~` {
-			return s.scan_res(.bit_not, '')
+			return s.new_token(.bit_not, '')
 		}
 		`/` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.div_assign, '')
+				return s.new_token(.div_assign, '')
 			}
 			if nextc == `/` {
 				start := s.pos + 1
@@ -751,7 +746,7 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos--
 				// println("'" + s.text[s.pos].str() + "'")
 				// s.line_nr--
-				return s.scan_res(.line_comment, comment)
+				return s.new_token(.line_comment, comment)
 			}
 			// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 			// Skip the comment (return the next token)
@@ -783,12 +778,12 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.comments_mode == .parse_comments {
 					comment := s.text[start..(s.pos - 1)].trim_space()
-					return s.scan_res(.mline_comment, comment)
+					return s.new_token(.mline_comment, comment)
 				}
 				// Skip if not in fmt mode
 				return s.scan()
 			}
-			return s.scan_res(.div, '')
+			return s.new_token(.div, '')
 		}
 		else {}
 	}
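
For readers who want the net effect of the refactor in one place: every return path in scan() now builds its token through the renamed new_token() helper, and the is_fmt and num_sep constants live in the top-level const block instead of their own blocks. The following is a minimal, hypothetical driver sketch, not part of this commit; it assumes CommentsMode has a .skip_comments variant alongside the .parse_comments one visible in the diff, and V syntax as of this commit (April 2020).

// hypothetical usage sketch, not part of this commit
mut s := new_scanner('x := 1_000 + 2', .skip_comments)
for {
	tok := s.scan() // each return path inside scan() now calls s.new_token()
	if tok.kind == .eof {
		break
	}
	// the 1_000 literal is scanned via ident_number(), which uses
	// filter_num_sep() to strip the num_sep (`_`) separators from its lit
	println(tok.lit)
}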