From 24499aa6decf7686b4aec4cf6e695625dc4707d1 Mon Sep 17 00:00:00 2001
From: yuyi
Date: Thu, 2 Apr 2020 15:27:00 +0800
Subject: [PATCH] scanner: minor refactoring

---
 vlib/v/scanner/scanner.v | 163 +++++++++++++++++++--------------------
 1 file changed, 79 insertions(+), 84 deletions(-)

diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v
index 8b67cf9d89..91439482f6 100644
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@@ -14,6 +14,8 @@ const (
 	double_quote = `"`
 	error_context_before = 2 // how many lines of source context to print before the pointer line
 	error_context_after = 2 // ^^^ same, but after
+	is_fmt = os.getenv('VEXE').contains('vfmt')
+	num_sep = `_` // char used as number separator
 )
 
 pub struct Scanner {
@@ -72,9 +74,6 @@ pub fn new_scanner_file(file_path string, comments_mode CommentsMode) &Scanner {
 	return s
 }
 
-const (
-	is_fmt = os.getenv('VEXE').contains('vfmt')
-)
 // new scanner from string.
 pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
 	return &Scanner{
@@ -87,7 +86,7 @@ pub fn new_scanner(text string, comments_mode CommentsMode) &Scanner {
 	}
 }
 
-fn (s &Scanner) scan_res(tok_kind token.Kind, lit string) token.Token {
+fn (s &Scanner) new_token(tok_kind token.Kind, lit string) token.Token {
 	return token.Token{
 		kind: tok_kind
 		lit: lit
@@ -107,10 +106,6 @@ fn (s mut Scanner) ident_name() string {
 	return name
 }
 
-const (
-	num_sep = `_` // char used as number separator
-)
-
 fn filter_num_sep(txt byteptr, start int, end int) string {
 	unsafe{
 		mut b := malloc(end - start + 1) // add a byte for the endstring 0
@@ -347,7 +342,7 @@ fn (s mut Scanner) skip_whitespace() {
 fn (s mut Scanner) end_of_file() token.Token {
 	s.pos = s.text.len
 	s.inc_line_number()
-	return s.scan_res(.eof, '')
+	return s.new_token(.eof, '')
 }
 
 pub fn (s mut Scanner) scan() token.Token {
@@ -372,10 +367,10 @@ pub fn (s mut Scanner) scan() token.Token {
 	if s.inter_end {
 		if s.text[s.pos] == s.quote {
 			s.inter_end = false
-			return s.scan_res(.string, '')
+			return s.new_token(.string, '')
 		}
 		s.inter_end = false
-		return s.scan_res(.string, s.ident_string())
+		return s.new_token(.string, s.ident_string())
 	}
 	s.skip_whitespace()
 	// end of file
@@ -395,7 +390,7 @@ pub fn (s mut Scanner) scan() token.Token {
 		// Check if not .eof to prevent panic
 		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
 		if token.is_key(name) {
-			return s.scan_res(token.key_to_token(name), '')
+			return s.new_token(token.key_to_token(name), '')
 		}
 		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 		// at the next ', skip it
@@ -417,7 +412,7 @@ pub fn (s mut Scanner) scan() token.Token {
 			// Otherwise the scanner would be stuck at s.pos = 0
 			s.pos++
 		}
-		return s.scan_res(.name, name)
+		return s.new_token(.name, name)
 	}
 	// `123`, `.123`
 	else if c.is_digit() || (c == `.` && nextc.is_digit()) {
@@ -435,7 +430,7 @@ pub fn (s mut Scanner) scan() token.Token {
 			s.pos += prefix_zero_num // jump these zeros
 		}
 		num := s.ident_number()
-		return s.scan_res(.number, num)
+		return s.new_token(.number, num)
 	}
 	// Handle `'$fn()'`
 	if c == `)` && s.inter_start {
@@ -445,88 +440,88 @@ pub fn (s mut Scanner) scan() token.Token {
 		if next_char == s.quote {
 			s.inside_string = false
 		}
-		return s.scan_res(.rpar, '')
+		return s.new_token(.rpar, '')
 	}
 	// all other tokens
 	match c {
 		`+` {
 			if nextc == `+` {
 				s.pos++
-				return s.scan_res(.inc, '')
+				return s.new_token(.inc, '')
 			}
 			else if nextc == `=` {
 				s.pos++
-				return s.scan_res(.plus_assign, '')
+				return s.new_token(.plus_assign, '')
 			}
-			return s.scan_res(.plus, '')
+			return s.new_token(.plus, '')
 		}
 		`-` {
 			if nextc == `-` {
 				s.pos++
-				return s.scan_res(.dec, '')
+				return s.new_token(.dec, '')
 			}
 			else if nextc == `=` {
 				s.pos++
-				return s.scan_res(.minus_assign, '')
+				return s.new_token(.minus_assign, '')
 			}
-			return s.scan_res(.minus, '')
+			return s.new_token(.minus, '')
 		}
 		`*` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.mult_assign, '')
+				return s.new_token(.mult_assign, '')
 			}
-			return s.scan_res(.mul, '')
+			return s.new_token(.mul, '')
 		}
 		`^` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.xor_assign, '')
+				return s.new_token(.xor_assign, '')
 			}
-			return s.scan_res(.xor, '')
+			return s.new_token(.xor, '')
 		}
 		`%` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.mod_assign, '')
+				return s.new_token(.mod_assign, '')
 			}
-			return s.scan_res(.mod, '')
+			return s.new_token(.mod, '')
 		}
 		`?` {
-			return s.scan_res(.question, '')
+			return s.new_token(.question, '')
 		}
 		single_quote, double_quote {
-			return s.scan_res(.string, s.ident_string())
+			return s.new_token(.string, s.ident_string())
 		}
 		`\`` { // ` // apostrophe balance comment. do not remove
-			return s.scan_res(.chartoken, s.ident_char())
+			return s.new_token(.chartoken, s.ident_char())
 		}
 		`(` {
-			return s.scan_res(.lpar, '')
+			return s.new_token(.lpar, '')
 		}
 		`)` {
-			return s.scan_res(.rpar, '')
+			return s.new_token(.rpar, '')
 		}
 		`[` {
-			return s.scan_res(.lsbr, '')
+			return s.new_token(.lsbr, '')
 		}
 		`]` {
-			return s.scan_res(.rsbr, '')
+			return s.new_token(.rsbr, '')
 		}
 		`{` {
 			// Skip { in `${` in strings
 			if s.inside_string {
 				return s.scan()
 			}
-			return s.scan_res(.lcbr, '')
+			return s.new_token(.lcbr, '')
 		}
 		`$` {
 			if s.inside_string {
-				return s.scan_res(.str_dollar, '')
+				return s.new_token(.str_dollar, '')
 			}
 			else {
-				return s.scan_res(.dollar, '')
+				return s.new_token(.dollar, '')
 			}
 		}
 		`}` {
@@ -536,38 +531,38 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.text[s.pos] == s.quote {
 					s.inside_string = false
-					return s.scan_res(.string, '')
+					return s.new_token(.string, '')
 				}
-				return s.scan_res(.string, s.ident_string())
+				return s.new_token(.string, s.ident_string())
 			}
 			else {
-				return s.scan_res(.rcbr, '')
+				return s.new_token(.rcbr, '')
 			}
 		}
 		`&` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.and_assign, '')
+				return s.new_token(.and_assign, '')
 			}
 			if nextc == `&` {
 				s.pos++
-				return s.scan_res(.and, '')
+				return s.new_token(.and, '')
 			}
-			return s.scan_res(.amp, '')
+			return s.new_token(.amp, '')
 		}
 		`|` {
 			if nextc == `|` {
 				s.pos++
-				return s.scan_res(.logical_or, '')
+				return s.new_token(.logical_or, '')
 			}
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.or_assign, '')
+				return s.new_token(.or_assign, '')
 			}
-			return s.scan_res(.pipe, '')
+			return s.new_token(.pipe, '')
 		}
 		`,` {
-			return s.scan_res(.comma, '')
+			return s.new_token(.comma, '')
 		}
 		`@` {
 			s.pos++
@@ -582,40 +577,40 @@ pub fn (s mut Scanner) scan() token.Token {
 			// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
 			// ... which is useful while debugging/tracing
 			if name == 'FN' {
-				return s.scan_res(.string, s.fn_name)
+				return s.new_token(.string, s.fn_name)
 			}
 			if name == 'VEXE' {
 				vexe := pref.vexe_path()
-				return s.scan_res(.string, cescaped_path(vexe))
+				return s.new_token(.string, cescaped_path(vexe))
 			}
 			if name == 'FILE' {
-				return s.scan_res(.string, cescaped_path(os.real_path(s.file_path)))
+				return s.new_token(.string, cescaped_path(os.real_path(s.file_path)))
 			}
 			if name == 'LINE' {
-				return s.scan_res(.string, (s.line_nr + 1).str())
+				return s.new_token(.string, (s.line_nr + 1).str())
 			}
 			if name == 'COLUMN' {
-				return s.scan_res(.string, (s.current_column()).str())
+				return s.new_token(.string, (s.current_column()).str())
 			}
 			if name == 'VHASH' {
-				return s.scan_res(.string, vhash())
+				return s.new_token(.string, vhash())
 			}
 			if !token.is_key(name) {
				s.error('@ must be used before keywords (e.g. `@type string`)')
 			}
-			return s.scan_res(.name, name)
+			return s.new_token(.name, name)
 		}
 		/*
	case `\r`:
		if nextc == `\n` {
			s.pos++
			s.last_nl_pos = s.pos
-			return s.scan_res(.nl, '')
+			return s.new_token(.nl, '')
		}
	}
	case `\n`:
		s.last_nl_pos = s.pos
-		return s.scan_res(.nl, '')
+		return s.new_token(.nl, '')
	}
	*/
 
@@ -624,11 +619,11 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.text[s.pos + 1] == `.` {
 					s.pos++
-					return s.scan_res(.ellipsis, '')
+					return s.new_token(.ellipsis, '')
 				}
-				return s.scan_res(.dotdot, '')
+				return s.new_token(.dotdot, '')
 			}
-			return s.scan_res(.dot, '')
+			return s.new_token(.dot, '')
 		}
 		`#` {
 			start := s.pos + 1
@@ -640,100 +635,100 @@ pub fn (s mut Scanner) scan() token.Token {
 				return s.scan()
 			}
 			hash := s.text[start..s.pos]
-			return s.scan_res(.hash, hash.trim_space())
+			return s.new_token(.hash, hash.trim_space())
 		}
 		`>` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.ge, '')
+				return s.new_token(.ge, '')
 			}
 			else if nextc == `>` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
-					return s.scan_res(.right_shift_assign, '')
+					return s.new_token(.right_shift_assign, '')
 				}
 				s.pos++
-				return s.scan_res(.right_shift, '')
+				return s.new_token(.right_shift, '')
 			}
 			else {
-				return s.scan_res(.gt, '')
+				return s.new_token(.gt, '')
 			}
 		}
 		0xE2 {
 			// case `≠`:
 			if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
 				s.pos += 2
-				return s.scan_res(.ne, '')
+				return s.new_token(.ne, '')
 			}
 			// ⩽
 			else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
 				s.pos += 2
-				return s.scan_res(.le, '')
+				return s.new_token(.le, '')
 			}
 			// ⩾
 			else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
 				s.pos += 2
-				return s.scan_res(.ge, '')
+				return s.new_token(.ge, '')
 			}
 		}
 		`<` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.le, '')
+				return s.new_token(.le, '')
 			}
 			else if nextc == `<` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
-					return s.scan_res(.left_shift_assign, '')
+					return s.new_token(.left_shift_assign, '')
 				}
 				s.pos++
-				return s.scan_res(.left_shift, '')
+				return s.new_token(.left_shift, '')
 			}
 			else {
-				return s.scan_res(.lt, '')
+				return s.new_token(.lt, '')
 			}
 		}
 		`=` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.eq, '')
+				return s.new_token(.eq, '')
 			}
 			else if nextc == `>` {
 				s.pos++
-				return s.scan_res(.arrow, '')
+				return s.new_token(.arrow, '')
 			}
 			else {
-				return s.scan_res(.assign, '')
+				return s.new_token(.assign, '')
 			}
 		}
 		`:` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.decl_assign, '')
+				return s.new_token(.decl_assign, '')
 			}
 			else {
-				return s.scan_res(.colon, '')
+				return s.new_token(.colon, '')
 			}
 		}
 		`;` {
-			return s.scan_res(.semicolon, '')
+			return s.new_token(.semicolon, '')
 		}
 		`!` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.ne, '')
+				return s.new_token(.ne, '')
 			}
 			else {
-				return s.scan_res(.not, '')
+				return s.new_token(.not, '')
 			}
 		}
 		`~` {
-			return s.scan_res(.bit_not, '')
+			return s.new_token(.bit_not, '')
 		}
 		`/` {
 			if nextc == `=` {
 				s.pos++
-				return s.scan_res(.div_assign, '')
+				return s.new_token(.div_assign, '')
 			}
 			if nextc == `/` {
 				start := s.pos + 1
@@ -751,7 +746,7 @@ pub fn (s mut Scanner) scan() token.Token {
 					s.pos--
 					// println("'" + s.text[s.pos].str() + "'")
 					// s.line_nr--
-					return s.scan_res(.line_comment, comment)
+					return s.new_token(.line_comment, comment)
 				}
 				// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 				// Skip the comment (return the next token)
@@ -783,12 +778,12 @@ pub fn (s mut Scanner) scan() token.Token {
 				s.pos++
 				if s.comments_mode == .parse_comments {
 					comment := s.text[start..(s.pos - 1)].trim_space()
-					return s.scan_res(.mline_comment, comment)
+					return s.new_token(.mline_comment, comment)
 				}
 				// Skip if not in fmt mode
 				return s.scan()
 			}
-			return s.scan_res(.div, '')
+			return s.new_token(.div, '')
 		}
 		else {}
 	}