From 022cc7274071cc7a24448c52a8226a55e4c5a6c1 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Sat, 11 Jul 2020 20:52:05 +0300 Subject: [PATCH] scanner: parse multiple comments and long strings without a stackoverflow --- vlib/v/scanner/scanner.v | 909 ++++++++++++++++++++------------------- 1 file changed, 459 insertions(+), 450 deletions(-) diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index 175ad58cb6..10f64fd75e 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -614,307 +614,314 @@ fn (s Scanner) look_ahead(n int) byte { } fn (mut s Scanner) text_scan() token.Token { - // if s.comments_mode == .parse_comments { - // println('\nscan()') - // } - // if s.line_comment != '' { - // s.fgenln('// LC "$s.line_comment"') - // s.line_comment = '' - // } - if s.is_started { - s.pos++ - } - s.is_started = true - if s.pos >= s.text.len { - return s.end_of_file() - } - if !s.is_inside_string { - s.skip_whitespace() - } - // End of $var, start next string - if s.is_inter_end { - if s.text[s.pos] == s.quote { - s.is_inter_end = false - return s.new_token(.string, '', 1) - } - s.is_inter_end = false - ident_string := s.ident_string() - return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes - } - s.skip_whitespace() - // end of file - if s.pos >= s.text.len { - return s.end_of_file() - } - // handle each char - c := s.text[s.pos] - nextc := s.look_ahead(1) - // name or keyword - if util.is_name_char(c) { - name := s.ident_name() - // tmp hack to detect . in ${} - // Check if not .eof to prevent panic - next_char := s.look_ahead(1) - kind := token.keywords[name] - if kind != .unknown { - if kind == .key_fn { - s.struct_name = s.ident_struct_name() - s.fn_name = s.ident_fn_name() - } else if kind == .key_module { - s.mod_name = s.ident_mod_name() - } - return s.new_token(kind, name, name.len) - } - // 'asdf $b' => "b" is the last name in the string, dont start parsing string - // at the next ', skip it - if s.is_inside_string { - if next_char == s.quote { - s.is_inter_end = true - s.is_inter_start = false - s.is_inside_string = false - } - } - // end of `$expr` - // allow `'$a.b'` and `'$a.c()'` - if s.is_inter_start && next_char != `.` && next_char != `(` { - s.is_inter_end = true - s.is_inter_start = false - } - if s.pos == 0 && next_char == ` ` { - // If a single letter name at the start of the file, increment - // Otherwise the scanner would be stuck at s.pos = 0 + // The for loop here is so that instead of doing + // `return s.scan()` (which will use a new call stack frame), + // text_scan can just do continue, keeping + // memory & stack usage low. + // That optimization mostly matters for long sections + // of comments and string literals. + for { + // if s.comments_mode == .parse_comments { + // println('\nscan()') + // } + // if s.line_comment != '' { + // s.fgenln('// LC "$s.line_comment"') + // s.line_comment = '' + // } + if s.is_started { s.pos++ } - return s.new_token(.name, name, name.len) - } else if c.is_digit() || (c == `.` && nextc.is_digit()) { - // `123`, `.123` + s.is_started = true + if s.pos >= s.text.len { + return s.end_of_file() + } if !s.is_inside_string { - // In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed. - mut start_pos := s.pos - for start_pos < s.text.len && s.text[start_pos] == `0` { - start_pos++ - } - mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped - // for 0b, 0o, 0x the heading zero shouldn't be jumped - if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) { - prefix_zero_num-- - } - s.pos += prefix_zero_num // jump these zeros + s.skip_whitespace() } - num := s.ident_number() - return s.new_token(.number, num, num.len) - } - // Handle `'$fn()'` - if c == `)` && s.is_inter_start { - next_char := s.look_ahead(1) - if next_char != `.` { - s.is_inter_end = true - s.is_inter_start = false - if next_char == s.quote { - s.is_inside_string = false + // End of $var, start next string + if s.is_inter_end { + if s.text[s.pos] == s.quote { + s.is_inter_end = false + return s.new_token(.string, '', 1) } - return s.new_token(.rpar, '', 1) - } - } - // all other tokens - match c { - `+` { - if nextc == `+` { - s.pos++ - return s.new_token(.inc, '', 2) - } else if nextc == `=` { - s.pos++ - return s.new_token(.plus_assign, '', 2) - } - return s.new_token(.plus, '', 1) - } - `-` { - if nextc == `-` { - s.pos++ - return s.new_token(.dec, '', 2) - } else if nextc == `=` { - s.pos++ - return s.new_token(.minus_assign, '', 2) - } - return s.new_token(.minus, '', 1) - } - `*` { - if nextc == `=` { - s.pos++ - return s.new_token(.mult_assign, '', 2) - } - return s.new_token(.mul, '', 1) - } - `^` { - if nextc == `=` { - s.pos++ - return s.new_token(.xor_assign, '', 2) - } - return s.new_token(.xor, '', 1) - } - `%` { - if nextc == `=` { - s.pos++ - return s.new_token(.mod_assign, '', 2) - } - return s.new_token(.mod, '', 1) - } - `?` { - return s.new_token(.question, '', 1) - } - single_quote, double_quote { + s.is_inter_end = false ident_string := s.ident_string() return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes } - `\`` { - // ` // apostrophe balance comment. do not remove - ident_char := s.ident_char() - return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes + s.skip_whitespace() + // end of file + if s.pos >= s.text.len { + return s.end_of_file() } - `(` { - // TODO `$if vet {` for performance - if s.pref.is_vet && s.text[s.pos + 1] == ` ` { - println('$s.file_path:$s.line_nr: Looks like you are adding a space after `(`') - } - return s.new_token(.lpar, '', 1) - } - `)` { - // TODO `$if vet {` for performance - if s.pref.is_vet && s.text[s.pos - 1] == ` ` { - println('$s.file_path:$s.line_nr: Looks like you are adding a space before `)`') - } - return s.new_token(.rpar, '', 1) - } - `[` { - return s.new_token(.lsbr, '', 1) - } - `]` { - return s.new_token(.rsbr, '', 1) - } - `{` { - // Skip { in `${` in strings - if s.is_inside_string { - return s.scan() - } - return s.new_token(.lcbr, '', 1) - } - `$` { - if s.is_inside_string { - return s.new_token(.str_dollar, '', 1) - } else { - return s.new_token(.dollar, '', 1) - } - } - `}` { - // s = `hello $name !` - // s = `hello ${name} !` - if s.is_inside_string { - s.pos++ - if s.text[s.pos] == s.quote { - s.is_inside_string = false - return s.new_token(.string, '', 1) - } - ident_string := s.ident_string() - return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes - } else { - return s.new_token(.rcbr, '', 1) - } - } - `&` { - if nextc == `=` { - s.pos++ - return s.new_token(.and_assign, '', 2) - } - afternextc := s.look_ahead(2) - if nextc == `&` && afternextc.is_space() { - s.pos++ - return s.new_token(.and, '', 2) - } - return s.new_token(.amp, '', 1) - } - `|` { - if nextc == `|` { - s.pos++ - return s.new_token(.logical_or, '', 2) - } - if nextc == `=` { - s.pos++ - return s.new_token(.or_assign, '', 2) - } - return s.new_token(.pipe, '', 1) - } - `,` { - return s.new_token(.comma, '', 1) - } - `@` { - s.pos++ + // handle each char + c := s.text[s.pos] + nextc := s.look_ahead(1) + // name or keyword + if util.is_name_char(c) { name := s.ident_name() - if s.is_fmt { - return s.new_token(.name, '@' + name, name.len + 1) - } - // @FN => will be substituted with the name of the current V function - // @MOD => will be substituted with the name of the current V module - // @STRUCT => will be substituted with the name of the current V struct - // @VEXE => will be substituted with the path to the V compiler - // @FILE => will be substituted with the path of the V source file - // @LINE => will be substituted with the V line number where it appears (as a string). - // @COLUMN => will be substituted with the column where it appears (as a string). - // @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string). - // @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string). - // This allows things like this: - // println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN) - // ... which is useful while debugging/tracing - if name == 'FN' { - return s.new_token(.string, s.fn_name, 3) - } - if name == 'MOD' { - return s.new_token(.string, s.mod_name, 4) - } - if name == 'STRUCT' { - return s.new_token(.string, s.struct_name, 7) - } - if name == 'VEXE' { - vexe := pref.vexe_path() - return s.new_token(.string, util.cescaped_path(vexe), 5) - } - if name == 'FILE' { - fpath := os.real_path(s.file_path) - return s.new_token(.string, util.cescaped_path(fpath), 5) - } - if name == 'LINE' { - return s.new_token(.string, (s.line_nr + 1).str(), 5) - } - if name == 'COLUMN' { - return s.new_token(.string, s.current_column().str(), 7) - } - if name == 'VHASH' { - return s.new_token(.string, util.vhash(), 6) - } - if name == 'VMOD_FILE' { - if s.vmod_file_content.len == 0 { - mcache := vmod.get_cache() - vmod_file_location := mcache.get_by_file(s.file_path) - if vmod_file_location.vmod_file.len == 0 { - s.error('@VMOD_FILE can be used only in projects, that have v.mod file') - } - vmod_content := os.read_file(vmod_file_location.vmod_file) or { - '' - } - $if windows { - s.vmod_file_content = vmod_content.replace('\r\n', '\n') - } $else { - s.vmod_file_content = vmod_content - } + // tmp hack to detect . in ${} + // Check if not .eof to prevent panic + next_char := s.look_ahead(1) + kind := token.keywords[name] + if kind != .unknown { + if kind == .key_fn { + s.struct_name = s.ident_struct_name() + s.fn_name = s.ident_fn_name() + } else if kind == .key_module { + s.mod_name = s.ident_mod_name() } - return s.new_token(.string, s.vmod_file_content, 10) + return s.new_token(kind, name, name.len) } - if !token.is_key(name) { - s.error('@ must be used before keywords (e.g. `@type string`)') + // 'asdf $b' => "b" is the last name in the string, dont start parsing string + // at the next ', skip it + if s.is_inside_string { + if next_char == s.quote { + s.is_inter_end = true + s.is_inter_start = false + s.is_inside_string = false + } + } + // end of `$expr` + // allow `'$a.b'` and `'$a.c()'` + if s.is_inter_start && next_char != `.` && next_char != `(` { + s.is_inter_end = true + s.is_inter_start = false + } + if s.pos == 0 && next_char == ` ` { + // If a single letter name at the start of the file, increment + // Otherwise the scanner would be stuck at s.pos = 0 + s.pos++ } return s.new_token(.name, name, name.len) + } else if c.is_digit() || (c == `.` && nextc.is_digit()) { + // `123`, `.123` + if !s.is_inside_string { + // In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed. + mut start_pos := s.pos + for start_pos < s.text.len && s.text[start_pos] == `0` { + start_pos++ + } + mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped + // for 0b, 0o, 0x the heading zero shouldn't be jumped + if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) { + prefix_zero_num-- + } + s.pos += prefix_zero_num // jump these zeros + } + num := s.ident_number() + return s.new_token(.number, num, num.len) } - /* - case `\r`: + // Handle `'$fn()'` + if c == `)` && s.is_inter_start { + next_char := s.look_ahead(1) + if next_char != `.` { + s.is_inter_end = true + s.is_inter_start = false + if next_char == s.quote { + s.is_inside_string = false + } + return s.new_token(.rpar, '', 1) + } + } + // all other tokens + match c { + `+` { + if nextc == `+` { + s.pos++ + return s.new_token(.inc, '', 2) + } else if nextc == `=` { + s.pos++ + return s.new_token(.plus_assign, '', 2) + } + return s.new_token(.plus, '', 1) + } + `-` { + if nextc == `-` { + s.pos++ + return s.new_token(.dec, '', 2) + } else if nextc == `=` { + s.pos++ + return s.new_token(.minus_assign, '', 2) + } + return s.new_token(.minus, '', 1) + } + `*` { + if nextc == `=` { + s.pos++ + return s.new_token(.mult_assign, '', 2) + } + return s.new_token(.mul, '', 1) + } + `^` { + if nextc == `=` { + s.pos++ + return s.new_token(.xor_assign, '', 2) + } + return s.new_token(.xor, '', 1) + } + `%` { + if nextc == `=` { + s.pos++ + return s.new_token(.mod_assign, '', 2) + } + return s.new_token(.mod, '', 1) + } + `?` { + return s.new_token(.question, '', 1) + } + single_quote, double_quote { + ident_string := s.ident_string() + return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes + } + `\`` { + // ` // apostrophe balance comment. do not remove + ident_char := s.ident_char() + return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes + } + `(` { + // TODO `$if vet {` for performance + if s.pref.is_vet && s.text[s.pos + 1] == ` ` { + println('$s.file_path:$s.line_nr: Looks like you are adding a space after `(`') + } + return s.new_token(.lpar, '', 1) + } + `)` { + // TODO `$if vet {` for performance + if s.pref.is_vet && s.text[s.pos - 1] == ` ` { + println('$s.file_path:$s.line_nr: Looks like you are adding a space before `)`') + } + return s.new_token(.rpar, '', 1) + } + `[` { + return s.new_token(.lsbr, '', 1) + } + `]` { + return s.new_token(.rsbr, '', 1) + } + `{` { + // Skip { in `${` in strings + if s.is_inside_string { + continue + } + return s.new_token(.lcbr, '', 1) + } + `$` { + if s.is_inside_string { + return s.new_token(.str_dollar, '', 1) + } else { + return s.new_token(.dollar, '', 1) + } + } + `}` { + // s = `hello $name !` + // s = `hello ${name} !` + if s.is_inside_string { + s.pos++ + if s.text[s.pos] == s.quote { + s.is_inside_string = false + return s.new_token(.string, '', 1) + } + ident_string := s.ident_string() + return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes + } else { + return s.new_token(.rcbr, '', 1) + } + } + `&` { + if nextc == `=` { + s.pos++ + return s.new_token(.and_assign, '', 2) + } + afternextc := s.look_ahead(2) + if nextc == `&` && afternextc.is_space() { + s.pos++ + return s.new_token(.and, '', 2) + } + return s.new_token(.amp, '', 1) + } + `|` { + if nextc == `|` { + s.pos++ + return s.new_token(.logical_or, '', 2) + } + if nextc == `=` { + s.pos++ + return s.new_token(.or_assign, '', 2) + } + return s.new_token(.pipe, '', 1) + } + `,` { + return s.new_token(.comma, '', 1) + } + `@` { + s.pos++ + name := s.ident_name() + if s.is_fmt { + return s.new_token(.name, '@' + name, name.len + 1) + } + // @FN => will be substituted with the name of the current V function + // @MOD => will be substituted with the name of the current V module + // @STRUCT => will be substituted with the name of the current V struct + // @VEXE => will be substituted with the path to the V compiler + // @FILE => will be substituted with the path of the V source file + // @LINE => will be substituted with the V line number where it appears (as a string). + // @COLUMN => will be substituted with the column where it appears (as a string). + // @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string). + // @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string). + // This allows things like this: + // println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN) + // ... which is useful while debugging/tracing + if name == 'FN' { + return s.new_token(.string, s.fn_name, 3) + } + if name == 'MOD' { + return s.new_token(.string, s.mod_name, 4) + } + if name == 'STRUCT' { + return s.new_token(.string, s.struct_name, 7) + } + if name == 'VEXE' { + vexe := pref.vexe_path() + return s.new_token(.string, util.cescaped_path(vexe), 5) + } + if name == 'FILE' { + fpath := os.real_path(s.file_path) + return s.new_token(.string, util.cescaped_path(fpath), 5) + } + if name == 'LINE' { + return s.new_token(.string, (s.line_nr + 1).str(), 5) + } + if name == 'COLUMN' { + return s.new_token(.string, s.current_column().str(), 7) + } + if name == 'VHASH' { + return s.new_token(.string, util.vhash(), 6) + } + if name == 'VMOD_FILE' { + if s.vmod_file_content.len == 0 { + mcache := vmod.get_cache() + vmod_file_location := mcache.get_by_file(s.file_path) + if vmod_file_location.vmod_file.len == 0 { + s.error('@VMOD_FILE can be used only in projects, that have v.mod file') + } + vmod_content := os.read_file(vmod_file_location.vmod_file) or { + '' + } + $if windows { + s.vmod_file_content = vmod_content.replace('\r\n', '\n') + } $else { + s.vmod_file_content = vmod_content + } + } + return s.new_token(.string, s.vmod_file_content, 10) + } + if !token.is_key(name) { + s.error('@ must be used before keywords (e.g. `@type string`)') + } + return s.new_token(.name, name, name.len) + } + /* + case `\r`: if nextc == `\n` { s.pos++ s.last_nl_pos = s.pos @@ -925,184 +932,186 @@ fn (mut s Scanner) text_scan() token.Token { s.last_nl_pos = s.pos return s.new_token(.nl, '') } - */ - `.` { - if nextc == `.` { - s.pos++ - if s.text[s.pos + 1] == `.` { + */ + `.` { + if nextc == `.` { s.pos++ - return s.new_token(.ellipsis, '', 3) + if s.text[s.pos + 1] == `.` { + s.pos++ + return s.new_token(.ellipsis, '', 3) + } + return s.new_token(.dotdot, '', 2) } - return s.new_token(.dotdot, '', 2) + return s.new_token(.dot, '', 1) } - return s.new_token(.dot, '', 1) - } - `#` { - start := s.pos + 1 - s.ignore_line() - if nextc == `!` { - // treat shebang line (#!) as a comment - s.line_comment = s.text[start + 1..s.pos].trim_space() - // s.fgenln('// shebang line "$s.line_comment"') - return s.scan() - } - hash := s.text[start..s.pos].trim_space() - return s.new_token(.hash, hash, hash.len) - } - `>` { - if nextc == `=` { - s.pos++ - return s.new_token(.ge, '', 2) - } else if nextc == `>` { - if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` { - s.pos += 2 - return s.new_token(.right_shift_assign, '', 3) - } - s.pos++ - return s.new_token(.right_shift, '', 2) - } else { - return s.new_token(.gt, '', 1) - } - } - 0xE2 { - if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 { - // case `≠`: - s.pos += 2 - return s.new_token(.ne, '', 3) - } else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD { - s.pos += 2 - return s.new_token(.le, '', 3) - } else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE { - s.pos += 2 - return s.new_token(.ge, '', 3) - } - } - `<` { - if nextc == `=` { - s.pos++ - return s.new_token(.le, '', 2) - } else if nextc == `<` { - if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` { - s.pos += 2 - return s.new_token(.left_shift_assign, '', 3) - } - s.pos++ - return s.new_token(.left_shift, '', 2) - } else { - return s.new_token(.lt, '', 1) - } - } - `=` { - if nextc == `=` { - s.pos++ - return s.new_token(.eq, '', 2) - } else if nextc == `>` { - s.pos++ - return s.new_token(.arrow, '', 2) - } else { - return s.new_token(.assign, '', 1) - } - } - `:` { - if nextc == `=` { - s.pos++ - return s.new_token(.decl_assign, '', 2) - } else { - return s.new_token(.colon, '', 1) - } - } - `;` { - return s.new_token(.semicolon, '', 1) - } - `!` { - if nextc == `=` { - s.pos++ - return s.new_token(.ne, '', 2) - } else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() { - s.pos += 2 - return s.new_token(.not_in, '', 3) - } else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() { - s.pos += 2 - return s.new_token(.not_is, '', 3) - } else { - return s.new_token(.not, '', 1) - } - } - `~` { - return s.new_token(.bit_not, '', 1) - } - `/` { - if nextc == `=` { - s.pos++ - return s.new_token(.div_assign, '', 2) - } - if nextc == `/` { + `#` { start := s.pos + 1 s.ignore_line() - s.line_comment = s.text[start + 1..s.pos] - mut comment := s.line_comment.trim_space() - s.pos-- - // fix line_nr, \n was read, and the comment is marked - // on the next line - s.line_nr-- - if s.should_parse_comment() { - // Find out if this comment is on its own line (for vfmt) - mut is_separate_line_comment := true - for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- { - if s.text[j] !in [`\t`, ` `] { - is_separate_line_comment = false + if nextc == `!` { + // treat shebang line (#!) as a comment + s.line_comment = s.text[start + 1..s.pos].trim_space() + // s.fgenln('// shebang line "$s.line_comment"') + continue + } + hash := s.text[start..s.pos].trim_space() + return s.new_token(.hash, hash, hash.len) + } + `>` { + if nextc == `=` { + s.pos++ + return s.new_token(.ge, '', 2) + } else if nextc == `>` { + if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` { + s.pos += 2 + return s.new_token(.right_shift_assign, '', 3) + } + s.pos++ + return s.new_token(.right_shift, '', 2) + } else { + return s.new_token(.gt, '', 1) + } + } + 0xE2 { + if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 { + // case `≠`: + s.pos += 2 + return s.new_token(.ne, '', 3) + } else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD { + s.pos += 2 + return s.new_token(.le, '', 3) + } else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE { + s.pos += 2 + return s.new_token(.ge, '', 3) + } + } + `<` { + if nextc == `=` { + s.pos++ + return s.new_token(.le, '', 2) + } else if nextc == `<` { + if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` { + s.pos += 2 + return s.new_token(.left_shift_assign, '', 3) + } + s.pos++ + return s.new_token(.left_shift, '', 2) + } else { + return s.new_token(.lt, '', 1) + } + } + `=` { + if nextc == `=` { + s.pos++ + return s.new_token(.eq, '', 2) + } else if nextc == `>` { + s.pos++ + return s.new_token(.arrow, '', 2) + } else { + return s.new_token(.assign, '', 1) + } + } + `:` { + if nextc == `=` { + s.pos++ + return s.new_token(.decl_assign, '', 2) + } else { + return s.new_token(.colon, '', 1) + } + } + `;` { + return s.new_token(.semicolon, '', 1) + } + `!` { + if nextc == `=` { + s.pos++ + return s.new_token(.ne, '', 2) + } else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() { + s.pos += 2 + return s.new_token(.not_in, '', 3) + } else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() { + s.pos += 2 + return s.new_token(.not_is, '', 3) + } else { + return s.new_token(.not, '', 1) + } + } + `~` { + return s.new_token(.bit_not, '', 1) + } + `/` { + if nextc == `=` { + s.pos++ + return s.new_token(.div_assign, '', 2) + } + if nextc == `/` { + start := s.pos + 1 + s.ignore_line() + s.line_comment = s.text[start + 1..s.pos] + mut comment := s.line_comment.trim_space() + s.pos-- + // fix line_nr, \n was read, and the comment is marked + // on the next line + s.line_nr-- + if s.should_parse_comment() { + // Find out if this comment is on its own line (for vfmt) + mut is_separate_line_comment := true + for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- { + if s.text[j] !in [`\t`, ` `] { + is_separate_line_comment = false + } + } + if is_separate_line_comment { + comment = '|' + comment + } + return s.new_token(.comment, comment, comment.len + 2) + } + // s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"') + // Skip the comment (return the next token) + continue + } + // Multiline comments + if nextc == `*` { + start := s.pos + 2 + mut nest_count := 1 + // Skip comment + for nest_count > 0 { + s.pos++ + if s.pos >= s.text.len { + s.line_nr-- + s.error('comment not terminated') + } + if s.text[s.pos] == `\n` { + s.inc_line_number() + continue + } + if s.expect('/*', s.pos) { + nest_count++ + continue + } + if s.expect('*/', s.pos) { + nest_count-- } } - if is_separate_line_comment { - comment = '|' + comment - } - return s.new_token(.comment, comment, comment.len + 2) - } - // s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"') - // Skip the comment (return the next token) - return s.scan() - } - // Multiline comments - if nextc == `*` { - start := s.pos + 2 - mut nest_count := 1 - // Skip comment - for nest_count > 0 { s.pos++ - if s.pos >= s.text.len { - s.line_nr-- - s.error('comment not terminated') - } - if s.text[s.pos] == `\n` { - s.inc_line_number() - continue - } - if s.expect('/*', s.pos) { - nest_count++ - continue - } - if s.expect('*/', s.pos) { - nest_count-- + if s.should_parse_comment() { + comment := s.text[start..(s.pos - 1)].trim_space() + return s.new_token(.comment, comment, comment.len + 4) } + // Skip if not in fmt mode + continue } - s.pos++ - if s.should_parse_comment() { - comment := s.text[start..(s.pos - 1)].trim_space() - return s.new_token(.comment, comment, comment.len + 4) - } - // Skip if not in fmt mode - return s.scan() + return s.new_token(.div, '', 1) } - return s.new_token(.div, '', 1) + else {} } - else {} - } - $if windows { - if c == `\0` { - return s.end_of_file() + $if windows { + if c == `\0` { + return s.end_of_file() + } } + s.error('invalid character `$c.str()`') + break } - s.error('invalid character `$c.str()`') return s.end_of_file() }