scanner: parse multiple comments and long strings without a stack overflow

pull/5809/head
Delyan Angelov 2020-07-11 20:52:05 +03:00
parent a2cb01e16a
commit 022cc72740
1 changed files with 459 additions and 450 deletions

View File

@ -614,307 +614,314 @@ fn (s Scanner) look_ahead(n int) byte {
}
fn (mut s Scanner) text_scan() token.Token {
// if s.comments_mode == .parse_comments {
// println('\nscan()')
// }
// if s.line_comment != '' {
// s.fgenln('// LC "$s.line_comment"')
// s.line_comment = ''
// }
if s.is_started {
s.pos++
}
s.is_started = true
if s.pos >= s.text.len {
return s.end_of_file()
}
if !s.is_inside_string {
s.skip_whitespace()
}
// End of $var, start next string
if s.is_inter_end {
if s.text[s.pos] == s.quote {
s.is_inter_end = false
return s.new_token(.string, '', 1)
}
s.is_inter_end = false
ident_string := s.ident_string()
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
}
s.skip_whitespace()
// end of file
if s.pos >= s.text.len {
return s.end_of_file()
}
// handle each char
c := s.text[s.pos]
nextc := s.look_ahead(1)
// name or keyword
if util.is_name_char(c) {
name := s.ident_name()
// tmp hack to detect . in ${}
// Check if not .eof to prevent panic
next_char := s.look_ahead(1)
kind := token.keywords[name]
if kind != .unknown {
if kind == .key_fn {
s.struct_name = s.ident_struct_name()
s.fn_name = s.ident_fn_name()
} else if kind == .key_module {
s.mod_name = s.ident_mod_name()
}
return s.new_token(kind, name, name.len)
}
// 'asdf $b' => "b" is the last name in the string, so don't start parsing
// a new string at the next ' - skip it
if s.is_inside_string {
if next_char == s.quote {
s.is_inter_end = true
s.is_inter_start = false
s.is_inside_string = false
}
}
// end of `$expr`
// allow `'$a.b'` and `'$a.c()'`
if s.is_inter_start && next_char != `.` && next_char != `(` {
s.is_inter_end = true
s.is_inter_start = false
}
if s.pos == 0 && next_char == ` ` {
// If a single letter name at the start of the file, increment
// Otherwise the scanner would be stuck at s.pos = 0
// The for loop here is so that instead of doing
// `return s.scan()` (which will use a new call stack frame),
// text_scan can just do continue, keeping
// memory & stack usage low.
// That optimization mostly matters for long sections
// of comments and string literals.
for {
// if s.comments_mode == .parse_comments {
// println('\nscan()')
// }
// if s.line_comment != '' {
// s.fgenln('// LC "$s.line_comment"')
// s.line_comment = ''
// }
if s.is_started {
s.pos++
}
return s.new_token(.name, name, name.len)
} else if c.is_digit() || (c == `.` && nextc.is_digit()) {
// `123`, `.123`
s.is_started = true
if s.pos >= s.text.len {
return s.end_of_file()
}
if !s.is_inside_string {
// In C, ints with a `0` prefix are octal (in V they're decimal), so the leading zeros need to be discarded.
mut start_pos := s.pos
for start_pos < s.text.len && s.text[start_pos] == `0` {
start_pos++
}
mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
// for 0b, 0o, 0x the leading zero shouldn't be skipped
if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
prefix_zero_num--
}
s.pos += prefix_zero_num // jump these zeros
s.skip_whitespace()
}
num := s.ident_number()
return s.new_token(.number, num, num.len)
}
// Handle `'$fn()'`
if c == `)` && s.is_inter_start {
next_char := s.look_ahead(1)
if next_char != `.` {
s.is_inter_end = true
s.is_inter_start = false
if next_char == s.quote {
s.is_inside_string = false
// End of $var, start next string
if s.is_inter_end {
if s.text[s.pos] == s.quote {
s.is_inter_end = false
return s.new_token(.string, '', 1)
}
return s.new_token(.rpar, '', 1)
}
}
// all other tokens
match c {
`+` {
if nextc == `+` {
s.pos++
return s.new_token(.inc, '', 2)
} else if nextc == `=` {
s.pos++
return s.new_token(.plus_assign, '', 2)
}
return s.new_token(.plus, '', 1)
}
`-` {
if nextc == `-` {
s.pos++
return s.new_token(.dec, '', 2)
} else if nextc == `=` {
s.pos++
return s.new_token(.minus_assign, '', 2)
}
return s.new_token(.minus, '', 1)
}
`*` {
if nextc == `=` {
s.pos++
return s.new_token(.mult_assign, '', 2)
}
return s.new_token(.mul, '', 1)
}
`^` {
if nextc == `=` {
s.pos++
return s.new_token(.xor_assign, '', 2)
}
return s.new_token(.xor, '', 1)
}
`%` {
if nextc == `=` {
s.pos++
return s.new_token(.mod_assign, '', 2)
}
return s.new_token(.mod, '', 1)
}
`?` {
return s.new_token(.question, '', 1)
}
single_quote, double_quote {
s.is_inter_end = false
ident_string := s.ident_string()
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
}
`\`` {
// ` // apostrophe balance comment. do not remove
ident_char := s.ident_char()
return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
s.skip_whitespace()
// end of file
if s.pos >= s.text.len {
return s.end_of_file()
}
`(` {
// TODO `$if vet {` for performance
if s.pref.is_vet && s.text[s.pos + 1] == ` ` {
println('$s.file_path:$s.line_nr: Looks like you are adding a space after `(`')
}
return s.new_token(.lpar, '', 1)
}
`)` {
// TODO `$if vet {` for performance
if s.pref.is_vet && s.text[s.pos - 1] == ` ` {
println('$s.file_path:$s.line_nr: Looks like you are adding a space before `)`')
}
return s.new_token(.rpar, '', 1)
}
`[` {
return s.new_token(.lsbr, '', 1)
}
`]` {
return s.new_token(.rsbr, '', 1)
}
`{` {
// Skip { in `${` in strings
if s.is_inside_string {
return s.scan()
}
return s.new_token(.lcbr, '', 1)
}
`$` {
if s.is_inside_string {
return s.new_token(.str_dollar, '', 1)
} else {
return s.new_token(.dollar, '', 1)
}
}
`}` {
// s = `hello $name !`
// s = `hello ${name} !`
if s.is_inside_string {
s.pos++
if s.text[s.pos] == s.quote {
s.is_inside_string = false
return s.new_token(.string, '', 1)
}
ident_string := s.ident_string()
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
} else {
return s.new_token(.rcbr, '', 1)
}
}
`&` {
if nextc == `=` {
s.pos++
return s.new_token(.and_assign, '', 2)
}
afternextc := s.look_ahead(2)
if nextc == `&` && afternextc.is_space() {
s.pos++
return s.new_token(.and, '', 2)
}
return s.new_token(.amp, '', 1)
}
`|` {
if nextc == `|` {
s.pos++
return s.new_token(.logical_or, '', 2)
}
if nextc == `=` {
s.pos++
return s.new_token(.or_assign, '', 2)
}
return s.new_token(.pipe, '', 1)
}
`,` {
return s.new_token(.comma, '', 1)
}
`@` {
s.pos++
// handle each char
c := s.text[s.pos]
nextc := s.look_ahead(1)
// name or keyword
if util.is_name_char(c) {
name := s.ident_name()
if s.is_fmt {
return s.new_token(.name, '@' + name, name.len + 1)
}
// @FN => will be substituted with the name of the current V function
// @MOD => will be substituted with the name of the current V module
// @STRUCT => will be substituted with the name of the current V struct
// @VEXE => will be substituted with the path to the V compiler
// @FILE => will be substituted with the path of the V source file
// @LINE => will be substituted with the V line number where it appears (as a string).
// @COLUMN => will be substituted with the column where it appears (as a string).
// @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string).
// @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string).
// This allows things like this:
// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN)
// ... which is useful while debugging/tracing
if name == 'FN' {
return s.new_token(.string, s.fn_name, 3)
}
if name == 'MOD' {
return s.new_token(.string, s.mod_name, 4)
}
if name == 'STRUCT' {
return s.new_token(.string, s.struct_name, 7)
}
if name == 'VEXE' {
vexe := pref.vexe_path()
return s.new_token(.string, util.cescaped_path(vexe), 5)
}
if name == 'FILE' {
fpath := os.real_path(s.file_path)
return s.new_token(.string, util.cescaped_path(fpath), 5)
}
if name == 'LINE' {
return s.new_token(.string, (s.line_nr + 1).str(), 5)
}
if name == 'COLUMN' {
return s.new_token(.string, s.current_column().str(), 7)
}
if name == 'VHASH' {
return s.new_token(.string, util.vhash(), 6)
}
if name == 'VMOD_FILE' {
if s.vmod_file_content.len == 0 {
mcache := vmod.get_cache()
vmod_file_location := mcache.get_by_file(s.file_path)
if vmod_file_location.vmod_file.len == 0 {
s.error('@VMOD_FILE can be used only in projects, that have v.mod file')
}
vmod_content := os.read_file(vmod_file_location.vmod_file) or {
''
}
$if windows {
s.vmod_file_content = vmod_content.replace('\r\n', '\n')
} $else {
s.vmod_file_content = vmod_content
}
// tmp hack to detect . in ${}
// Check if not .eof to prevent panic
next_char := s.look_ahead(1)
kind := token.keywords[name]
if kind != .unknown {
if kind == .key_fn {
s.struct_name = s.ident_struct_name()
s.fn_name = s.ident_fn_name()
} else if kind == .key_module {
s.mod_name = s.ident_mod_name()
}
return s.new_token(.string, s.vmod_file_content, 10)
return s.new_token(kind, name, name.len)
}
if !token.is_key(name) {
s.error('@ must be used before keywords (e.g. `@type string`)')
// 'asdf $b' => "b" is the last name in the string, so don't start parsing
// a new string at the next ' - skip it
if s.is_inside_string {
if next_char == s.quote {
s.is_inter_end = true
s.is_inter_start = false
s.is_inside_string = false
}
}
// end of `$expr`
// allow `'$a.b'` and `'$a.c()'`
if s.is_inter_start && next_char != `.` && next_char != `(` {
s.is_inter_end = true
s.is_inter_start = false
}
if s.pos == 0 && next_char == ` ` {
// If a single letter name at the start of the file, increment
// Otherwise the scanner would be stuck at s.pos = 0
s.pos++
}
return s.new_token(.name, name, name.len)
} else if c.is_digit() || (c == `.` && nextc.is_digit()) {
// `123`, `.123`
if !s.is_inside_string {
// In C, ints with a `0` prefix are octal (in V they're decimal), so the leading zeros need to be discarded.
mut start_pos := s.pos
for start_pos < s.text.len && s.text[start_pos] == `0` {
start_pos++
}
mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
// for 0b, 0o, 0x the leading zero shouldn't be skipped
if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
prefix_zero_num--
}
s.pos += prefix_zero_num // jump these zeros
}
num := s.ident_number()
return s.new_token(.number, num, num.len)
}
/*
case `\r`:
// Handle `'$fn()'`
if c == `)` && s.is_inter_start {
next_char := s.look_ahead(1)
if next_char != `.` {
s.is_inter_end = true
s.is_inter_start = false
if next_char == s.quote {
s.is_inside_string = false
}
return s.new_token(.rpar, '', 1)
}
}
// all other tokens
match c {
`+` {
if nextc == `+` {
s.pos++
return s.new_token(.inc, '', 2)
} else if nextc == `=` {
s.pos++
return s.new_token(.plus_assign, '', 2)
}
return s.new_token(.plus, '', 1)
}
`-` {
if nextc == `-` {
s.pos++
return s.new_token(.dec, '', 2)
} else if nextc == `=` {
s.pos++
return s.new_token(.minus_assign, '', 2)
}
return s.new_token(.minus, '', 1)
}
`*` {
if nextc == `=` {
s.pos++
return s.new_token(.mult_assign, '', 2)
}
return s.new_token(.mul, '', 1)
}
`^` {
if nextc == `=` {
s.pos++
return s.new_token(.xor_assign, '', 2)
}
return s.new_token(.xor, '', 1)
}
`%` {
if nextc == `=` {
s.pos++
return s.new_token(.mod_assign, '', 2)
}
return s.new_token(.mod, '', 1)
}
`?` {
return s.new_token(.question, '', 1)
}
single_quote, double_quote {
ident_string := s.ident_string()
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
}
`\`` {
// ` // apostrophe balance comment. do not remove
ident_char := s.ident_char()
return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
}
`(` {
// TODO `$if vet {` for performance
if s.pref.is_vet && s.text[s.pos + 1] == ` ` {
println('$s.file_path:$s.line_nr: Looks like you are adding a space after `(`')
}
return s.new_token(.lpar, '', 1)
}
`)` {
// TODO `$if vet {` for performance
if s.pref.is_vet && s.text[s.pos - 1] == ` ` {
println('$s.file_path:$s.line_nr: Looks like you are adding a space before `)`')
}
return s.new_token(.rpar, '', 1)
}
`[` {
return s.new_token(.lsbr, '', 1)
}
`]` {
return s.new_token(.rsbr, '', 1)
}
`{` {
// Skip { in `${` in strings
if s.is_inside_string {
continue
}
return s.new_token(.lcbr, '', 1)
}
`$` {
if s.is_inside_string {
return s.new_token(.str_dollar, '', 1)
} else {
return s.new_token(.dollar, '', 1)
}
}
`}` {
// s = `hello $name !`
// s = `hello ${name} !`
if s.is_inside_string {
s.pos++
if s.text[s.pos] == s.quote {
s.is_inside_string = false
return s.new_token(.string, '', 1)
}
ident_string := s.ident_string()
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
} else {
return s.new_token(.rcbr, '', 1)
}
}
`&` {
if nextc == `=` {
s.pos++
return s.new_token(.and_assign, '', 2)
}
afternextc := s.look_ahead(2)
if nextc == `&` && afternextc.is_space() {
s.pos++
return s.new_token(.and, '', 2)
}
return s.new_token(.amp, '', 1)
}
`|` {
if nextc == `|` {
s.pos++
return s.new_token(.logical_or, '', 2)
}
if nextc == `=` {
s.pos++
return s.new_token(.or_assign, '', 2)
}
return s.new_token(.pipe, '', 1)
}
`,` {
return s.new_token(.comma, '', 1)
}
`@` {
s.pos++
name := s.ident_name()
if s.is_fmt {
return s.new_token(.name, '@' + name, name.len + 1)
}
// @FN => will be substituted with the name of the current V function
// @MOD => will be substituted with the name of the current V module
// @STRUCT => will be substituted with the name of the current V struct
// @VEXE => will be substituted with the path to the V compiler
// @FILE => will be substituted with the path of the V source file
// @LINE => will be substituted with the V line number where it appears (as a string).
// @COLUMN => will be substituted with the column where it appears (as a string).
// @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string).
// @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string).
// This allows things like this:
// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN)
// ... which is useful while debugging/tracing
if name == 'FN' {
return s.new_token(.string, s.fn_name, 3)
}
if name == 'MOD' {
return s.new_token(.string, s.mod_name, 4)
}
if name == 'STRUCT' {
return s.new_token(.string, s.struct_name, 7)
}
if name == 'VEXE' {
vexe := pref.vexe_path()
return s.new_token(.string, util.cescaped_path(vexe), 5)
}
if name == 'FILE' {
fpath := os.real_path(s.file_path)
return s.new_token(.string, util.cescaped_path(fpath), 5)
}
if name == 'LINE' {
return s.new_token(.string, (s.line_nr + 1).str(), 5)
}
if name == 'COLUMN' {
return s.new_token(.string, s.current_column().str(), 7)
}
if name == 'VHASH' {
return s.new_token(.string, util.vhash(), 6)
}
if name == 'VMOD_FILE' {
if s.vmod_file_content.len == 0 {
mcache := vmod.get_cache()
vmod_file_location := mcache.get_by_file(s.file_path)
if vmod_file_location.vmod_file.len == 0 {
s.error('@VMOD_FILE can be used only in projects, that have v.mod file')
}
vmod_content := os.read_file(vmod_file_location.vmod_file) or {
''
}
$if windows {
s.vmod_file_content = vmod_content.replace('\r\n', '\n')
} $else {
s.vmod_file_content = vmod_content
}
}
return s.new_token(.string, s.vmod_file_content, 10)
}
if !token.is_key(name) {
s.error('@ must be used before keywords (e.g. `@type string`)')
}
return s.new_token(.name, name, name.len)
}
/*
case `\r`:
if nextc == `\n` {
s.pos++
s.last_nl_pos = s.pos
@ -925,184 +932,186 @@ fn (mut s Scanner) text_scan() token.Token {
s.last_nl_pos = s.pos
return s.new_token(.nl, '')
}
*/
`.` {
if nextc == `.` {
s.pos++
if s.text[s.pos + 1] == `.` {
*/
`.` {
if nextc == `.` {
s.pos++
return s.new_token(.ellipsis, '', 3)
if s.text[s.pos + 1] == `.` {
s.pos++
return s.new_token(.ellipsis, '', 3)
}
return s.new_token(.dotdot, '', 2)
}
return s.new_token(.dotdot, '', 2)
return s.new_token(.dot, '', 1)
}
return s.new_token(.dot, '', 1)
}
`#` {
start := s.pos + 1
s.ignore_line()
if nextc == `!` {
// treat shebang line (#!) as a comment
s.line_comment = s.text[start + 1..s.pos].trim_space()
// s.fgenln('// shebang line "$s.line_comment"')
return s.scan()
}
hash := s.text[start..s.pos].trim_space()
return s.new_token(.hash, hash, hash.len)
}
`>` {
if nextc == `=` {
s.pos++
return s.new_token(.ge, '', 2)
} else if nextc == `>` {
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
s.pos += 2
return s.new_token(.right_shift_assign, '', 3)
}
s.pos++
return s.new_token(.right_shift, '', 2)
} else {
return s.new_token(.gt, '', 1)
}
}
0xE2 {
if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
// case `≠`:
s.pos += 2
return s.new_token(.ne, '', 3)
} else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
s.pos += 2
return s.new_token(.le, '', 3)
} else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
s.pos += 2
return s.new_token(.ge, '', 3)
}
}
`<` {
if nextc == `=` {
s.pos++
return s.new_token(.le, '', 2)
} else if nextc == `<` {
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
s.pos += 2
return s.new_token(.left_shift_assign, '', 3)
}
s.pos++
return s.new_token(.left_shift, '', 2)
} else {
return s.new_token(.lt, '', 1)
}
}
`=` {
if nextc == `=` {
s.pos++
return s.new_token(.eq, '', 2)
} else if nextc == `>` {
s.pos++
return s.new_token(.arrow, '', 2)
} else {
return s.new_token(.assign, '', 1)
}
}
`:` {
if nextc == `=` {
s.pos++
return s.new_token(.decl_assign, '', 2)
} else {
return s.new_token(.colon, '', 1)
}
}
`;` {
return s.new_token(.semicolon, '', 1)
}
`!` {
if nextc == `=` {
s.pos++
return s.new_token(.ne, '', 2)
} else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() {
s.pos += 2
return s.new_token(.not_in, '', 3)
} else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() {
s.pos += 2
return s.new_token(.not_is, '', 3)
} else {
return s.new_token(.not, '', 1)
}
}
`~` {
return s.new_token(.bit_not, '', 1)
}
`/` {
if nextc == `=` {
s.pos++
return s.new_token(.div_assign, '', 2)
}
if nextc == `/` {
`#` {
start := s.pos + 1
s.ignore_line()
s.line_comment = s.text[start + 1..s.pos]
mut comment := s.line_comment.trim_space()
s.pos--
// fix line_nr, \n was read, and the comment is marked
// on the next line
s.line_nr--
if s.should_parse_comment() {
// Find out if this comment is on its own line (for vfmt)
mut is_separate_line_comment := true
for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- {
if s.text[j] !in [`\t`, ` `] {
is_separate_line_comment = false
if nextc == `!` {
// treat shebang line (#!) as a comment
s.line_comment = s.text[start + 1..s.pos].trim_space()
// s.fgenln('// shebang line "$s.line_comment"')
continue
}
hash := s.text[start..s.pos].trim_space()
return s.new_token(.hash, hash, hash.len)
}
`>` {
if nextc == `=` {
s.pos++
return s.new_token(.ge, '', 2)
} else if nextc == `>` {
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
s.pos += 2
return s.new_token(.right_shift_assign, '', 3)
}
s.pos++
return s.new_token(.right_shift, '', 2)
} else {
return s.new_token(.gt, '', 1)
}
}
0xE2 {
if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
// case `≠`:
s.pos += 2
return s.new_token(.ne, '', 3)
} else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
s.pos += 2
return s.new_token(.le, '', 3)
} else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
s.pos += 2
return s.new_token(.ge, '', 3)
}
}
`<` {
if nextc == `=` {
s.pos++
return s.new_token(.le, '', 2)
} else if nextc == `<` {
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
s.pos += 2
return s.new_token(.left_shift_assign, '', 3)
}
s.pos++
return s.new_token(.left_shift, '', 2)
} else {
return s.new_token(.lt, '', 1)
}
}
`=` {
if nextc == `=` {
s.pos++
return s.new_token(.eq, '', 2)
} else if nextc == `>` {
s.pos++
return s.new_token(.arrow, '', 2)
} else {
return s.new_token(.assign, '', 1)
}
}
`:` {
if nextc == `=` {
s.pos++
return s.new_token(.decl_assign, '', 2)
} else {
return s.new_token(.colon, '', 1)
}
}
`;` {
return s.new_token(.semicolon, '', 1)
}
`!` {
if nextc == `=` {
s.pos++
return s.new_token(.ne, '', 2)
} else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() {
s.pos += 2
return s.new_token(.not_in, '', 3)
} else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() {
s.pos += 2
return s.new_token(.not_is, '', 3)
} else {
return s.new_token(.not, '', 1)
}
}
`~` {
return s.new_token(.bit_not, '', 1)
}
`/` {
if nextc == `=` {
s.pos++
return s.new_token(.div_assign, '', 2)
}
if nextc == `/` {
start := s.pos + 1
s.ignore_line()
s.line_comment = s.text[start + 1..s.pos]
mut comment := s.line_comment.trim_space()
s.pos--
// fix line_nr, \n was read, and the comment is marked
// on the next line
s.line_nr--
if s.should_parse_comment() {
// Find out if this comment is on its own line (for vfmt)
mut is_separate_line_comment := true
for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- {
if s.text[j] !in [`\t`, ` `] {
is_separate_line_comment = false
}
}
if is_separate_line_comment {
comment = '|' + comment
}
return s.new_token(.comment, comment, comment.len + 2)
}
// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
// Skip the comment (return the next token)
continue
}
// Multiline comments
if nextc == `*` {
start := s.pos + 2
mut nest_count := 1
// Skip comment
for nest_count > 0 {
s.pos++
if s.pos >= s.text.len {
s.line_nr--
s.error('comment not terminated')
}
if s.text[s.pos] == `\n` {
s.inc_line_number()
continue
}
if s.expect('/*', s.pos) {
nest_count++
continue
}
if s.expect('*/', s.pos) {
nest_count--
}
}
if is_separate_line_comment {
comment = '|' + comment
}
return s.new_token(.comment, comment, comment.len + 2)
}
// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
// Skip the comment (return the next token)
return s.scan()
}
// Multiline comments
if nextc == `*` {
start := s.pos + 2
mut nest_count := 1
// Skip comment
for nest_count > 0 {
s.pos++
if s.pos >= s.text.len {
s.line_nr--
s.error('comment not terminated')
}
if s.text[s.pos] == `\n` {
s.inc_line_number()
continue
}
if s.expect('/*', s.pos) {
nest_count++
continue
}
if s.expect('*/', s.pos) {
nest_count--
if s.should_parse_comment() {
comment := s.text[start..(s.pos - 1)].trim_space()
return s.new_token(.comment, comment, comment.len + 4)
}
// Skip if not in fmt mode
continue
}
s.pos++
if s.should_parse_comment() {
comment := s.text[start..(s.pos - 1)].trim_space()
return s.new_token(.comment, comment, comment.len + 4)
}
// Skip if not in fmt mode
return s.scan()
return s.new_token(.div, '', 1)
}
return s.new_token(.div, '', 1)
else {}
}
else {}
}
$if windows {
if c == `\0` {
return s.end_of_file()
$if windows {
if c == `\0` {
return s.end_of_file()
}
}
s.error('invalid character `$c.str()`')
break
}
s.error('invalid character `$c.str()`')
return s.end_of_file()
}