scanner: parse multiple comments and long strings without a stack overflow

pull/5809/head
Delyan Angelov 2020-07-11 20:52:05 +03:00
parent a2cb01e16a
commit 022cc72740
1 changed file with 459 additions and 450 deletions

View File

@ -614,6 +614,13 @@ fn (s Scanner) look_ahead(n int) byte {
} }
fn (mut s Scanner) text_scan() token.Token { fn (mut s Scanner) text_scan() token.Token {
// The `for` loop here exists so that, instead of doing
// `return s.scan()` (which will use a new call stack frame),
// text_scan can simply `continue`, keeping
// memory & stack usage low.
// That optimization mostly matters for long sections
// of comments and string literals.
for {
// if s.comments_mode == .parse_comments { // if s.comments_mode == .parse_comments {
// println('\nscan()') // println('\nscan()')
// } // }
@ -794,7 +801,7 @@ fn (mut s Scanner) text_scan() token.Token {
`{` { `{` {
// Skip { in `${` in strings // Skip { in `${` in strings
if s.is_inside_string { if s.is_inside_string {
return s.scan() continue
} }
return s.new_token(.lcbr, '', 1) return s.new_token(.lcbr, '', 1)
} }
@ -944,7 +951,7 @@ fn (mut s Scanner) text_scan() token.Token {
// treat shebang line (#!) as a comment // treat shebang line (#!) as a comment
s.line_comment = s.text[start + 1..s.pos].trim_space() s.line_comment = s.text[start + 1..s.pos].trim_space()
// s.fgenln('// shebang line "$s.line_comment"') // s.fgenln('// shebang line "$s.line_comment"')
return s.scan() continue
} }
hash := s.text[start..s.pos].trim_space() hash := s.text[start..s.pos].trim_space()
return s.new_token(.hash, hash, hash.len) return s.new_token(.hash, hash, hash.len)
@ -1060,7 +1067,7 @@ fn (mut s Scanner) text_scan() token.Token {
} }
// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"') // s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
// Skip the comment (return the next token) // Skip the comment (return the next token)
return s.scan() continue
} }
// Multiline comments // Multiline comments
if nextc == `*` { if nextc == `*` {
@ -1091,7 +1098,7 @@ fn (mut s Scanner) text_scan() token.Token {
return s.new_token(.comment, comment, comment.len + 4) return s.new_token(.comment, comment, comment.len + 4)
} }
// Skip if not in fmt mode // Skip if not in fmt mode
return s.scan() continue
} }
return s.new_token(.div, '', 1) return s.new_token(.div, '', 1)
} }
@ -1103,6 +1110,8 @@ fn (mut s Scanner) text_scan() token.Token {
} }
} }
s.error('invalid character `$c.str()`') s.error('invalid character `$c.str()`')
break
}
return s.end_of_file() return s.end_of_file()
} }