scanner: parse multiple comments and long strings without a stack overflow

2020-07-11 20:52:05 +03:00 · 2020-07-11 20:52:05 +03:00 · 022cc72740
parent a2cb01e16a
commit 022cc72740
1 changed files with 459 additions and 450 deletions
--- a/vlib/v/scanner/scanner.v
+++ b/vlib/v/scanner/scanner.v
@ -614,307 +614,314 @@ fn (s Scanner) look_ahead(n int) byte {
 }
 fn (mut s Scanner) text_scan() token.Token {
-	// if s.comments_mode == .parse_comments {
+	// The for loop here is so that instead of doing
-	// println('\nscan()')
+	// `return s.scan()` (which will use a new call stack frame),
-	// }
+	// text_scan can just do continue, keeping
-	// if s.line_comment != '' {
+	// memory & stack usage low.
-	// s.fgenln('// LC "$s.line_comment"')
+	// That optimization mostly matters for long sections
-	// s.line_comment = ''
+	// of comments and string literals.
-	// }
+	for {
-	if s.is_started {
+		// if s.comments_mode == .parse_comments {
-		s.pos++
+		// println('\nscan()')
-	}
+		// }
-	s.is_started = true
+		// if s.line_comment != '' {
-	if s.pos >= s.text.len {
+		// s.fgenln('// LC "$s.line_comment"')
-		return s.end_of_file()
+		// s.line_comment = ''
-	}
+		// }
-	if !s.is_inside_string {
+		if s.is_started {
 		s.skip_whitespace()
 	}
 	// End of $var, start next string
 	if s.is_inter_end {
 		if s.text[s.pos] == s.quote {
 			s.is_inter_end = false
 			return s.new_token(.string, '', 1)
 		}
 		s.is_inter_end = false
 		ident_string := s.ident_string()
 		return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 	}
 	s.skip_whitespace()
 	// end of file
 	if s.pos >= s.text.len {
 		return s.end_of_file()
 	}
 	// handle each char
 	c := s.text[s.pos]
 	nextc := s.look_ahead(1)
 	// name or keyword
 	if util.is_name_char(c) {
 		name := s.ident_name()
 		// tmp hack to detect . in ${}
 		// Check if not .eof to prevent panic
 		next_char := s.look_ahead(1)
 		kind := token.keywords[name]
 		if kind != .unknown {
 			if kind == .key_fn {
 				s.struct_name = s.ident_struct_name()
 				s.fn_name = s.ident_fn_name()
 			} else if kind == .key_module {
 				s.mod_name = s.ident_mod_name()
 			}
 			return s.new_token(kind, name, name.len)
 		}
 		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 		// at the next ', skip it
 		if s.is_inside_string {
 			if next_char == s.quote {
 				s.is_inter_end = true
 				s.is_inter_start = false
 				s.is_inside_string = false
 			}
 		}
 		// end of `$expr`
 		// allow `'$a.b'` and `'$a.c()'`
 		if s.is_inter_start && next_char != `.` && next_char != `(` {
 			s.is_inter_end = true
 			s.is_inter_start = false
 		}
 		if s.pos == 0 && next_char == ` ` {
 			// If a single letter name at the start of the file, increment
 			// Otherwise the scanner would be stuck at s.pos = 0
 			s.pos++
 		}
-		return s.new_token(.name, name, name.len)
+		s.is_started = true
-	} else if c.is_digit() || (c == `.` && nextc.is_digit()) {
+		if s.pos >= s.text.len {
-		// `123`, `.123`
+			return s.end_of_file()
 		}
 		if !s.is_inside_string {
-			// In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed.
+			s.skip_whitespace()
 			mut start_pos := s.pos
 			for start_pos < s.text.len && s.text[start_pos] == `0` {
 				start_pos++
 			}
 			mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
 			// for 0b, 0o, 0x the heading zero shouldn't be jumped
 			if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
 				prefix_zero_num--
 			}
 			s.pos += prefix_zero_num // jump these zeros
 		}
-		num := s.ident_number()
+		// End of $var, start next string
-		return s.new_token(.number, num, num.len)
+		if s.is_inter_end {
-	}
+			if s.text[s.pos] == s.quote {
-	// Handle `'$fn()'`
+				s.is_inter_end = false
-	if c == `)` && s.is_inter_start {
+				return s.new_token(.string, '', 1)
 		next_char := s.look_ahead(1)
 		if next_char != `.` {
 			s.is_inter_end = true
 			s.is_inter_start = false
 			if next_char == s.quote {
 				s.is_inside_string = false
 			}
-			return s.new_token(.rpar, '', 1)
+			s.is_inter_end = false
 		}
 	}
 	// all other tokens
 	match c {
 		`+` {
 			if nextc == `+` {
 				s.pos++
 				return s.new_token(.inc, '', 2)
 			} else if nextc == `=` {
 				s.pos++
 				return s.new_token(.plus_assign, '', 2)
 			}
 			return s.new_token(.plus, '', 1)
 		}
 		`-` {
 			if nextc == `-` {
 				s.pos++
 				return s.new_token(.dec, '', 2)
 			} else if nextc == `=` {
 				s.pos++
 				return s.new_token(.minus_assign, '', 2)
 			}
 			return s.new_token(.minus, '', 1)
 		}
 		`*` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.mult_assign, '', 2)
 			}
 			return s.new_token(.mul, '', 1)
 		}
 		`^` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.xor_assign, '', 2)
 			}
 			return s.new_token(.xor, '', 1)
 		}
 		`%` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.mod_assign, '', 2)
 			}
 			return s.new_token(.mod, '', 1)
 		}
 		`?` {
 			return s.new_token(.question, '', 1)
 		}
 		single_quote, double_quote {
 			ident_string := s.ident_string()
 			return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 		}
-		`\`` {
+		s.skip_whitespace()
-			// ` // apostrophe balance comment. do not remove
+		// end of file
-			ident_char := s.ident_char()
+		if s.pos >= s.text.len {
-			return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
+			return s.end_of_file()
 		}
-		`(` {
+		// handle each char
-			// TODO `$if vet {` for performance
+		c := s.text[s.pos]
-			if s.pref.is_vet && s.text[s.pos + 1] == ` ` {
+		nextc := s.look_ahead(1)
-				println('$s.file_path:$s.line_nr: Looks like you are adding a space after `(`')
+		// name or keyword
-			}
+		if util.is_name_char(c) {
 			return s.new_token(.lpar, '', 1)
 		}
 		`)` {
 			// TODO `$if vet {` for performance
 			if s.pref.is_vet && s.text[s.pos - 1] == ` ` {
 				println('$s.file_path:$s.line_nr: Looks like you are adding a space before `)`')
 			}
 			return s.new_token(.rpar, '', 1)
 		}
 		`[` {
 			return s.new_token(.lsbr, '', 1)
 		}
 		`]` {
 			return s.new_token(.rsbr, '', 1)
 		}
 		`{` {
 			// Skip { in `${` in strings
 			if s.is_inside_string {
 				return s.scan()
 			}
 			return s.new_token(.lcbr, '', 1)
 		}
 		`$` {
 			if s.is_inside_string {
 				return s.new_token(.str_dollar, '', 1)
 			} else {
 				return s.new_token(.dollar, '', 1)
 			}
 		}
 		`}` {
 			// s = `hello $name !`
 			// s = `hello ${name} !`
 			if s.is_inside_string {
 				s.pos++
 				if s.text[s.pos] == s.quote {
 					s.is_inside_string = false
 					return s.new_token(.string, '', 1)
 				}
 				ident_string := s.ident_string()
 				return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 			} else {
 				return s.new_token(.rcbr, '', 1)
 			}
 		}
 		`&` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.and_assign, '', 2)
 			}
 			afternextc := s.look_ahead(2)
 			if nextc == `&` && afternextc.is_space() {
 				s.pos++
 				return s.new_token(.and, '', 2)
 			}
 			return s.new_token(.amp, '', 1)
 		}
 		`|` {
 			if nextc == `|` {
 				s.pos++
 				return s.new_token(.logical_or, '', 2)
 			}
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.or_assign, '', 2)
 			}
 			return s.new_token(.pipe, '', 1)
 		}
 		`,` {
 			return s.new_token(.comma, '', 1)
 		}
 		`@` {
 			s.pos++
 			name := s.ident_name()
-			if s.is_fmt {
+			// tmp hack to detect . in ${}
-				return s.new_token(.name, '@' + name, name.len + 1)
+			// Check if not .eof to prevent panic
-			}
+			next_char := s.look_ahead(1)
-			// @FN => will be substituted with the name of the current V function
+			kind := token.keywords[name]
-			// @MOD => will be substituted with the name of the current V module
+			if kind != .unknown {
-			// @STRUCT => will be substituted with the name of the current V struct
+				if kind == .key_fn {
-			// @VEXE => will be substituted with the path to the V compiler
+					s.struct_name = s.ident_struct_name()
-			// @FILE => will be substituted with the path of the V source file
+					s.fn_name = s.ident_fn_name()
-			// @LINE => will be substituted with the V line number where it appears (as a string).
+				} else if kind == .key_module {
-			// @COLUMN => will be substituted with the column where it appears (as a string).
+					s.mod_name = s.ident_mod_name()
 			// @VHASH  => will be substituted with the shortened commit hash of the V compiler (as a string).
 			// @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string).
 			// This allows things like this:
 			// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN)
 			// ... which is useful while debugging/tracing
 			if name == 'FN' {
 				return s.new_token(.string, s.fn_name, 3)
 			}
 			if name == 'MOD' {
 				return s.new_token(.string, s.mod_name, 4)
 			}
 			if name == 'STRUCT' {
 				return s.new_token(.string, s.struct_name, 7)
 			}
 			if name == 'VEXE' {
 				vexe := pref.vexe_path()
 				return s.new_token(.string, util.cescaped_path(vexe), 5)
 			}
 			if name == 'FILE' {
 				fpath := os.real_path(s.file_path)
 				return s.new_token(.string, util.cescaped_path(fpath), 5)
 			}
 			if name == 'LINE' {
 				return s.new_token(.string, (s.line_nr + 1).str(), 5)
 			}
 			if name == 'COLUMN' {
 				return s.new_token(.string, s.current_column().str(), 7)
 			}
 			if name == 'VHASH' {
 				return s.new_token(.string, util.vhash(), 6)
 			}
 			if name == 'VMOD_FILE' {
 				if s.vmod_file_content.len == 0 {
 					mcache := vmod.get_cache()
 					vmod_file_location := mcache.get_by_file(s.file_path)
 					if vmod_file_location.vmod_file.len == 0 {
 						s.error('@VMOD_FILE can be used only in projects, that have v.mod file')
 					}
 					vmod_content := os.read_file(vmod_file_location.vmod_file) or {
 						''
 					}
 					$if windows {
 						s.vmod_file_content = vmod_content.replace('\r\n', '\n')
 					} $else {
 						s.vmod_file_content = vmod_content
 					}
 				}
-				return s.new_token(.string, s.vmod_file_content, 10)
+				return s.new_token(kind, name, name.len)
 			}
-			if !token.is_key(name) {
+			// 'asdf $b' => "b" is the last name in the string, dont start parsing string
-				s.error('@ must be used before keywords (e.g. `@type string`)')
+			// at the next ', skip it
 			if s.is_inside_string {
 				if next_char == s.quote {
 					s.is_inter_end = true
 					s.is_inter_start = false
 					s.is_inside_string = false
 				}
 			}
 			// end of `$expr`
 			// allow `'$a.b'` and `'$a.c()'`
 			if s.is_inter_start && next_char != `.` && next_char != `(` {
 				s.is_inter_end = true
 				s.is_inter_start = false
 			}
 			if s.pos == 0 && next_char == ` ` {
 				// If a single letter name at the start of the file, increment
 				// Otherwise the scanner would be stuck at s.pos = 0
 				s.pos++
 			}
 			return s.new_token(.name, name, name.len)
 		} else if c.is_digit() || (c == `.` && nextc.is_digit()) {
 			// `123`, `.123`
 			if !s.is_inside_string {
 				// In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed.
 				mut start_pos := s.pos
 				for start_pos < s.text.len && s.text[start_pos] == `0` {
 					start_pos++
 				}
 				mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
 				// for 0b, 0o, 0x the heading zero shouldn't be jumped
 				if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) {
 					prefix_zero_num--
 				}
 				s.pos += prefix_zero_num // jump these zeros
 			}
 			num := s.ident_number()
 			return s.new_token(.number, num, num.len)
 		}
-		/*
+		// Handle `'$fn()'`
-		case `\r`:
+		if c == `)` && s.is_inter_start {
 			next_char := s.look_ahead(1)
 			if next_char != `.` {
 				s.is_inter_end = true
 				s.is_inter_start = false
 				if next_char == s.quote {
 					s.is_inside_string = false
 				}
 				return s.new_token(.rpar, '', 1)
 			}
 		}
 		// all other tokens
 		match c {
 			`+` {
 				if nextc == `+` {
 					s.pos++
 					return s.new_token(.inc, '', 2)
 				} else if nextc == `=` {
 					s.pos++
 					return s.new_token(.plus_assign, '', 2)
 				}
 				return s.new_token(.plus, '', 1)
 			}
 			`-` {
 				if nextc == `-` {
 					s.pos++
 					return s.new_token(.dec, '', 2)
 				} else if nextc == `=` {
 					s.pos++
 					return s.new_token(.minus_assign, '', 2)
 				}
 				return s.new_token(.minus, '', 1)
 			}
 			`*` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.mult_assign, '', 2)
 				}
 				return s.new_token(.mul, '', 1)
 			}
 			`^` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.xor_assign, '', 2)
 				}
 				return s.new_token(.xor, '', 1)
 			}
 			`%` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.mod_assign, '', 2)
 				}
 				return s.new_token(.mod, '', 1)
 			}
 			`?` {
 				return s.new_token(.question, '', 1)
 			}
 			single_quote, double_quote {
 				ident_string := s.ident_string()
 				return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 			}
 			`\`` {
 				// ` // apostrophe balance comment. do not remove
 				ident_char := s.ident_char()
 				return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
 			}
 			`(` {
 				// TODO `$if vet {` for performance
 				if s.pref.is_vet && s.text[s.pos + 1] == ` ` {
 					println('$s.file_path:$s.line_nr: Looks like you are adding a space after `(`')
 				}
 				return s.new_token(.lpar, '', 1)
 			}
 			`)` {
 				// TODO `$if vet {` for performance
 				if s.pref.is_vet && s.text[s.pos - 1] == ` ` {
 					println('$s.file_path:$s.line_nr: Looks like you are adding a space before `)`')
 				}
 				return s.new_token(.rpar, '', 1)
 			}
 			`[` {
 				return s.new_token(.lsbr, '', 1)
 			}
 			`]` {
 				return s.new_token(.rsbr, '', 1)
 			}
 			`{` {
 				// Skip { in `${` in strings
 				if s.is_inside_string {
 					continue
 				}
 				return s.new_token(.lcbr, '', 1)
 			}
 			`$` {
 				if s.is_inside_string {
 					return s.new_token(.str_dollar, '', 1)
 				} else {
 					return s.new_token(.dollar, '', 1)
 				}
 			}
 			`}` {
 				// s = `hello $name !`
 				// s = `hello ${name} !`
 				if s.is_inside_string {
 					s.pos++
 					if s.text[s.pos] == s.quote {
 						s.is_inside_string = false
 						return s.new_token(.string, '', 1)
 					}
 					ident_string := s.ident_string()
 					return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 				} else {
 					return s.new_token(.rcbr, '', 1)
 				}
 			}
 			`&` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.and_assign, '', 2)
 				}
 				afternextc := s.look_ahead(2)
 				if nextc == `&` && afternextc.is_space() {
 					s.pos++
 					return s.new_token(.and, '', 2)
 				}
 				return s.new_token(.amp, '', 1)
 			}
 			`|` {
 				if nextc == `|` {
 					s.pos++
 					return s.new_token(.logical_or, '', 2)
 				}
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.or_assign, '', 2)
 				}
 				return s.new_token(.pipe, '', 1)
 			}
 			`,` {
 				return s.new_token(.comma, '', 1)
 			}
 			`@` {
 				s.pos++
 				name := s.ident_name()
 				if s.is_fmt {
 					return s.new_token(.name, '@' + name, name.len + 1)
 				}
 				// @FN => will be substituted with the name of the current V function
 				// @MOD => will be substituted with the name of the current V module
 				// @STRUCT => will be substituted with the name of the current V struct
 				// @VEXE => will be substituted with the path to the V compiler
 				// @FILE => will be substituted with the path of the V source file
 				// @LINE => will be substituted with the V line number where it appears (as a string).
 				// @COLUMN => will be substituted with the column where it appears (as a string).
 				// @VHASH  => will be substituted with the shortened commit hash of the V compiler (as a string).
 				// @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string).
 				// This allows things like this:
 				// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN)
 				// ... which is useful while debugging/tracing
 				if name == 'FN' {
 					return s.new_token(.string, s.fn_name, 3)
 				}
 				if name == 'MOD' {
 					return s.new_token(.string, s.mod_name, 4)
 				}
 				if name == 'STRUCT' {
 					return s.new_token(.string, s.struct_name, 7)
 				}
 				if name == 'VEXE' {
 					vexe := pref.vexe_path()
 					return s.new_token(.string, util.cescaped_path(vexe), 5)
 				}
 				if name == 'FILE' {
 					fpath := os.real_path(s.file_path)
 					return s.new_token(.string, util.cescaped_path(fpath), 5)
 				}
 				if name == 'LINE' {
 					return s.new_token(.string, (s.line_nr + 1).str(), 5)
 				}
 				if name == 'COLUMN' {
 					return s.new_token(.string, s.current_column().str(), 7)
 				}
 				if name == 'VHASH' {
 					return s.new_token(.string, util.vhash(), 6)
 				}
 				if name == 'VMOD_FILE' {
 					if s.vmod_file_content.len == 0 {
 						mcache := vmod.get_cache()
 						vmod_file_location := mcache.get_by_file(s.file_path)
 						if vmod_file_location.vmod_file.len == 0 {
 							s.error('@VMOD_FILE can be used only in projects, that have v.mod file')
 						}
 						vmod_content := os.read_file(vmod_file_location.vmod_file) or {
 							''
 						}
 						$if windows {
 							s.vmod_file_content = vmod_content.replace('\r\n', '\n')
 						} $else {
 							s.vmod_file_content = vmod_content
 						}
 					}
 					return s.new_token(.string, s.vmod_file_content, 10)
 				}
 				if !token.is_key(name) {
 					s.error('@ must be used before keywords (e.g. `@type string`)')
 				}
 				return s.new_token(.name, name, name.len)
 			}
 			/*
 			case `\r`:
 		if nextc == `\n` {
 			s.pos++
 			s.last_nl_pos = s.pos
@ -925,184 +932,186 @@ fn (mut s Scanner) text_scan() token.Token {
 		s.last_nl_pos = s.pos
 		return s.new_token(.nl, '')
 	 }
-		*/
+			*/
-		`.` {
+			`.` {
-			if nextc == `.` {
+				if nextc == `.` {
 				s.pos++
 				if s.text[s.pos + 1] == `.` {
 					s.pos++
-					return s.new_token(.ellipsis, '', 3)
+					if s.text[s.pos + 1] == `.` {
 						s.pos++
 						return s.new_token(.ellipsis, '', 3)
 					}
 					return s.new_token(.dotdot, '', 2)
 				}
-				return s.new_token(.dotdot, '', 2)
+				return s.new_token(.dot, '', 1)
 			}
-			return s.new_token(.dot, '', 1)
+			`#` {
 		}
 		`#` {
 			start := s.pos + 1
 			s.ignore_line()
 			if nextc == `!` {
 				// treat shebang line (#!) as a comment
 				s.line_comment = s.text[start + 1..s.pos].trim_space()
 				// s.fgenln('// shebang line "$s.line_comment"')
 				return s.scan()
 			}
 			hash := s.text[start..s.pos].trim_space()
 			return s.new_token(.hash, hash, hash.len)
 		}
 		`>` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.ge, '', 2)
 			} else if nextc == `>` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
 					return s.new_token(.right_shift_assign, '', 3)
 				}
 				s.pos++
 				return s.new_token(.right_shift, '', 2)
 			} else {
 				return s.new_token(.gt, '', 1)
 			}
 		}
 		0xE2 {
 			if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
 				// case `≠`:
 				s.pos += 2
 				return s.new_token(.ne, '', 3)
 			} else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
 				s.pos += 2
 				return s.new_token(.le, '', 3)
 			} else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
 				s.pos += 2
 				return s.new_token(.ge, '', 3)
 			}
 		}
 		`<` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.le, '', 2)
 			} else if nextc == `<` {
 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 					s.pos += 2
 					return s.new_token(.left_shift_assign, '', 3)
 				}
 				s.pos++
 				return s.new_token(.left_shift, '', 2)
 			} else {
 				return s.new_token(.lt, '', 1)
 			}
 		}
 		`=` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.eq, '', 2)
 			} else if nextc == `>` {
 				s.pos++
 				return s.new_token(.arrow, '', 2)
 			} else {
 				return s.new_token(.assign, '', 1)
 			}
 		}
 		`:` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.decl_assign, '', 2)
 			} else {
 				return s.new_token(.colon, '', 1)
 			}
 		}
 		`;` {
 			return s.new_token(.semicolon, '', 1)
 		}
 		`!` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.ne, '', 2)
 			} else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() {
 				s.pos += 2
 				return s.new_token(.not_in, '', 3)
 			} else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() {
 				s.pos += 2
 				return s.new_token(.not_is, '', 3)
 			} else {
 				return s.new_token(.not, '', 1)
 			}
 		}
 		`~` {
 			return s.new_token(.bit_not, '', 1)
 		}
 		`/` {
 			if nextc == `=` {
 				s.pos++
 				return s.new_token(.div_assign, '', 2)
 			}
 			if nextc == `/` {
 				start := s.pos + 1
 				s.ignore_line()
-				s.line_comment = s.text[start + 1..s.pos]
+				if nextc == `!` {
-				mut comment := s.line_comment.trim_space()
+					// treat shebang line (#!) as a comment
-				s.pos--
+					s.line_comment = s.text[start + 1..s.pos].trim_space()
-				// fix line_nr, \n was read, and the comment is marked
+					// s.fgenln('// shebang line "$s.line_comment"')
-				// on the next line
+					continue
-				s.line_nr--
+				}
-				if s.should_parse_comment() {
+				hash := s.text[start..s.pos].trim_space()
-					// Find out if this comment is on its own line (for vfmt)
+				return s.new_token(.hash, hash, hash.len)
-					mut is_separate_line_comment := true
+			}
-					for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- {
+			`>` {
-						if s.text[j] !in [`\t`, ` `] {
+				if nextc == `=` {
-							is_separate_line_comment = false
+					s.pos++
 					return s.new_token(.ge, '', 2)
 				} else if nextc == `>` {
 					if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 						s.pos += 2
 						return s.new_token(.right_shift_assign, '', 3)
 					}
 					s.pos++
 					return s.new_token(.right_shift, '', 2)
 				} else {
 					return s.new_token(.gt, '', 1)
 				}
 			}
 			0xE2 {
 				if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
 					// case `≠`:
 					s.pos += 2
 					return s.new_token(.ne, '', 3)
 				} else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
 					s.pos += 2
 					return s.new_token(.le, '', 3)
 				} else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
 					s.pos += 2
 					return s.new_token(.ge, '', 3)
 				}
 			}
 			`<` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.le, '', 2)
 				} else if nextc == `<` {
 					if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
 						s.pos += 2
 						return s.new_token(.left_shift_assign, '', 3)
 					}
 					s.pos++
 					return s.new_token(.left_shift, '', 2)
 				} else {
 					return s.new_token(.lt, '', 1)
 				}
 			}
 			`=` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.eq, '', 2)
 				} else if nextc == `>` {
 					s.pos++
 					return s.new_token(.arrow, '', 2)
 				} else {
 					return s.new_token(.assign, '', 1)
 				}
 			}
 			`:` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.decl_assign, '', 2)
 				} else {
 					return s.new_token(.colon, '', 1)
 				}
 			}
 			`;` {
 				return s.new_token(.semicolon, '', 1)
 			}
 			`!` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.ne, '', 2)
 				} else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() {
 					s.pos += 2
 					return s.new_token(.not_in, '', 3)
 				} else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() {
 					s.pos += 2
 					return s.new_token(.not_is, '', 3)
 				} else {
 					return s.new_token(.not, '', 1)
 				}
 			}
 			`~` {
 				return s.new_token(.bit_not, '', 1)
 			}
 			`/` {
 				if nextc == `=` {
 					s.pos++
 					return s.new_token(.div_assign, '', 2)
 				}
 				if nextc == `/` {
 					start := s.pos + 1
 					s.ignore_line()
 					s.line_comment = s.text[start + 1..s.pos]
 					mut comment := s.line_comment.trim_space()
 					s.pos--
 					// fix line_nr, \n was read, and the comment is marked
 					// on the next line
 					s.line_nr--
 					if s.should_parse_comment() {
 						// Find out if this comment is on its own line (for vfmt)
 						mut is_separate_line_comment := true
 						for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- {
 							if s.text[j] !in [`\t`, ` `] {
 								is_separate_line_comment = false
 							}
 						}
 						if is_separate_line_comment {
 							comment = '|' + comment
 						}
 						return s.new_token(.comment, comment, comment.len + 2)
 					}
 					// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 					// Skip the comment (return the next token)
 					continue
 				}
 				// Multiline comments
 				if nextc == `*` {
 					start := s.pos + 2
 					mut nest_count := 1
 					// Skip comment
 					for nest_count > 0 {
 						s.pos++
 						if s.pos >= s.text.len {
 							s.line_nr--
 							s.error('comment not terminated')
 						}
 						if s.text[s.pos] == `\n` {
 							s.inc_line_number()
 							continue
 						}
 						if s.expect('/*', s.pos) {
 							nest_count++
 							continue
 						}
 						if s.expect('*/', s.pos) {
 							nest_count--
 						}
 					}
 					if is_separate_line_comment {
 						comment = '|' + comment
 					}
 					return s.new_token(.comment, comment, comment.len + 2)
 				}
 				// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 				// Skip the comment (return the next token)
 				return s.scan()
 			}
 			// Multiline comments
 			if nextc == `*` {
 				start := s.pos + 2
 				mut nest_count := 1
 				// Skip comment
 				for nest_count > 0 {
 					s.pos++
-					if s.pos >= s.text.len {
+					if s.should_parse_comment() {
-						s.line_nr--
+						comment := s.text[start..(s.pos - 1)].trim_space()
-						s.error('comment not terminated')
+						return s.new_token(.comment, comment, comment.len + 4)
 					}
 					if s.text[s.pos] == `\n` {
 						s.inc_line_number()
 						continue
 					}
 					if s.expect('/*', s.pos) {
 						nest_count++
 						continue
 					}
 					if s.expect('*/', s.pos) {
 						nest_count--
 					}
 					// Skip if not in fmt mode
 					continue
 				}
-				s.pos++
+				return s.new_token(.div, '', 1)
 				if s.should_parse_comment() {
 					comment := s.text[start..(s.pos - 1)].trim_space()
 					return s.new_token(.comment, comment, comment.len + 4)
 				}
 				// Skip if not in fmt mode
 				return s.scan()
 			}
-			return s.new_token(.div, '', 1)
+			else {}
 		}
-		else {}
+		$if windows {
-	}
+			if c == `\0` {
-	$if windows {
+				return s.end_of_file()
-		if c == `\0` {
+			}
 			return s.end_of_file()
 		}
 		s.error('invalid character `$c.str()`')
 		break
 	}
 	s.error('invalid character `$c.str()`')
 	return s.end_of_file()
 }