scanner: parse multiple comments and long strings without a stackoverflow
							parent
							
								
									a2cb01e16a
								
							
						
					
					
						commit
						022cc72740
					
				|  | @ -614,307 +614,314 @@ fn (s Scanner) look_ahead(n int) byte { | |||
| } | ||||
| 
 | ||||
| fn (mut s Scanner) text_scan() token.Token { | ||||
| 	// if s.comments_mode == .parse_comments {
 | ||||
| 	// println('\nscan()')
 | ||||
| 	// }
 | ||||
| 	// if s.line_comment != '' {
 | ||||
| 	// s.fgenln('// LC "$s.line_comment"')
 | ||||
| 	// s.line_comment = ''
 | ||||
| 	// }
 | ||||
| 	if s.is_started { | ||||
| 		s.pos++ | ||||
| 	} | ||||
| 	s.is_started = true | ||||
| 	if s.pos >= s.text.len { | ||||
| 		return s.end_of_file() | ||||
| 	} | ||||
| 	if !s.is_inside_string { | ||||
| 		s.skip_whitespace() | ||||
| 	} | ||||
| 	// End of $var, start next string
 | ||||
| 	if s.is_inter_end { | ||||
| 		if s.text[s.pos] == s.quote { | ||||
| 			s.is_inter_end = false | ||||
| 			return s.new_token(.string, '', 1) | ||||
| 		} | ||||
| 		s.is_inter_end = false | ||||
| 		ident_string := s.ident_string() | ||||
| 		return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 | ||||
| 	} | ||||
| 	s.skip_whitespace() | ||||
| 	// end of file
 | ||||
| 	if s.pos >= s.text.len { | ||||
| 		return s.end_of_file() | ||||
| 	} | ||||
| 	// handle each char
 | ||||
| 	c := s.text[s.pos] | ||||
| 	nextc := s.look_ahead(1) | ||||
| 	// name or keyword
 | ||||
| 	if util.is_name_char(c) { | ||||
| 		name := s.ident_name() | ||||
| 		// tmp hack to detect . in ${}
 | ||||
| 		// Check if not .eof to prevent panic
 | ||||
| 		next_char := s.look_ahead(1) | ||||
| 		kind := token.keywords[name] | ||||
| 		if kind != .unknown { | ||||
| 			if kind == .key_fn { | ||||
| 				s.struct_name = s.ident_struct_name() | ||||
| 				s.fn_name = s.ident_fn_name() | ||||
| 			} else if kind == .key_module { | ||||
| 				s.mod_name = s.ident_mod_name() | ||||
| 			} | ||||
| 			return s.new_token(kind, name, name.len) | ||||
| 		} | ||||
| 		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 | ||||
| 		// at the next ', skip it
 | ||||
| 		if s.is_inside_string { | ||||
| 			if next_char == s.quote { | ||||
| 				s.is_inter_end = true | ||||
| 				s.is_inter_start = false | ||||
| 				s.is_inside_string = false | ||||
| 			} | ||||
| 		} | ||||
| 		// end of `$expr`
 | ||||
| 		// allow `'$a.b'` and `'$a.c()'`
 | ||||
| 		if s.is_inter_start && next_char != `.` && next_char != `(` { | ||||
| 			s.is_inter_end = true | ||||
| 			s.is_inter_start = false | ||||
| 		} | ||||
| 		if s.pos == 0 && next_char == ` ` { | ||||
| 			// If a single letter name at the start of the file, increment
 | ||||
| 			// Otherwise the scanner would be stuck at s.pos = 0
 | ||||
| 	// The for loop here is so that instead of doing
 | ||||
| 	// `return s.scan()` (which will use a new call stack frame),
 | ||||
| 	// text_scan can just do continue, keeping
 | ||||
| 	// memory & stack usage low.
 | ||||
| 	// That optimization mostly matters for long sections
 | ||||
| 	// of comments and string literals.
 | ||||
| 	for { | ||||
| 		// if s.comments_mode == .parse_comments {
 | ||||
| 		// println('\nscan()')
 | ||||
| 		// }
 | ||||
| 		// if s.line_comment != '' {
 | ||||
| 		// s.fgenln('// LC "$s.line_comment"')
 | ||||
| 		// s.line_comment = ''
 | ||||
| 		// }
 | ||||
| 		if s.is_started { | ||||
| 			s.pos++ | ||||
| 		} | ||||
| 		return s.new_token(.name, name, name.len) | ||||
| 	} else if c.is_digit() || (c == `.` && nextc.is_digit()) { | ||||
| 		// `123`, `.123`
 | ||||
| 		s.is_started = true | ||||
| 		if s.pos >= s.text.len { | ||||
| 			return s.end_of_file() | ||||
| 		} | ||||
| 		if !s.is_inside_string { | ||||
| 			// In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed.
 | ||||
| 			mut start_pos := s.pos | ||||
| 			for start_pos < s.text.len && s.text[start_pos] == `0` { | ||||
| 				start_pos++ | ||||
| 			} | ||||
| 			mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
 | ||||
| 			// for 0b, 0o, 0x the heading zero shouldn't be jumped
 | ||||
| 			if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) { | ||||
| 				prefix_zero_num-- | ||||
| 			} | ||||
| 			s.pos += prefix_zero_num // jump these zeros
 | ||||
| 			s.skip_whitespace() | ||||
| 		} | ||||
| 		num := s.ident_number() | ||||
| 		return s.new_token(.number, num, num.len) | ||||
| 	} | ||||
| 	// Handle `'$fn()'`
 | ||||
| 	if c == `)` && s.is_inter_start { | ||||
| 		next_char := s.look_ahead(1) | ||||
| 		if next_char != `.` { | ||||
| 			s.is_inter_end = true | ||||
| 			s.is_inter_start = false | ||||
| 			if next_char == s.quote { | ||||
| 				s.is_inside_string = false | ||||
| 		// End of $var, start next string
 | ||||
| 		if s.is_inter_end { | ||||
| 			if s.text[s.pos] == s.quote { | ||||
| 				s.is_inter_end = false | ||||
| 				return s.new_token(.string, '', 1) | ||||
| 			} | ||||
| 			return s.new_token(.rpar, '', 1) | ||||
| 		} | ||||
| 	} | ||||
| 	// all other tokens
 | ||||
| 	match c { | ||||
| 		`+` { | ||||
| 			if nextc == `+` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.inc, '', 2) | ||||
| 			} else if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.plus_assign, '', 2) | ||||
| 			} | ||||
| 			return s.new_token(.plus, '', 1) | ||||
| 		} | ||||
| 		`-` { | ||||
| 			if nextc == `-` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.dec, '', 2) | ||||
| 			} else if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.minus_assign, '', 2) | ||||
| 			} | ||||
| 			return s.new_token(.minus, '', 1) | ||||
| 		} | ||||
| 		`*` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.mult_assign, '', 2) | ||||
| 			} | ||||
| 			return s.new_token(.mul, '', 1) | ||||
| 		} | ||||
| 		`^` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.xor_assign, '', 2) | ||||
| 			} | ||||
| 			return s.new_token(.xor, '', 1) | ||||
| 		} | ||||
| 		`%` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.mod_assign, '', 2) | ||||
| 			} | ||||
| 			return s.new_token(.mod, '', 1) | ||||
| 		} | ||||
| 		`?` { | ||||
| 			return s.new_token(.question, '', 1) | ||||
| 		} | ||||
| 		single_quote, double_quote { | ||||
| 			s.is_inter_end = false | ||||
| 			ident_string := s.ident_string() | ||||
| 			return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 | ||||
| 		} | ||||
| 		`\`` { | ||||
| 			// ` // apostrophe balance comment. do not remove
 | ||||
| 			ident_char := s.ident_char() | ||||
| 			return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
 | ||||
| 		s.skip_whitespace() | ||||
| 		// end of file
 | ||||
| 		if s.pos >= s.text.len { | ||||
| 			return s.end_of_file() | ||||
| 		} | ||||
| 		`(` { | ||||
| 			// TODO `$if vet {` for performance
 | ||||
| 			if s.pref.is_vet && s.text[s.pos + 1] == ` ` { | ||||
| 				println('$s.file_path:$s.line_nr: Looks like you are adding a space after `(`') | ||||
| 			} | ||||
| 			return s.new_token(.lpar, '', 1) | ||||
| 		} | ||||
| 		`)` { | ||||
| 			// TODO `$if vet {` for performance
 | ||||
| 			if s.pref.is_vet && s.text[s.pos - 1] == ` ` { | ||||
| 				println('$s.file_path:$s.line_nr: Looks like you are adding a space before `)`') | ||||
| 			} | ||||
| 			return s.new_token(.rpar, '', 1) | ||||
| 		} | ||||
| 		`[` { | ||||
| 			return s.new_token(.lsbr, '', 1) | ||||
| 		} | ||||
| 		`]` { | ||||
| 			return s.new_token(.rsbr, '', 1) | ||||
| 		} | ||||
| 		`{` { | ||||
| 			// Skip { in `${` in strings
 | ||||
| 			if s.is_inside_string { | ||||
| 				return s.scan() | ||||
| 			} | ||||
| 			return s.new_token(.lcbr, '', 1) | ||||
| 		} | ||||
| 		`$` { | ||||
| 			if s.is_inside_string { | ||||
| 				return s.new_token(.str_dollar, '', 1) | ||||
| 			} else { | ||||
| 				return s.new_token(.dollar, '', 1) | ||||
| 			} | ||||
| 		} | ||||
| 		`}` { | ||||
| 			// s = `hello $name !`
 | ||||
| 			// s = `hello ${name} !`
 | ||||
| 			if s.is_inside_string { | ||||
| 				s.pos++ | ||||
| 				if s.text[s.pos] == s.quote { | ||||
| 					s.is_inside_string = false | ||||
| 					return s.new_token(.string, '', 1) | ||||
| 				} | ||||
| 				ident_string := s.ident_string() | ||||
| 				return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 | ||||
| 			} else { | ||||
| 				return s.new_token(.rcbr, '', 1) | ||||
| 			} | ||||
| 		} | ||||
| 		`&` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.and_assign, '', 2) | ||||
| 			} | ||||
| 			afternextc := s.look_ahead(2) | ||||
| 			if nextc == `&` && afternextc.is_space() { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.and, '', 2) | ||||
| 			} | ||||
| 			return s.new_token(.amp, '', 1) | ||||
| 		} | ||||
| 		`|` { | ||||
| 			if nextc == `|` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.logical_or, '', 2) | ||||
| 			} | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.or_assign, '', 2) | ||||
| 			} | ||||
| 			return s.new_token(.pipe, '', 1) | ||||
| 		} | ||||
| 		`,` { | ||||
| 			return s.new_token(.comma, '', 1) | ||||
| 		} | ||||
| 		`@` { | ||||
| 			s.pos++ | ||||
| 		// handle each char
 | ||||
| 		c := s.text[s.pos] | ||||
| 		nextc := s.look_ahead(1) | ||||
| 		// name or keyword
 | ||||
| 		if util.is_name_char(c) { | ||||
| 			name := s.ident_name() | ||||
| 			if s.is_fmt { | ||||
| 				return s.new_token(.name, '@' + name, name.len + 1) | ||||
| 			} | ||||
| 			// @FN => will be substituted with the name of the current V function
 | ||||
| 			// @MOD => will be substituted with the name of the current V module
 | ||||
| 			// @STRUCT => will be substituted with the name of the current V struct
 | ||||
| 			// @VEXE => will be substituted with the path to the V compiler
 | ||||
| 			// @FILE => will be substituted with the path of the V source file
 | ||||
| 			// @LINE => will be substituted with the V line number where it appears (as a string).
 | ||||
| 			// @COLUMN => will be substituted with the column where it appears (as a string).
 | ||||
| 			// @VHASH  => will be substituted with the shortened commit hash of the V compiler (as a string).
 | ||||
| 			// @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string).
 | ||||
| 			// This allows things like this:
 | ||||
| 			// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN)
 | ||||
| 			// ... which is useful while debugging/tracing
 | ||||
| 			if name == 'FN' { | ||||
| 				return s.new_token(.string, s.fn_name, 3) | ||||
| 			} | ||||
| 			if name == 'MOD' { | ||||
| 				return s.new_token(.string, s.mod_name, 4) | ||||
| 			} | ||||
| 			if name == 'STRUCT' { | ||||
| 				return s.new_token(.string, s.struct_name, 7) | ||||
| 			} | ||||
| 			if name == 'VEXE' { | ||||
| 				vexe := pref.vexe_path() | ||||
| 				return s.new_token(.string, util.cescaped_path(vexe), 5) | ||||
| 			} | ||||
| 			if name == 'FILE' { | ||||
| 				fpath := os.real_path(s.file_path) | ||||
| 				return s.new_token(.string, util.cescaped_path(fpath), 5) | ||||
| 			} | ||||
| 			if name == 'LINE' { | ||||
| 				return s.new_token(.string, (s.line_nr + 1).str(), 5) | ||||
| 			} | ||||
| 			if name == 'COLUMN' { | ||||
| 				return s.new_token(.string, s.current_column().str(), 7) | ||||
| 			} | ||||
| 			if name == 'VHASH' { | ||||
| 				return s.new_token(.string, util.vhash(), 6) | ||||
| 			} | ||||
| 			if name == 'VMOD_FILE' { | ||||
| 				if s.vmod_file_content.len == 0 { | ||||
| 					mcache := vmod.get_cache() | ||||
| 					vmod_file_location := mcache.get_by_file(s.file_path) | ||||
| 					if vmod_file_location.vmod_file.len == 0 { | ||||
| 						s.error('@VMOD_FILE can be used only in projects, that have v.mod file') | ||||
| 					} | ||||
| 					vmod_content := os.read_file(vmod_file_location.vmod_file) or { | ||||
| 						'' | ||||
| 					} | ||||
| 					$if windows { | ||||
| 						s.vmod_file_content = vmod_content.replace('\r\n', '\n') | ||||
| 					} $else { | ||||
| 						s.vmod_file_content = vmod_content | ||||
| 					} | ||||
| 			// tmp hack to detect . in ${}
 | ||||
| 			// Check if not .eof to prevent panic
 | ||||
| 			next_char := s.look_ahead(1) | ||||
| 			kind := token.keywords[name] | ||||
| 			if kind != .unknown { | ||||
| 				if kind == .key_fn { | ||||
| 					s.struct_name = s.ident_struct_name() | ||||
| 					s.fn_name = s.ident_fn_name() | ||||
| 				} else if kind == .key_module { | ||||
| 					s.mod_name = s.ident_mod_name() | ||||
| 				} | ||||
| 				return s.new_token(.string, s.vmod_file_content, 10) | ||||
| 				return s.new_token(kind, name, name.len) | ||||
| 			} | ||||
| 			if !token.is_key(name) { | ||||
| 				s.error('@ must be used before keywords (e.g. `@type string`)') | ||||
| 			// 'asdf $b' => "b" is the last name in the string, dont start parsing string
 | ||||
| 			// at the next ', skip it
 | ||||
| 			if s.is_inside_string { | ||||
| 				if next_char == s.quote { | ||||
| 					s.is_inter_end = true | ||||
| 					s.is_inter_start = false | ||||
| 					s.is_inside_string = false | ||||
| 				} | ||||
| 			} | ||||
| 			// end of `$expr`
 | ||||
| 			// allow `'$a.b'` and `'$a.c()'`
 | ||||
| 			if s.is_inter_start && next_char != `.` && next_char != `(` { | ||||
| 				s.is_inter_end = true | ||||
| 				s.is_inter_start = false | ||||
| 			} | ||||
| 			if s.pos == 0 && next_char == ` ` { | ||||
| 				// If a single letter name at the start of the file, increment
 | ||||
| 				// Otherwise the scanner would be stuck at s.pos = 0
 | ||||
| 				s.pos++ | ||||
| 			} | ||||
| 			return s.new_token(.name, name, name.len) | ||||
| 		} else if c.is_digit() || (c == `.` && nextc.is_digit()) { | ||||
| 			// `123`, `.123`
 | ||||
| 			if !s.is_inside_string { | ||||
| 				// In C ints with `0` prefix are octal (in V they're decimal), so discarding heading zeros is needed.
 | ||||
| 				mut start_pos := s.pos | ||||
| 				for start_pos < s.text.len && s.text[start_pos] == `0` { | ||||
| 					start_pos++ | ||||
| 				} | ||||
| 				mut prefix_zero_num := start_pos - s.pos // how many prefix zeros should be jumped
 | ||||
| 				// for 0b, 0o, 0x the heading zero shouldn't be jumped
 | ||||
| 				if start_pos == s.text.len || (c == `0` && !s.text[start_pos].is_digit()) { | ||||
| 					prefix_zero_num-- | ||||
| 				} | ||||
| 				s.pos += prefix_zero_num // jump these zeros
 | ||||
| 			} | ||||
| 			num := s.ident_number() | ||||
| 			return s.new_token(.number, num, num.len) | ||||
| 		} | ||||
| 		/* | ||||
| 		case `\r`: | ||||
| 		// Handle `'$fn()'`
 | ||||
| 		if c == `)` && s.is_inter_start { | ||||
| 			next_char := s.look_ahead(1) | ||||
| 			if next_char != `.` { | ||||
| 				s.is_inter_end = true | ||||
| 				s.is_inter_start = false | ||||
| 				if next_char == s.quote { | ||||
| 					s.is_inside_string = false | ||||
| 				} | ||||
| 				return s.new_token(.rpar, '', 1) | ||||
| 			} | ||||
| 		} | ||||
| 		// all other tokens
 | ||||
| 		match c { | ||||
| 			`+` { | ||||
| 				if nextc == `+` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.inc, '', 2) | ||||
| 				} else if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.plus_assign, '', 2) | ||||
| 				} | ||||
| 				return s.new_token(.plus, '', 1) | ||||
| 			} | ||||
| 			`-` { | ||||
| 				if nextc == `-` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.dec, '', 2) | ||||
| 				} else if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.minus_assign, '', 2) | ||||
| 				} | ||||
| 				return s.new_token(.minus, '', 1) | ||||
| 			} | ||||
| 			`*` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.mult_assign, '', 2) | ||||
| 				} | ||||
| 				return s.new_token(.mul, '', 1) | ||||
| 			} | ||||
| 			`^` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.xor_assign, '', 2) | ||||
| 				} | ||||
| 				return s.new_token(.xor, '', 1) | ||||
| 			} | ||||
| 			`%` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.mod_assign, '', 2) | ||||
| 				} | ||||
| 				return s.new_token(.mod, '', 1) | ||||
| 			} | ||||
| 			`?` { | ||||
| 				return s.new_token(.question, '', 1) | ||||
| 			} | ||||
| 			single_quote, double_quote { | ||||
| 				ident_string := s.ident_string() | ||||
| 				return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 | ||||
| 			} | ||||
| 			`\`` { | ||||
| 				// ` // apostrophe balance comment. do not remove
 | ||||
| 				ident_char := s.ident_char() | ||||
| 				return s.new_token(.chartoken, ident_char, ident_char.len + 2) // + two quotes
 | ||||
| 			} | ||||
| 			`(` { | ||||
| 				// TODO `$if vet {` for performance
 | ||||
| 				if s.pref.is_vet && s.text[s.pos + 1] == ` ` { | ||||
| 					println('$s.file_path:$s.line_nr: Looks like you are adding a space after `(`') | ||||
| 				} | ||||
| 				return s.new_token(.lpar, '', 1) | ||||
| 			} | ||||
| 			`)` { | ||||
| 				// TODO `$if vet {` for performance
 | ||||
| 				if s.pref.is_vet && s.text[s.pos - 1] == ` ` { | ||||
| 					println('$s.file_path:$s.line_nr: Looks like you are adding a space before `)`') | ||||
| 				} | ||||
| 				return s.new_token(.rpar, '', 1) | ||||
| 			} | ||||
| 			`[` { | ||||
| 				return s.new_token(.lsbr, '', 1) | ||||
| 			} | ||||
| 			`]` { | ||||
| 				return s.new_token(.rsbr, '', 1) | ||||
| 			} | ||||
| 			`{` { | ||||
| 				// Skip { in `${` in strings
 | ||||
| 				if s.is_inside_string { | ||||
| 					continue | ||||
| 				} | ||||
| 				return s.new_token(.lcbr, '', 1) | ||||
| 			} | ||||
| 			`$` { | ||||
| 				if s.is_inside_string { | ||||
| 					return s.new_token(.str_dollar, '', 1) | ||||
| 				} else { | ||||
| 					return s.new_token(.dollar, '', 1) | ||||
| 				} | ||||
| 			} | ||||
| 			`}` { | ||||
| 				// s = `hello $name !`
 | ||||
| 				// s = `hello ${name} !`
 | ||||
| 				if s.is_inside_string { | ||||
| 					s.pos++ | ||||
| 					if s.text[s.pos] == s.quote { | ||||
| 						s.is_inside_string = false | ||||
| 						return s.new_token(.string, '', 1) | ||||
| 					} | ||||
| 					ident_string := s.ident_string() | ||||
| 					return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
 | ||||
| 				} else { | ||||
| 					return s.new_token(.rcbr, '', 1) | ||||
| 				} | ||||
| 			} | ||||
| 			`&` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.and_assign, '', 2) | ||||
| 				} | ||||
| 				afternextc := s.look_ahead(2) | ||||
| 				if nextc == `&` && afternextc.is_space() { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.and, '', 2) | ||||
| 				} | ||||
| 				return s.new_token(.amp, '', 1) | ||||
| 			} | ||||
| 			`|` { | ||||
| 				if nextc == `|` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.logical_or, '', 2) | ||||
| 				} | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.or_assign, '', 2) | ||||
| 				} | ||||
| 				return s.new_token(.pipe, '', 1) | ||||
| 			} | ||||
| 			`,` { | ||||
| 				return s.new_token(.comma, '', 1) | ||||
| 			} | ||||
| 			`@` { | ||||
| 				s.pos++ | ||||
| 				name := s.ident_name() | ||||
| 				if s.is_fmt { | ||||
| 					return s.new_token(.name, '@' + name, name.len + 1) | ||||
| 				} | ||||
| 				// @FN => will be substituted with the name of the current V function
 | ||||
| 				// @MOD => will be substituted with the name of the current V module
 | ||||
| 				// @STRUCT => will be substituted with the name of the current V struct
 | ||||
| 				// @VEXE => will be substituted with the path to the V compiler
 | ||||
| 				// @FILE => will be substituted with the path of the V source file
 | ||||
| 				// @LINE => will be substituted with the V line number where it appears (as a string).
 | ||||
| 				// @COLUMN => will be substituted with the column where it appears (as a string).
 | ||||
| 				// @VHASH  => will be substituted with the shortened commit hash of the V compiler (as a string).
 | ||||
| 				// @VMOD_FILE => will be substituted with the contents of the nearest v.mod file (as a string).
 | ||||
| 				// This allows things like this:
 | ||||
| 				// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @MOD + '.' + @FN)
 | ||||
| 				// ... which is useful while debugging/tracing
 | ||||
| 				if name == 'FN' { | ||||
| 					return s.new_token(.string, s.fn_name, 3) | ||||
| 				} | ||||
| 				if name == 'MOD' { | ||||
| 					return s.new_token(.string, s.mod_name, 4) | ||||
| 				} | ||||
| 				if name == 'STRUCT' { | ||||
| 					return s.new_token(.string, s.struct_name, 7) | ||||
| 				} | ||||
| 				if name == 'VEXE' { | ||||
| 					vexe := pref.vexe_path() | ||||
| 					return s.new_token(.string, util.cescaped_path(vexe), 5) | ||||
| 				} | ||||
| 				if name == 'FILE' { | ||||
| 					fpath := os.real_path(s.file_path) | ||||
| 					return s.new_token(.string, util.cescaped_path(fpath), 5) | ||||
| 				} | ||||
| 				if name == 'LINE' { | ||||
| 					return s.new_token(.string, (s.line_nr + 1).str(), 5) | ||||
| 				} | ||||
| 				if name == 'COLUMN' { | ||||
| 					return s.new_token(.string, s.current_column().str(), 7) | ||||
| 				} | ||||
| 				if name == 'VHASH' { | ||||
| 					return s.new_token(.string, util.vhash(), 6) | ||||
| 				} | ||||
| 				if name == 'VMOD_FILE' { | ||||
| 					if s.vmod_file_content.len == 0 { | ||||
| 						mcache := vmod.get_cache() | ||||
| 						vmod_file_location := mcache.get_by_file(s.file_path) | ||||
| 						if vmod_file_location.vmod_file.len == 0 { | ||||
| 							s.error('@VMOD_FILE can be used only in projects, that have v.mod file') | ||||
| 						} | ||||
| 						vmod_content := os.read_file(vmod_file_location.vmod_file) or { | ||||
| 							'' | ||||
| 						} | ||||
| 						$if windows { | ||||
| 							s.vmod_file_content = vmod_content.replace('\r\n', '\n') | ||||
| 						} $else { | ||||
| 							s.vmod_file_content = vmod_content | ||||
| 						} | ||||
| 					} | ||||
| 					return s.new_token(.string, s.vmod_file_content, 10) | ||||
| 				} | ||||
| 				if !token.is_key(name) { | ||||
| 					s.error('@ must be used before keywords (e.g. `@type string`)') | ||||
| 				} | ||||
| 				return s.new_token(.name, name, name.len) | ||||
| 			} | ||||
| 			/* | ||||
| 			case `\r`: | ||||
| 		if nextc == `\n` { | ||||
| 			s.pos++ | ||||
| 			s.last_nl_pos = s.pos | ||||
|  | @ -925,184 +932,186 @@ fn (mut s Scanner) text_scan() token.Token { | |||
| 		s.last_nl_pos = s.pos | ||||
| 		return s.new_token(.nl, '') | ||||
| 	 } | ||||
| 		*/ | ||||
| 		`.` { | ||||
| 			if nextc == `.` { | ||||
| 				s.pos++ | ||||
| 				if s.text[s.pos + 1] == `.` { | ||||
| 			*/ | ||||
| 			`.` { | ||||
| 				if nextc == `.` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.ellipsis, '', 3) | ||||
| 					if s.text[s.pos + 1] == `.` { | ||||
| 						s.pos++ | ||||
| 						return s.new_token(.ellipsis, '', 3) | ||||
| 					} | ||||
| 					return s.new_token(.dotdot, '', 2) | ||||
| 				} | ||||
| 				return s.new_token(.dotdot, '', 2) | ||||
| 				return s.new_token(.dot, '', 1) | ||||
| 			} | ||||
| 			return s.new_token(.dot, '', 1) | ||||
| 		} | ||||
| 		`#` { | ||||
| 			start := s.pos + 1 | ||||
| 			s.ignore_line() | ||||
| 			if nextc == `!` { | ||||
| 				// treat shebang line (#!) as a comment
 | ||||
| 				s.line_comment = s.text[start + 1..s.pos].trim_space() | ||||
| 				// s.fgenln('// shebang line "$s.line_comment"')
 | ||||
| 				return s.scan() | ||||
| 			} | ||||
| 			hash := s.text[start..s.pos].trim_space() | ||||
| 			return s.new_token(.hash, hash, hash.len) | ||||
| 		} | ||||
| 		`>` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.ge, '', 2) | ||||
| 			} else if nextc == `>` { | ||||
| 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` { | ||||
| 					s.pos += 2 | ||||
| 					return s.new_token(.right_shift_assign, '', 3) | ||||
| 				} | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.right_shift, '', 2) | ||||
| 			} else { | ||||
| 				return s.new_token(.gt, '', 1) | ||||
| 			} | ||||
| 		} | ||||
| 		0xE2 { | ||||
| 			if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 { | ||||
| 				// case `≠`:
 | ||||
| 				s.pos += 2 | ||||
| 				return s.new_token(.ne, '', 3) | ||||
| 			} else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD { | ||||
| 				s.pos += 2 | ||||
| 				return s.new_token(.le, '', 3) | ||||
| 			} else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE { | ||||
| 				s.pos += 2 | ||||
| 				return s.new_token(.ge, '', 3) | ||||
| 			} | ||||
| 		} | ||||
| 		`<` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.le, '', 2) | ||||
| 			} else if nextc == `<` { | ||||
| 				if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` { | ||||
| 					s.pos += 2 | ||||
| 					return s.new_token(.left_shift_assign, '', 3) | ||||
| 				} | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.left_shift, '', 2) | ||||
| 			} else { | ||||
| 				return s.new_token(.lt, '', 1) | ||||
| 			} | ||||
| 		} | ||||
| 		`=` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.eq, '', 2) | ||||
| 			} else if nextc == `>` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.arrow, '', 2) | ||||
| 			} else { | ||||
| 				return s.new_token(.assign, '', 1) | ||||
| 			} | ||||
| 		} | ||||
| 		`:` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.decl_assign, '', 2) | ||||
| 			} else { | ||||
| 				return s.new_token(.colon, '', 1) | ||||
| 			} | ||||
| 		} | ||||
| 		`;` { | ||||
| 			return s.new_token(.semicolon, '', 1) | ||||
| 		} | ||||
| 		`!` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.ne, '', 2) | ||||
| 			} else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() { | ||||
| 				s.pos += 2 | ||||
| 				return s.new_token(.not_in, '', 3) | ||||
| 			} else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() { | ||||
| 				s.pos += 2 | ||||
| 				return s.new_token(.not_is, '', 3) | ||||
| 			} else { | ||||
| 				return s.new_token(.not, '', 1) | ||||
| 			} | ||||
| 		} | ||||
| 		`~` { | ||||
| 			return s.new_token(.bit_not, '', 1) | ||||
| 		} | ||||
| 		`/` { | ||||
| 			if nextc == `=` { | ||||
| 				s.pos++ | ||||
| 				return s.new_token(.div_assign, '', 2) | ||||
| 			} | ||||
| 			if nextc == `/` { | ||||
| 			`#` { | ||||
| 				start := s.pos + 1 | ||||
| 				s.ignore_line() | ||||
| 				s.line_comment = s.text[start + 1..s.pos] | ||||
| 				mut comment := s.line_comment.trim_space() | ||||
| 				s.pos-- | ||||
| 				// fix line_nr, \n was read, and the comment is marked
 | ||||
| 				// on the next line
 | ||||
| 				s.line_nr-- | ||||
| 				if s.should_parse_comment() { | ||||
| 					// Find out if this comment is on its own line (for vfmt)
 | ||||
| 					mut is_separate_line_comment := true | ||||
| 					for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- { | ||||
| 						if s.text[j] !in [`\t`, ` `] { | ||||
| 							is_separate_line_comment = false | ||||
| 				if nextc == `!` { | ||||
| 					// treat shebang line (#!) as a comment
 | ||||
| 					s.line_comment = s.text[start + 1..s.pos].trim_space() | ||||
| 					// s.fgenln('// shebang line "$s.line_comment"')
 | ||||
| 					continue | ||||
| 				} | ||||
| 				hash := s.text[start..s.pos].trim_space() | ||||
| 				return s.new_token(.hash, hash, hash.len) | ||||
| 			} | ||||
| 			`>` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.ge, '', 2) | ||||
| 				} else if nextc == `>` { | ||||
| 					if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` { | ||||
| 						s.pos += 2 | ||||
| 						return s.new_token(.right_shift_assign, '', 3) | ||||
| 					} | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.right_shift, '', 2) | ||||
| 				} else { | ||||
| 					return s.new_token(.gt, '', 1) | ||||
| 				} | ||||
| 			} | ||||
| 			0xE2 { | ||||
| 				if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 { | ||||
| 					// case `≠`:
 | ||||
| 					s.pos += 2 | ||||
| 					return s.new_token(.ne, '', 3) | ||||
| 				} else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD { | ||||
| 					s.pos += 2 | ||||
| 					return s.new_token(.le, '', 3) | ||||
| 				} else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE { | ||||
| 					s.pos += 2 | ||||
| 					return s.new_token(.ge, '', 3) | ||||
| 				} | ||||
| 			} | ||||
| 			`<` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.le, '', 2) | ||||
| 				} else if nextc == `<` { | ||||
| 					if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` { | ||||
| 						s.pos += 2 | ||||
| 						return s.new_token(.left_shift_assign, '', 3) | ||||
| 					} | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.left_shift, '', 2) | ||||
| 				} else { | ||||
| 					return s.new_token(.lt, '', 1) | ||||
| 				} | ||||
| 			} | ||||
| 			`=` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.eq, '', 2) | ||||
| 				} else if nextc == `>` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.arrow, '', 2) | ||||
| 				} else { | ||||
| 					return s.new_token(.assign, '', 1) | ||||
| 				} | ||||
| 			} | ||||
| 			`:` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.decl_assign, '', 2) | ||||
| 				} else { | ||||
| 					return s.new_token(.colon, '', 1) | ||||
| 				} | ||||
| 			} | ||||
| 			`;` { | ||||
| 				return s.new_token(.semicolon, '', 1) | ||||
| 			} | ||||
| 			`!` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.ne, '', 2) | ||||
| 				} else if nextc == `i` && s.text[s.pos + 2] == `n` && s.text[s.pos + 3].is_space() { | ||||
| 					s.pos += 2 | ||||
| 					return s.new_token(.not_in, '', 3) | ||||
| 				} else if nextc == `i` && s.text[s.pos + 2] == `s` && s.text[s.pos + 3].is_space() { | ||||
| 					s.pos += 2 | ||||
| 					return s.new_token(.not_is, '', 3) | ||||
| 				} else { | ||||
| 					return s.new_token(.not, '', 1) | ||||
| 				} | ||||
| 			} | ||||
| 			`~` { | ||||
| 				return s.new_token(.bit_not, '', 1) | ||||
| 			} | ||||
| 			`/` { | ||||
| 				if nextc == `=` { | ||||
| 					s.pos++ | ||||
| 					return s.new_token(.div_assign, '', 2) | ||||
| 				} | ||||
| 				if nextc == `/` { | ||||
| 					start := s.pos + 1 | ||||
| 					s.ignore_line() | ||||
| 					s.line_comment = s.text[start + 1..s.pos] | ||||
| 					mut comment := s.line_comment.trim_space() | ||||
| 					s.pos-- | ||||
| 					// fix line_nr, \n was read, and the comment is marked
 | ||||
| 					// on the next line
 | ||||
| 					s.line_nr-- | ||||
| 					if s.should_parse_comment() { | ||||
| 						// Find out if this comment is on its own line (for vfmt)
 | ||||
| 						mut is_separate_line_comment := true | ||||
| 						for j := start - 2; j >= 0 && s.text[j] != `\n`; j-- { | ||||
| 							if s.text[j] !in [`\t`, ` `] { | ||||
| 								is_separate_line_comment = false | ||||
| 							} | ||||
| 						} | ||||
| 						if is_separate_line_comment { | ||||
| 							comment = '|' + comment | ||||
| 						} | ||||
| 						return s.new_token(.comment, comment, comment.len + 2) | ||||
| 					} | ||||
| 					// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 | ||||
| 					// Skip the comment (return the next token)
 | ||||
| 					continue | ||||
| 				} | ||||
| 				// Multiline comments
 | ||||
| 				if nextc == `*` { | ||||
| 					start := s.pos + 2 | ||||
| 					mut nest_count := 1 | ||||
| 					// Skip comment
 | ||||
| 					for nest_count > 0 { | ||||
| 						s.pos++ | ||||
| 						if s.pos >= s.text.len { | ||||
| 							s.line_nr-- | ||||
| 							s.error('comment not terminated') | ||||
| 						} | ||||
| 						if s.text[s.pos] == `\n` { | ||||
| 							s.inc_line_number() | ||||
| 							continue | ||||
| 						} | ||||
| 						if s.expect('/*', s.pos) { | ||||
| 							nest_count++ | ||||
| 							continue | ||||
| 						} | ||||
| 						if s.expect('*/', s.pos) { | ||||
| 							nest_count-- | ||||
| 						} | ||||
| 					} | ||||
| 					if is_separate_line_comment { | ||||
| 						comment = '|' + comment | ||||
| 					} | ||||
| 					return s.new_token(.comment, comment, comment.len + 2) | ||||
| 				} | ||||
| 				// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
 | ||||
| 				// Skip the comment (return the next token)
 | ||||
| 				return s.scan() | ||||
| 			} | ||||
| 			// Multiline comments
 | ||||
| 			if nextc == `*` { | ||||
| 				start := s.pos + 2 | ||||
| 				mut nest_count := 1 | ||||
| 				// Skip comment
 | ||||
| 				for nest_count > 0 { | ||||
| 					s.pos++ | ||||
| 					if s.pos >= s.text.len { | ||||
| 						s.line_nr-- | ||||
| 						s.error('comment not terminated') | ||||
| 					} | ||||
| 					if s.text[s.pos] == `\n` { | ||||
| 						s.inc_line_number() | ||||
| 						continue | ||||
| 					} | ||||
| 					if s.expect('/*', s.pos) { | ||||
| 						nest_count++ | ||||
| 						continue | ||||
| 					} | ||||
| 					if s.expect('*/', s.pos) { | ||||
| 						nest_count-- | ||||
| 					if s.should_parse_comment() { | ||||
| 						comment := s.text[start..(s.pos - 1)].trim_space() | ||||
| 						return s.new_token(.comment, comment, comment.len + 4) | ||||
| 					} | ||||
| 					// Skip if not in fmt mode
 | ||||
| 					continue | ||||
| 				} | ||||
| 				s.pos++ | ||||
| 				if s.should_parse_comment() { | ||||
| 					comment := s.text[start..(s.pos - 1)].trim_space() | ||||
| 					return s.new_token(.comment, comment, comment.len + 4) | ||||
| 				} | ||||
| 				// Skip if not in fmt mode
 | ||||
| 				return s.scan() | ||||
| 				return s.new_token(.div, '', 1) | ||||
| 			} | ||||
| 			return s.new_token(.div, '', 1) | ||||
| 			else {} | ||||
| 		} | ||||
| 		else {} | ||||
| 	} | ||||
| 	$if windows { | ||||
| 		if c == `\0` { | ||||
| 			return s.end_of_file() | ||||
| 		$if windows { | ||||
| 			if c == `\0` { | ||||
| 				return s.end_of_file() | ||||
| 			} | ||||
| 		} | ||||
| 		s.error('invalid character `$c.str()`') | ||||
| 		break | ||||
| 	} | ||||
| 	s.error('invalid character `$c.str()`') | ||||
| 	return s.end_of_file() | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue