toml: fix scanner escape collecting in literal strings (#12022)
							parent
							
								
									d4b3c65c45
								
							
						
					
					
						commit
						8705db5844
					
				|  | @ -2,3 +2,4 @@ | |||
| *.vv linguist-language=V text=auto eol=lf | ||||
| *.bat text=auto eol=crlf | ||||
|  Dockerfile.* linguist-language=Dockerfile | ||||
| *.toml text eol=lf | ||||
|  |  | |||
|  | @ -484,22 +484,33 @@ fn (mut s Scanner) extract_multiline_string() ?string { | |||
| fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) { | ||||
| 	c := s.at() | ||||
| 	mut lit := c.ascii_str() | ||||
| 	if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit() | ||||
| 		&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() { | ||||
| 		lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
 | ||||
| 		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`') | ||||
| 		return lit, 4 | ||||
| 	} else if s.peek(1) == quote { | ||||
| 		if (!is_multiline && s.peek(2) == `\n`) | ||||
| 			|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) { | ||||
| 			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string') | ||||
| 	is_literal_string := quote == `'` | ||||
| 	if !is_literal_string { | ||||
| 		if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit() | ||||
| 			&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() { | ||||
| 			lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
 | ||||
| 			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`') | ||||
| 			return lit, 4 | ||||
| 		} else if s.peek(1) == quote { | ||||
| 			if (!is_multiline && s.peek(2) == `\n`) | ||||
| 				|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) { | ||||
| 				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string') | ||||
| 				return '', 0 | ||||
| 			} | ||||
| 			lit += quote.ascii_str() | ||||
| 			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`') | ||||
| 			return lit, 1 | ||||
| 		} | ||||
| 	} | ||||
| 	if is_literal_string { | ||||
| 		if s.peek(1) == quote { | ||||
| 			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore escape `$lit${byte(s.peek(1)).ascii_str()}` in literal string') | ||||
| 			return '', 0 | ||||
| 		} | ||||
| 		lit += quote.ascii_str() | ||||
| 		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`') | ||||
| 		return lit, 1 | ||||
| 	} | ||||
| 
 | ||||
| 	lit += byte(s.peek(1)).ascii_str() | ||||
| 	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`') | ||||
| 	return lit, 1 | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -65,3 +65,22 @@ fn test_multiline_strings() { | |||
| 	value = toml_doc.value('mismatch2') | ||||
| 	assert value.string() == 'aaa' + '"""' + 'bbb' | ||||
| } | ||||
| 
 | ||||
| fn test_literal_strings() { | ||||
| 	toml_file := | ||||
| 		os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) + | ||||
| 		'.toml' | ||||
| 	toml_doc := toml.parse(toml_file) or { panic(err) } | ||||
| 
 | ||||
| 	assert toml_doc.value('lit1').string() == r'\' // '\'
 | ||||
| 	assert toml_doc.value('lit2').string() == r'\\' // '\\'
 | ||||
| 	assert toml_doc.value('lit3').string() == r'\tricky\' // '\tricky\'
 | ||||
| 
 | ||||
| 	// NOTE to Windows users: git is set to use Unix EOLs for all TOML files (*.toml) in the repo.
 | ||||
| 	// See `.gitattributes` in the project root for the rule in action.
 | ||||
| 	// These lines would look like this on Windows:
 | ||||
| 	// assert toml_doc.value('ml_lit1').string() == '\r\n\\'
 | ||||
| 	assert toml_doc.value('ml_lit1').string() == '\n\\' | ||||
| 	assert toml_doc.value('ml_lit2').string() == '\\\n\\' | ||||
| 	assert toml_doc.value('ml_lit3').string() == '\\\ntricky\\\n' | ||||
| } | ||||
|  |  | |||
|  | @ -13,3 +13,15 @@ two_space = """ ""two quotes"" """ | |||
| 
 | ||||
| mismatch1 = """aaa'''bbb""" | ||||
| mismatch2 = '''aaa"""bbb''' | ||||
| 
 | ||||
| lit1 = '\' | ||||
| lit2 = '\\' | ||||
| lit3 = '\tricky\' | ||||
| 
 | ||||
| ml_lit1 = ''' | ||||
| \''' | ||||
| ml_lit2 = '''\ | ||||
| \''' | ||||
| ml_lit3 = '''\ | ||||
| tricky\ | ||||
| ''' | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue