toml: fix scanning of short unicode escapes (#12491)

pull/12495/head
Larpon 2021-11-17 16:24:40 +01:00 committed by GitHub
parent 3e1fb22a04
commit 7cdc906683
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 1 deletion

View File

@ -516,7 +516,7 @@ fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() { && byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str() lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
return lit, 4 return lit, 5
} else if s.peek(1) == quote { } else if s.peek(1) == quote {
if (!is_multiline && s.peek(2) == `\n`) if (!is_multiline && s.peek(2) == `\n`)
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) { || (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {

View File

@ -28,6 +28,8 @@ two
three three
four four
'''" '''"
toml_unicode_escapes = r'short = "\u03B4"
long = "\U000003B4"'
) )
fn test_multiline_strings() { fn test_multiline_strings() {
@ -66,6 +68,15 @@ fn test_multiline_strings() {
assert value.string() == 'aaa' + '"""' + 'bbb' assert value.string() == 'aaa' + '"""' + 'bbb'
} }
// test_unicode_escapes parses the `toml_unicode_escapes` document and checks
// that both the short (`\u` + 4 hex digits) and the long (`\U` + 8 hex digits)
// escape forms are scanned completely and can be read back by key.
fn test_unicode_escapes() {
// A parse failure aborts the test via panic.
mut toml_doc := toml.parse(toml_unicode_escapes) or { panic(err) }
mut value := toml_doc.value('short')
// The expected value is a raw string: the assertion compares against the
// escape text itself, not against a decoded code point.
assert value.string() == r'\u03B4'
// `value` is reused for the second lookup, hence the `mut` declaration above.
value = toml_doc.value('long')
assert value.string() == r'\U000003B4'
}
fn test_literal_strings() { fn test_literal_strings() {
toml_file := toml_file :=
os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) + os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +