toml: fix scanner escape collecting in literal strings (#12022)

pull/12030/head
Larpon 2021-09-30 15:04:21 +02:00 committed by GitHub
parent d4b3c65c45
commit 8705db5844
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 12 deletions

1
.gitattributes vendored
View File

@ -2,3 +2,4 @@
*.vv linguist-language=V text=auto eol=lf
*.bat text=auto eol=crlf
Dockerfile.* linguist-language=Dockerfile
*.toml text eol=lf

View File

@ -484,22 +484,33 @@ fn (mut s Scanner) extract_multiline_string() ?string {
// handle_escapes inspects the character under the scanner cursor (`s.at()`) together
// with the characters that follow it (`s.peek(n)`) and returns two values: the text
// collected for the escape sequence and an int.
// `quote` is the quote character delimiting the string being scanned and
// `is_multiline` tells whether that string is a multi-line one.
// NOTE(review): the returned int (4, 1 or 0 below) looks like the number of extra
// characters the caller should skip - confirm against the call sites, which are not visible here.
// NOTE(review): this listing appears to be a commit diff rendered without +/- markers
// (see the `-484,22 +484,33` hunk header), so the unicode/quote branches directly below
// are repeated inside the `if !is_literal_string` guard further down.
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
c := s.at()
mut lit := c.ascii_str()
// `u` followed by four hex digits: collect the five characters after the cursor as well.
if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
return lit, 4
} else if s.peek(1) == quote {
if (!is_multiline && s.peek(2) == `\n`)
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string')
// A single quote as delimiter marks the string as a literal string.
is_literal_string := quote == `'`
// The unicode and escaped-quote handling below only applies when the string is not a literal string.
if !is_literal_string {
// `u` followed by four hex digits: collect the five characters after the cursor as well.
if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
return lit, 4
} else if s.peek(1) == quote {
// Special case: the quote after the cursor is followed by a newline (single-line string),
// or by two more quotes and a newline (multi-line string); nothing is collected then.
if (!is_multiline && s.peek(2) == `\n`)
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string')
return '', 0
}
// Otherwise the quote is part of the escape: collect it together with the current character.
lit += quote.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
return lit, 1
}
}
// In a literal string, a current character directly followed by the quote is not
// collected as an escape: an empty string and 0 are returned.
if is_literal_string {
if s.peek(1) == quote {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore escape `$lit${byte(s.peek(1)).ascii_str()}` in literal string')
return '', 0
}
lit += quote.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
return lit, 1
}
// Fallback: collect the current character together with the single character after it.
lit += byte(s.peek(1)).ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
return lit, 1
}

View File

@ -65,3 +65,22 @@ fn test_multiline_strings() {
value = toml_doc.value('mismatch2')
assert value.string() == 'aaa' + '"""' + 'bbb'
}
// test_literal_strings parses the `.toml` fixture that shares this test file's base name
// (found in the `testdata` directory next to it) and checks that backslashes inside
// single-line ('...') and multi-line ('''...''') literal strings come back verbatim.
fn test_literal_strings() {
	fixture_name := os.file_name(@FILE).all_before_last('.')
	fixture_stem := os.join_path(os.dir(@FILE), 'testdata', fixture_name)
	toml_file := os.real_path(fixture_stem) + '.toml'
	toml_doc := toml.parse(toml_file) or { panic(err) }

	// Single-line literal strings; the trailing comment shows the TOML source of each value.
	mut value := toml_doc.value('lit1')
	assert value.string() == r'\' // '\'
	value = toml_doc.value('lit2')
	assert value.string() == r'\\' // '\\'
	value = toml_doc.value('lit3')
	assert value.string() == r'\tricky\' // '\tricky\'

	// Multi-line literal strings.
	// NOTE to Windows users: git is configured to use Unix EOLs for every TOML file (*.toml) in the repo.
	// See the rule in `.gitattributes` in the project root.
	// With Windows EOLs the first check below would instead read:
	// assert toml_doc.value('ml_lit1').string() == '\r\n\\'
	value = toml_doc.value('ml_lit1')
	assert value.string() == '\n\\'
	value = toml_doc.value('ml_lit2')
	assert value.string() == '\\\n\\'
	value = toml_doc.value('ml_lit3')
	assert value.string() == '\\\ntricky\\\n'
}

View File

@ -13,3 +13,15 @@ two_space = """ ""two quotes"" """
mismatch1 = """aaa'''bbb"""
mismatch2 = '''aaa"""bbb'''
lit1 = '\'
lit2 = '\\'
lit3 = '\tricky\'
ml_lit1 = '''
\'''
ml_lit2 = '''\
\'''
ml_lit3 = '''\
tricky\
'''