toml: fix scanner escape collecting in literal strings (#12022)
parent
d4b3c65c45
commit
8705db5844
vlib/toml
scanner
tests
testdata
|
@ -2,3 +2,4 @@
|
||||||
*.vv linguist-language=V text=auto eol=lf
|
*.vv linguist-language=V text=auto eol=lf
|
||||||
*.bat text=auto eol=crlf
|
*.bat text=auto eol=crlf
|
||||||
Dockerfile.* linguist-language=Dockerfile
|
Dockerfile.* linguist-language=Dockerfile
|
||||||
|
*.toml text eol=lf
|
||||||
|
|
|
@ -484,22 +484,33 @@ fn (mut s Scanner) extract_multiline_string() ?string {
|
||||||
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
|
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
|
||||||
c := s.at()
|
c := s.at()
|
||||||
mut lit := c.ascii_str()
|
mut lit := c.ascii_str()
|
||||||
if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
|
is_literal_string := quote == `'`
|
||||||
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
|
if !is_literal_string {
|
||||||
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
|
if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
|
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
|
||||||
return lit, 4
|
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
|
||||||
} else if s.peek(1) == quote {
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
|
||||||
if (!is_multiline && s.peek(2) == `\n`)
|
return lit, 4
|
||||||
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
|
} else if s.peek(1) == quote {
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string')
|
if (!is_multiline && s.peek(2) == `\n`)
|
||||||
|
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
|
||||||
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string')
|
||||||
|
return '', 0
|
||||||
|
}
|
||||||
|
lit += quote.ascii_str()
|
||||||
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
||||||
|
return lit, 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if is_literal_string {
|
||||||
|
if s.peek(1) == quote {
|
||||||
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore escape `$lit${byte(s.peek(1)).ascii_str()}` in literal string')
|
||||||
return '', 0
|
return '', 0
|
||||||
}
|
}
|
||||||
lit += quote.ascii_str()
|
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
|
||||||
return lit, 1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lit += byte(s.peek(1)).ascii_str()
|
lit += byte(s.peek(1)).ascii_str()
|
||||||
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
||||||
return lit, 1
|
return lit, 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -65,3 +65,22 @@ fn test_multiline_strings() {
|
||||||
value = toml_doc.value('mismatch2')
|
value = toml_doc.value('mismatch2')
|
||||||
assert value.string() == 'aaa' + '"""' + 'bbb'
|
assert value.string() == 'aaa' + '"""' + 'bbb'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn test_literal_strings() {
|
||||||
|
toml_file :=
|
||||||
|
os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +
|
||||||
|
'.toml'
|
||||||
|
toml_doc := toml.parse(toml_file) or { panic(err) }
|
||||||
|
|
||||||
|
assert toml_doc.value('lit1').string() == r'\' // '\'
|
||||||
|
assert toml_doc.value('lit2').string() == r'\\' // '\\'
|
||||||
|
assert toml_doc.value('lit3').string() == r'\tricky\' // '\tricky\'
|
||||||
|
|
||||||
|
// NOTE to Windows users: git is set to use Unix EOLs for all TOML files (*.toml) in the repo.
|
||||||
|
// See `.gitattributes` in the project root for the rule in action.
|
||||||
|
// These lines would look like this on Windows:
|
||||||
|
// assert toml_doc.value('ml_lit1').string() == '\r\n\\'
|
||||||
|
assert toml_doc.value('ml_lit1').string() == '\n\\'
|
||||||
|
assert toml_doc.value('ml_lit2').string() == '\\\n\\'
|
||||||
|
assert toml_doc.value('ml_lit3').string() == '\\\ntricky\\\n'
|
||||||
|
}
|
||||||
|
|
|
@ -13,3 +13,15 @@ two_space = """ ""two quotes"" """
|
||||||
|
|
||||||
mismatch1 = """aaa'''bbb"""
|
mismatch1 = """aaa'''bbb"""
|
||||||
mismatch2 = '''aaa"""bbb'''
|
mismatch2 = '''aaa"""bbb'''
|
||||||
|
|
||||||
|
lit1 = '\'
|
||||||
|
lit2 = '\\'
|
||||||
|
lit3 = '\tricky\'
|
||||||
|
|
||||||
|
ml_lit1 = '''
|
||||||
|
\'''
|
||||||
|
ml_lit2 = '''\
|
||||||
|
\'''
|
||||||
|
ml_lit3 = '''\
|
||||||
|
tricky\
|
||||||
|
'''
|
||||||
|
|
Loading…
Reference in New Issue