toml: fix scanner escape collecting in literal strings (#12022)
parent
d4b3c65c45
commit
8705db5844
|
@ -2,3 +2,4 @@
|
|||
*.vv linguist-language=V text=auto eol=lf
|
||||
*.bat text=auto eol=crlf
|
||||
Dockerfile.* linguist-language=Dockerfile
|
||||
*.toml text eol=lf
|
||||
|
|
|
@ -484,22 +484,33 @@ fn (mut s Scanner) extract_multiline_string() ?string {
|
|||
// handle_escapes inspects the character at the current scanner position (`s.at()`,
// presumably the escape character `\` - confirm against the caller) together with the
// characters that follow it, and returns the text to collect plus an int
// (presumably the number of additional characters consumed - confirm against the caller).
// `quote` is the delimiter of the string being scanned; `is_multiline` selects the
// triple-quote form of the end-of-string check.
// NOTE(review): this hunk appears to show the removed and the added lines of the change
// together, so the `u`/quote checks occur twice and the braces do not balance as shown.
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
|
||||
c := s.at()
|
||||
mut lit := c.ascii_str()
|
||||
// NOTE(review): the check below is repeated further down inside `if !is_literal_string`;
// this first copy looks like the pre-change version - confirm.
if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
|
||||
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
|
||||
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
|
||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
|
||||
return lit, 4
|
||||
} else if s.peek(1) == quote {
|
||||
if (!is_multiline && s.peek(2) == `\n`)
|
||||
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
|
||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string')
|
||||
// A single-quote delimiter marks the string as a literal string.
is_literal_string := quote == `'`
|
||||
// The `u` and escaped-quote handling below only runs for non-literal strings.
if !is_literal_string {
|
||||
// `u` followed by four hex digits: append the `u` and the digits to `lit`.
// NOTE(review): the slice covers 5 characters while the returned count is 4 - confirm this is intended.
if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
|
||||
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
|
||||
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
|
||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
|
||||
return lit, 4
|
||||
} else if s.peek(1) == quote {
|
||||
// Next character is the delimiter and it is directly followed by a newline
// (single-line) or by two more delimiters and a newline (multiline):
// nothing is collected.
if (!is_multiline && s.peek(2) == `\n`)
|
||||
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
|
||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string')
|
||||
return '', 0
|
||||
}
|
||||
// Otherwise collect the current character plus the delimiter.
lit += quote.ascii_str()
|
||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
||||
return lit, 1
|
||||
}
|
||||
}
|
||||
// In a literal string, when the next character is the delimiter, nothing is collected here.
if is_literal_string {
|
||||
if s.peek(1) == quote {
|
||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore escape `$lit${byte(s.peek(1)).ascii_str()}` in literal string')
|
||||
return '', 0
|
||||
}
|
||||
lit += quote.ascii_str()
|
||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
||||
return lit, 1
|
||||
}
|
||||
|
||||
// Fallback: collect the current character plus the one that follows it.
lit += byte(s.peek(1)).ascii_str()
|
||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
||||
return lit, 1
|
||||
}
|
||||
|
||||
|
|
|
@ -65,3 +65,22 @@ fn test_multiline_strings() {
|
|||
value = toml_doc.value('mismatch2')
|
||||
assert value.string() == 'aaa' + '"""' + 'bbb'
|
||||
}
|
||||
|
||||
// test_literal_strings parses the TOML file in `testdata` that shares this test file's
// base name and asserts that the values of `lit1`..`lit3` and `ml_lit1`..`ml_lit3`
// come back with their backslashes kept verbatim.
fn test_literal_strings() {
|
||||
// Path: <directory of this file>/testdata/<this file's name without extension>.toml
toml_file :=
|
||||
os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +
|
||||
'.toml'
|
||||
// A parse failure aborts the test with a panic.
toml_doc := toml.parse(toml_file) or { panic(err) }
|
||||
|
||||
// The expected values are written as raw strings (r'...'); the trailing comment on
// each line repeats the value as it is written in the TOML file.
assert toml_doc.value('lit1').string() == r'\' // '\'
|
||||
assert toml_doc.value('lit2').string() == r'\\' // '\\'
|
||||
assert toml_doc.value('lit3').string() == r'\tricky\' // '\tricky\'
|
||||
|
||||
// NOTE to Windows users: git is set to use Unix EOLs for all TOML files (*.toml) in the repo.
|
||||
// See `.gitattributes` in the project root for the rule in action.
|
||||
// Without that rule the expected values would contain `\r\n` on Windows, for example:
|
||||
// assert toml_doc.value('ml_lit1').string() == '\r\n\\'
|
||||
// The expected values below are ordinary (non-raw) strings: `\n` is a newline and
// `\\` is a single backslash.
assert toml_doc.value('ml_lit1').string() == '\n\\'
|
||||
assert toml_doc.value('ml_lit2').string() == '\\\n\\'
|
||||
assert toml_doc.value('ml_lit3').string() == '\\\ntricky\\\n'
|
||||
}
|
||||
|
|
|
@ -13,3 +13,15 @@ two_space = """ ""two quotes"" """
|
|||
|
||||
mismatch1 = """aaa'''bbb"""
|
||||
mismatch2 = '''aaa"""bbb'''
|
||||
|
||||
lit1 = '\'
|
||||
lit2 = '\\'
|
||||
lit3 = '\tricky\'
|
||||
|
||||
ml_lit1 = '''
|
||||
\'''
|
||||
ml_lit2 = '''\
|
||||
\'''
|
||||
ml_lit3 = '''\
|
||||
tricky\
|
||||
'''
|
||||
|
|
Loading…
Reference in New Issue