From 8705db58443ebb310272305d7f39e3a6b0f23c00 Mon Sep 17 00:00:00 2001 From: Larpon Date: Thu, 30 Sep 2021 15:04:21 +0200 Subject: [PATCH] toml: fix scanner escape collecting in literal strings (#12022) --- .gitattributes | 1 + vlib/toml/scanner/scanner.v | 35 ++++++++++++++-------- vlib/toml/tests/strings_test.v | 19 ++++++++++++ vlib/toml/tests/testdata/strings_test.toml | 12 ++++++++ 4 files changed, 55 insertions(+), 12 deletions(-) diff --git a/.gitattributes b/.gitattributes index 6d8d6dc22c..11510b0cbc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ *.vv linguist-language=V text=auto eol=lf *.bat text=auto eol=crlf Dockerfile.* linguist-language=Dockerfile +*.toml text eol=lf diff --git a/vlib/toml/scanner/scanner.v b/vlib/toml/scanner/scanner.v index a596014d32..c0494c1950 100644 --- a/vlib/toml/scanner/scanner.v +++ b/vlib/toml/scanner/scanner.v @@ -484,22 +484,33 @@ fn (mut s Scanner) extract_multiline_string() ?string { fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) { c := s.at() mut lit := c.ascii_str() - if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit() - && byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() { - lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str() - util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`') - return lit, 4 - } else if s.peek(1) == quote { - if (!is_multiline && s.peek(2) == `\n`) - || (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) { - util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string') + is_literal_string := quote == `'` + if !is_literal_string { + if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit() + && byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() { + lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str() + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`') + return lit, 4 + } else if s.peek(1) == quote { + if (!is_multiline && s.peek(2) == `\n`) + || (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string') + return '', 0 + } + lit += quote.ascii_str() + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`') + return lit, 1 + } + } + if is_literal_string { + if s.peek(1) == quote { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore escape `$lit${byte(s.peek(1)).ascii_str()}` in literal string') return '', 0 } - lit += quote.ascii_str() - util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`') - return lit, 1 } + lit += byte(s.peek(1)).ascii_str() + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`') return lit, 1 } diff --git a/vlib/toml/tests/strings_test.v b/vlib/toml/tests/strings_test.v index c1be2abb05..e6e0ab0d2c 100644 --- a/vlib/toml/tests/strings_test.v +++ b/vlib/toml/tests/strings_test.v @@ -65,3 +65,22 @@ fn test_multiline_strings() { value = toml_doc.value('mismatch2') assert value.string() == 'aaa' + '"""' + 'bbb' } + +fn test_literal_strings() { + toml_file := + os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) + + '.toml' + toml_doc := toml.parse(toml_file) or { panic(err) } + + assert toml_doc.value('lit1').string() == r'\' // '\' + assert toml_doc.value('lit2').string() == r'\\' // '\\' + assert toml_doc.value('lit3').string() == r'\tricky\' // '\tricky\' + + // NOTE to Windows users: git is set to use Unix EOLs for all TOML files (*.toml) in the repo. + // See `.gitattributes` in the project root for the rule in action. + // These lines would look like this on Windows: + // assert toml_doc.value('ml_lit1').string() == '\r\n\\' + assert toml_doc.value('ml_lit1').string() == '\n\\' + assert toml_doc.value('ml_lit2').string() == '\\\n\\' + assert toml_doc.value('ml_lit3').string() == '\\\ntricky\\\n' +} diff --git a/vlib/toml/tests/testdata/strings_test.toml b/vlib/toml/tests/testdata/strings_test.toml index e138e2fa04..953bee9c1c 100644 --- a/vlib/toml/tests/testdata/strings_test.toml +++ b/vlib/toml/tests/testdata/strings_test.toml @@ -13,3 +13,15 @@ two_space = """ ""two quotes"" """ mismatch1 = """aaa'''bbb""" mismatch2 = '''aaa"""bbb''' + +lit1 = '\' +lit2 = '\\' +lit3 = '\tricky\' + +ml_lit1 = ''' +\''' +ml_lit2 = '''\ +\''' +ml_lit3 = '''\ +tricky\ +'''