toml: fix 7 escape tests (#12017)
parent
c2f535fee1
commit
e3d3727c0c
|
@ -9,6 +9,8 @@ import toml.ast.walker
|
||||||
import toml.token
|
import toml.token
|
||||||
import toml.scanner
|
import toml.scanner
|
||||||
|
|
||||||
|
pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`]
|
||||||
|
|
||||||
// Checker checks a tree of TOML `ast.Value`'s for common errors.
|
// Checker checks a tree of TOML `ast.Value`'s for common errors.
|
||||||
pub struct Checker {
|
pub struct Checker {
|
||||||
scanner &scanner.Scanner
|
scanner &scanner.Scanner
|
||||||
|
@ -172,12 +174,68 @@ fn (c Checker) check_boolean(b ast.Bool) ? {
|
||||||
' boolean values like "$lit" can only be `true` or `false` literals, not `$lit` in ...${c.excerpt(b.pos)}...')
|
' boolean values like "$lit" can only be `true` or `false` literals, not `$lit` in ...${c.excerpt(b.pos)}...')
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (c Checker) check_quoted(b ast.Quoted) ? {
|
fn (c Checker) check_quoted(q ast.Quoted) ? {
|
||||||
lit := b.text
|
lit := q.text
|
||||||
quote := b.quote.ascii_str()
|
quote := q.quote.ascii_str()
|
||||||
triple_quote := quote + quote + quote
|
triple_quote := quote + quote + quote
|
||||||
if b.is_multiline && lit.ends_with(triple_quote) {
|
if q.is_multiline && lit.ends_with(triple_quote) {
|
||||||
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
||||||
' string values like "$lit" is has unbalanced quote literals `b.quote` in ...${c.excerpt(b.pos)}...')
|
' string values like "$lit" is has unbalanced quote literals `q.quote` in ...${c.excerpt(q.pos)}...')
|
||||||
|
}
|
||||||
|
c.check_quoted_escapes(q) ?
|
||||||
|
}
|
||||||
|
|
||||||
|
// check_quoted_escapes returns an error for any disallowed escape sequences.
|
||||||
|
// Delimiters in TOML have significant meaning:
|
||||||
|
// '/''' delimits *literal* strings (WYSIWYG / What-you-see-is-what-you-get)
|
||||||
|
// "/""" delimits *basic* strings
|
||||||
|
// Allowed escapes in *basic* strings are:
|
||||||
|
// \b - backspace (U+0008)
|
||||||
|
// \t - tab (U+0009)
|
||||||
|
// \n - linefeed (U+000A)
|
||||||
|
// \f - form feed (U+000C)
|
||||||
|
// \r - carriage return (U+000D)
|
||||||
|
// \" - quote (U+0022)
|
||||||
|
// \\ - backslash (U+005C)
|
||||||
|
// \uXXXX - unicode (U+XXXX)
|
||||||
|
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||||
|
fn (c Checker) check_quoted_escapes(q ast.Quoted) ? {
|
||||||
|
// Setup a scanner in stack memory for easier navigation.
|
||||||
|
mut s := scanner.new_simple(q.text) ?
|
||||||
|
|
||||||
|
is_basic := q.quote == `\"`
|
||||||
|
for {
|
||||||
|
ch := s.next()
|
||||||
|
if ch == -1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
ch_byte := byte(ch)
|
||||||
|
if ch == `\\` {
|
||||||
|
next_ch := byte(s.at())
|
||||||
|
|
||||||
|
if next_ch == `\\` {
|
||||||
|
s.next()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
escape := ch_byte.ascii_str() + next_ch.ascii_str()
|
||||||
|
if is_basic {
|
||||||
|
if q.is_multiline {
|
||||||
|
if next_ch == byte(32) && s.peek(1) == byte(92) {
|
||||||
|
st := s.state()
|
||||||
|
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
||||||
|
' can not escape whitespaces before escapes in multi-line strings (`\\ \\`) at `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...')
|
||||||
|
}
|
||||||
|
if next_ch in [`\t`, `\n`, ` `] {
|
||||||
|
s.next()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if next_ch !in checker.allowed_basic_escape_chars {
|
||||||
|
st := s.state()
|
||||||
|
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
||||||
|
' unknown basic string escape character `$next_ch.ascii_str()` in `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,16 @@ mut:
|
||||||
mode Mode // sub-mode of the scanner
|
mode Mode // sub-mode of the scanner
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// State is a read-only copy of the scanner's internal state.
|
||||||
|
// See also `Scanner.state()`.
|
||||||
|
pub struct State {
|
||||||
|
pub:
|
||||||
|
col int // current column number (x coordinate)
|
||||||
|
line_nr int = 1 // current line number (y coordinate)
|
||||||
|
pos int // current flat/index position in the `text` field
|
||||||
|
mode Mode // sub-mode of the scanner
|
||||||
|
}
|
||||||
|
|
||||||
enum Mode {
|
enum Mode {
|
||||||
normal
|
normal
|
||||||
inside_string
|
inside_string
|
||||||
|
@ -426,6 +436,8 @@ fn (mut s Scanner) extract_multiline_string() ?string {
|
||||||
}
|
}
|
||||||
|
|
||||||
c := s.at()
|
c := s.at()
|
||||||
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c (quote type: $quote/$quote.ascii_str())')
|
||||||
|
|
||||||
if c == `\n` {
|
if c == `\n` {
|
||||||
s.inc_line_number()
|
s.inc_line_number()
|
||||||
lit += c.ascii_str()
|
lit += c.ascii_str()
|
||||||
|
@ -443,8 +455,6 @@ fn (mut s Scanner) extract_multiline_string() ?string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c')
|
|
||||||
|
|
||||||
if c == quote {
|
if c == quote {
|
||||||
if s.peek(1) == quote && s.peek(2) == quote {
|
if s.peek(1) == quote && s.peek(2) == quote {
|
||||||
if s.peek(3) == -1 {
|
if s.peek(3) == -1 {
|
||||||
|
@ -469,14 +479,16 @@ fn (mut s Scanner) extract_multiline_string() ?string {
|
||||||
return lit
|
return lit
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle_escapes
|
// handle_escapes returns any escape character sequence.
|
||||||
|
// For escape sequence validation see `Checker.check_quoted_escapes`.
|
||||||
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
|
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
|
||||||
c := s.at()
|
c := s.at()
|
||||||
mut lit := c.ascii_str()
|
mut lit := c.ascii_str()
|
||||||
if s.peek(1) == byte(92) {
|
if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
|
||||||
lit += lit
|
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
|
||||||
return lit, 1
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
|
||||||
|
return lit, 4
|
||||||
} else if s.peek(1) == quote {
|
} else if s.peek(1) == quote {
|
||||||
if (!is_multiline && s.peek(2) == `\n`)
|
if (!is_multiline && s.peek(2) == `\n`)
|
||||||
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
|
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
|
||||||
|
@ -486,13 +498,9 @@ fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
|
||||||
lit += quote.ascii_str()
|
lit += quote.ascii_str()
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
||||||
return lit, 1
|
return lit, 1
|
||||||
} else if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
|
|
||||||
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
|
|
||||||
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
|
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
|
|
||||||
return lit, 4
|
|
||||||
}
|
}
|
||||||
return '', 0
|
lit += byte(s.peek(1)).ascii_str()
|
||||||
|
return lit, 1
|
||||||
}
|
}
|
||||||
|
|
||||||
// extract_number collects and returns a string containing
|
// extract_number collects and returns a string containing
|
||||||
|
@ -542,3 +550,13 @@ pub fn (s Scanner) excerpt(pos int, margin int) string {
|
||||||
end := if pos + margin < s.text.len { pos + margin } else { s.text.len }
|
end := if pos + margin < s.text.len { pos + margin } else { s.text.len }
|
||||||
return s.text[start..end].replace('\n', r'\n')
|
return s.text[start..end].replace('\n', r'\n')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// state returns a read-only copy of the scanner's internal state.
|
||||||
|
pub fn (s Scanner) state() State {
|
||||||
|
return State{
|
||||||
|
col: s.col
|
||||||
|
line_nr: s.line_nr
|
||||||
|
pos: s.pos
|
||||||
|
mode: s.mode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -15,19 +15,12 @@ const (
|
||||||
invalid_exceptions = [
|
invalid_exceptions = [
|
||||||
// String
|
// String
|
||||||
'string/basic-multiline-out-of-range-unicode-escape-1.toml',
|
'string/basic-multiline-out-of-range-unicode-escape-1.toml',
|
||||||
'string/basic-byte-escapes.toml',
|
|
||||||
'string/multiline-escape-space.toml',
|
|
||||||
'string/bad-codepoint.toml',
|
'string/bad-codepoint.toml',
|
||||||
'string/basic-multiline-out-of-range-unicode-escape-2.toml',
|
'string/basic-multiline-out-of-range-unicode-escape-2.toml',
|
||||||
'string/bad-slash-escape.toml',
|
|
||||||
'string/basic-out-of-range-unicode-escape-1.toml',
|
'string/basic-out-of-range-unicode-escape-1.toml',
|
||||||
'string/basic-out-of-range-unicode-escape-2.toml',
|
'string/basic-out-of-range-unicode-escape-2.toml',
|
||||||
'string/bad-uni-esc.toml',
|
'string/bad-uni-esc.toml',
|
||||||
'string/bad-escape.toml',
|
|
||||||
'string/basic-multiline-unknown-escape.toml',
|
|
||||||
'string/missing-quotes.toml',
|
'string/missing-quotes.toml',
|
||||||
'string/bad-byte-escape.toml',
|
|
||||||
'string/basic-unknown-escape.toml',
|
|
||||||
// Integer
|
// Integer
|
||||||
'integer/capital-bin.toml',
|
'integer/capital-bin.toml',
|
||||||
'integer/invalid-bin.toml',
|
'integer/invalid-bin.toml',
|
||||||
|
@ -155,6 +148,10 @@ fn test_burnt_sushi_tomltest() {
|
||||||
if relative !in invalid_exceptions {
|
if relative !in invalid_exceptions {
|
||||||
println('OK [$i/$invalid_test_files.len] "$invalid_test_file"...')
|
println('OK [$i/$invalid_test_files.len] "$invalid_test_file"...')
|
||||||
if toml_doc := toml.parse_file(invalid_test_file) {
|
if toml_doc := toml.parse_file(invalid_test_file) {
|
||||||
|
content_that_should_have_failed := os.read_file(invalid_test_file) or {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
println(' This TOML should have failed:\n${'-'.repeat(40)}\n$content_that_should_have_failed\n${'-'.repeat(40)}')
|
||||||
assert false
|
assert false
|
||||||
} else {
|
} else {
|
||||||
println(' $err.msg')
|
println(' $err.msg')
|
||||||
|
|
Loading…
Reference in New Issue