toml: upgrade the module to 100% BurntSushi test suite parsing compatibility (#12466)

pull/12473/head
Larpon 2021-11-15 14:36:14 +01:00 committed by GitHub
parent 78662c800c
commit cbdb270d2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 96 additions and 50 deletions

View File

@ -14,7 +14,7 @@ jobs:
timeout-minutes: 10 timeout-minutes: 10
env: env:
TOML_TESTS_PATH: vlib/toml/tests/testdata/burntsushi/toml-test TOML_TESTS_PATH: vlib/toml/tests/testdata/burntsushi/toml-test
TOML_TESTS_PINNED_COMMIT: 982482f TOML_TESTS_PINNED_COMMIT: 576db85
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2

View File

@ -20,7 +20,21 @@ pub fn (dk DottedKey) str() string {
return dk.join('.') return dk.join('.')
} }
pub fn (a []DottedKey) has(target DottedKey) bool { // starts_with returns true if the dotted key starts with the same key entries as `target`.
fn (dk DottedKey) starts_with(target DottedKey) bool {
if dk.len >= target.len {
for i := 0; i < target.len; i++ {
if dk[i] != target[i] {
return false
}
}
return true
}
return false
}
// has returns true if the array contains `target`.
fn (a []DottedKey) has(target DottedKey) bool {
for dk in a { for dk in a {
if dk == target { if dk == target {
return true return true
@ -225,6 +239,14 @@ fn (mut p Parser) expect(expected_token token.Kind) ? {
} }
} }
// check_explicitly_declared returns an error if `key` has been explicitly declared.
fn (p Parser) check_explicitly_declared(key DottedKey) ? {
if p.explicit_declared.len > 0 && p.explicit_declared.has(key) {
return error(@MOD + '.' + @STRUCT + '.' + @FN +
' key `$key.str()` is already explicitly declared. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
}
}
// find_table returns a reference to a map if found in the *root* table given a "dotted" key (`a.b.c`). // find_table returns a reference to a map if found in the *root* table given a "dotted" key (`a.b.c`).
// If some segments of the key does not exist in the root table find_table will // If some segments of the key does not exist in the root table find_table will
// allocate a new map for each segment. This behavior is needed because you can // allocate a new map for each segment. This behavior is needed because you can
@ -362,13 +384,31 @@ pub fn (mut p Parser) root_table() ? {
if peek_tok.kind == .period { if peek_tok.kind == .period {
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
dotkey := p.dotted_key() ? dotted_key := p.dotted_key() ?
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
p.check(.assign) ? p.check(.assign) ?
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
val := p.value() ? val := p.value() ?
sub_table, key := p.sub_table_key(dotkey) sub_table, key := p.sub_table_key(dotted_key)
// Check for "table injection":
// https://github.com/BurntSushi/toml-test/blob/576db8523df1b8705ef18c526b4a6ba9c271bbbc/tests/invalid/table/injection-1.toml
// https://github.com/BurntSushi/toml-test/blob/576db8523df1b8705ef18c526b4a6ba9c271bbbc/tests/invalid/table/injection-2.toml
// NOTE this is a *relatively* costly check. In general - and by specification,
// TOML documents are expected to be "small" so this shouldn't be a problem. Famous last words.
for explicit_key in p.explicit_declared {
if explicit_key.len == 1 || explicit_key == p.root_map_key {
continue
}
mut abs_dotted_key := DottedKey([]string{})
abs_dotted_key << p.root_map_key
abs_dotted_key << sub_table
if abs_dotted_key.starts_with(explicit_key) {
return error(@MOD + '.' + @STRUCT + '.' + @FN +
' key `$dotted_key` has already been explicitly declared. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
}
}
t := p.find_sub_table(sub_table) ? t := p.find_sub_table(sub_table) ?
unsafe { unsafe {
@ -379,6 +419,14 @@ pub fn (mut p Parser) root_table() ? {
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
key, val := p.key_value() ? key, val := p.key_value() ?
// Check and register explicitly declared arrays
if val is []ast.Value {
dotted_key := DottedKey([key.str()])
// Disallow re-declaring the key
p.check_explicitly_declared(dotted_key) ?
p.explicit_declared << dotted_key
}
t := p.find_table() ? t := p.find_table() ?
unsafe { unsafe {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val.to_json() in table ${ptr_str(t)}') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val.to_json() in table ${ptr_str(t)}')
@ -419,32 +467,36 @@ pub fn (mut p Parser) root_table() ? {
} else if peek_tok.kind == .period { } else if peek_tok.kind == .period {
// Parse `[d.e.f]` // Parse `[d.e.f]`
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
p.root_map_key = p.dotted_key() ? dotted_key := p.dotted_key() ?
// Disallow redeclaring the key // Disallow re-declaring the key
if p.explicit_declared.has(p.root_map_key) { p.check_explicitly_declared(dotted_key) ?
return error(@MOD + '.' + @STRUCT + '.' + @FN + p.explicit_declared << dotted_key
' key `$p.root_map_key` is already explicitly declared. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
}
p.explicit_declared << p.root_map_key
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$p.root_map_key` at "$p.tok.kind" "$p.tok.lit"') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$dotted_key` at "$p.tok.kind" "$p.tok.lit"')
p.root_map_key = dotted_key
p.expect(.rsbr) ? p.expect(.rsbr) ?
p.peek_for_correct_line_ending_or_fail() ? p.peek_for_correct_line_ending_or_fail() ?
} else { } else {
// Parse `[key]` // Parse `[key]`
key := p.key() ? key := p.key() ?
p.root_map_key = DottedKey([key.str()]) dotted_key := DottedKey([key.str()])
// Disallow redeclaring the key // Disallow re-declaring the key
if p.explicit_declared.has(p.root_map_key) { p.check_explicitly_declared(dotted_key) ?
p.explicit_declared << dotted_key
// Check for footgun re-declaration in this odd way:
// [[tbl]]
// [tbl]
if p.last_aot == dotted_key {
return error(@MOD + '.' + @STRUCT + '.' + @FN + return error(@MOD + '.' + @STRUCT + '.' + @FN +
' key `$p.root_map_key` is already explicitly declared. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."') ' key `$dotted_key` has already been explicitly declared. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
} }
p.explicit_declared << p.root_map_key
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$p.root_map_key` at "$p.tok.kind" "$p.tok.lit"') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$dotted_key` at "$p.tok.kind" "$p.tok.lit"')
p.root_map_key = dotted_key
p.next() ? p.next() ?
p.expect(.rsbr) ? p.expect(.rsbr) ?
p.peek_for_correct_line_ending_or_fail() ? p.peek_for_correct_line_ending_or_fail() ?
@ -515,13 +567,13 @@ pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? {
if peek_tok.kind == .period { if peek_tok.kind == .period {
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
dotkey := p.dotted_key() ? dotted_key := p.dotted_key() ?
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
p.check(.assign) ? p.check(.assign) ?
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
val := p.value() ? val := p.value() ?
sub_table, key := p.sub_table_key(dotkey) sub_table, key := p.sub_table_key(dotted_key)
mut t := p.find_in_table(mut tbl, sub_table) ? mut t := p.find_in_table(mut tbl, sub_table) ?
unsafe { unsafe {
@ -574,6 +626,10 @@ pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Value) ? {
dotted_key := DottedKey([key.str()]) dotted_key := DottedKey([key.str()])
dotted_key_str := dotted_key.str() dotted_key_str := dotted_key.str()
// Disallow re-declaring the key
p.check_explicitly_declared(dotted_key) ?
unsafe { unsafe {
if val := table[dotted_key_str] { if val := table[dotted_key_str] {
if val is []ast.Value { if val is []ast.Value {
@ -609,12 +665,12 @@ pub fn (mut p Parser) array_of_tables_contents() ?[]ast.Value {
match p.tok.kind { match p.tok.kind {
.bare, .quoted, .boolean, .number { .bare, .quoted, .boolean, .number {
if p.peek_tok.kind == .period { if p.peek_tok.kind == .period {
dotkey := p.dotted_key() ? dotted_key := p.dotted_key() ?
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
p.check(.assign) ? p.check(.assign) ?
val := p.value() ? val := p.value() ?
sub_table, key := p.sub_table_key(dotkey) sub_table, key := p.sub_table_key(dotted_key)
mut t := p.find_in_table(mut tbl, sub_table) ? mut t := p.find_in_table(mut tbl, sub_table) ?
unsafe { unsafe {
@ -671,13 +727,18 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? {
mut t_map := ast.Value(ast.Null{}) mut t_map := ast.Value(ast.Null{})
unsafe { unsafe {
// NOTE this is starting to get EVEN uglier. TOML is not at all simple at this point... // NOTE this is starting to get EVEN uglier. TOML is not *at all* simple at this point...
if first != p.last_aot { if first != p.last_aot {
// Implicit allocation // Implicit allocation
if p.last_aot.len == 0 { if p.last_aot.len == 0 {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'implicit allocation of array for dotted key `$dotted_key`.') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'implicit allocation of array for dotted key `$dotted_key`.')
table[first.str()] = []ast.Value{} table[first.str()] = []ast.Value{}
p.last_aot = first p.last_aot = first
// We register this implicit allocation as *explicit* to be able to catch
// special cases like:
// https://github.com/BurntSushi/toml-test/blob/576db852/tests/invalid/table/array-implicit.toml
p.explicit_declared << first
t_arr = &(table[p.last_aot.str()] as []ast.Value) t_arr = &(table[p.last_aot.str()] as []ast.Value)
t_arr << ast.Value(map[string]ast.Value{}) t_arr << ast.Value(map[string]ast.Value{})
p.last_aot_index = t_arr.len - 1 p.last_aot_index = t_arr.len - 1
@ -730,7 +791,7 @@ pub fn (mut p Parser) double_array_of_tables_contents(target_key DottedKey) ?[]a
// Peek forward as far as we can skipping over space formatting tokens. // Peek forward as far as we can skipping over space formatting tokens.
peek_tok, peeked_over = p.peek_over(1, parser.space_formatting) ? peek_tok, peeked_over = p.peek_over(1, parser.space_formatting) ?
// Peek for occurence of `[[` // Peek for occurrence of `[[`
if peek_tok.kind == .lsbr { if peek_tok.kind == .lsbr {
peek_tok, peeked_over = p.peek_over(peeked_over + 1, parser.space_formatting) ? peek_tok, peeked_over = p.peek_over(peeked_over + 1, parser.space_formatting) ?
if peek_tok.kind == .lsbr { if peek_tok.kind == .lsbr {
@ -743,15 +804,15 @@ pub fn (mut p Parser) double_array_of_tables_contents(target_key DottedKey) ?[]a
match p.tok.kind { match p.tok.kind {
.bare, .quoted, .boolean, .number { .bare, .quoted, .boolean, .number {
if p.peek_tok.kind == .period { if p.peek_tok.kind == .period {
mut dotkey := p.dotted_key() ? mut dotted_key := p.dotted_key() ?
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
p.check(.assign) ? p.check(.assign) ?
val := p.value() ? val := p.value() ?
if implicit_allocation_key.len > 0 { if implicit_allocation_key.len > 0 {
dotkey.insert(0, implicit_allocation_key) dotted_key.insert(0, implicit_allocation_key)
} }
sub_table, key := p.sub_table_key(dotkey) sub_table, key := p.sub_table_key(dotted_key)
mut t := p.find_in_table(mut tbl, sub_table) ? mut t := p.find_in_table(mut tbl, sub_table) ?
unsafe { unsafe {
@ -787,13 +848,13 @@ pub fn (mut p Parser) double_array_of_tables_contents(target_key DottedKey) ?[]a
if peek_tok.kind == .period { if peek_tok.kind == .period {
// Parse `[d.e.f]` // Parse `[d.e.f]`
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
dotkey := p.dotted_key() ? dotted_key := p.dotted_key() ?
implicit_allocation_key = dotkey implicit_allocation_key = dotted_key
if dotkey.len > 2 { if dotted_key.len > 2 {
implicit_allocation_key = dotkey[2..] implicit_allocation_key = dotted_key[2..]
} }
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'keys are: dotted `$dotkey`, target `$target_key`, implicit `$implicit_allocation_key` at "$p.tok.kind" "$p.tok.lit"') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'keys are: dotted `$dotted_key`, target `$target_key`, implicit `$implicit_allocation_key` at "$p.tok.kind" "$p.tok.lit"')
p.expect(.rsbr) ? p.expect(.rsbr) ?
p.peek_for_correct_line_ending_or_fail() ? p.peek_for_correct_line_ending_or_fail() ?
continue continue

View File

@ -6,20 +6,11 @@ import toml
// `cd vlib/toml/tests/testdata` // `cd vlib/toml/tests/testdata`
// `git clone --depth 1 https://github.com/BurntSushi/toml-test.git burntsushi/toml-test` // `git clone --depth 1 https://github.com/BurntSushi/toml-test.git burntsushi/toml-test`
// See also the CI toml tests // See also the CI toml tests
// TODO Goal: make parsing AND value retrieval of all of https://github.com/BurntSushi/toml-test/test/ pass // TODO Goal: make value retrieval of all of https://github.com/BurntSushi/toml-test/test/ pass
const ( const (
// Kept for easier handling of future updates to the tests // Kept for easier handling of future updates to the tests
valid_exceptions = []string{} valid_exceptions = []string{}
invalid_exceptions = [ invalid_exceptions = []string{}
// Table
'table/duplicate-table-array2.toml',
'table/array-implicit.toml',
'table/injection-2.toml',
'table/injection-1.toml',
'table/duplicate-table-array.toml',
// Array
'array/tables-1.toml',
]
) )
// test_burnt_sushi_tomltest run though 'testdata/burntsushi/toml-test/*' if found. // test_burnt_sushi_tomltest run though 'testdata/burntsushi/toml-test/*' if found.
@ -58,9 +49,6 @@ fn test_burnt_sushi_tomltest() {
println('TODO Skipped parsing of $valid_exceptions.len valid TOML files...') println('TODO Skipped parsing of $valid_exceptions.len valid TOML files...')
} }
// NOTE uncomment to see list of skipped files
// assert false
// TODO test cases where the parser should fail // TODO test cases where the parser should fail
invalid_test_files := os.walk_ext(os.join_path(test_root, 'invalid'), '.toml') invalid_test_files := os.walk_ext(os.join_path(test_root, 'invalid'), '.toml')
println('Testing $invalid_test_files.len invalid TOML files...') println('Testing $invalid_test_files.len invalid TOML files...')
@ -82,7 +70,7 @@ fn test_burnt_sushi_tomltest() {
assert false assert false
} else { } else {
println(' $err.msg') println(' $err.msg')
assert true // err.msg == 'your error' assert true
} }
invalid++ invalid++
} else { } else {
@ -94,9 +82,6 @@ fn test_burnt_sushi_tomltest() {
if invalid_exceptions.len > 0 { if invalid_exceptions.len > 0 {
println('TODO Skipped parsing of $invalid_exceptions.len invalid TOML files...') println('TODO Skipped parsing of $invalid_exceptions.len invalid TOML files...')
} }
// NOTE uncomment to see list of skipped files
// assert false
} else { } else {
println('No test data directory found in "$test_root"') println('No test data directory found in "$test_root"')
assert true assert true