From 253e38d9d7907309a9b5bfd19e17fb00c9f8c1e9 Mon Sep 17 00:00:00 2001 From: Larpon Date: Fri, 26 Nov 2021 14:06:28 +0100 Subject: [PATCH] toml: support implicit array of tables key change (#12580) --- vlib/toml/parser/parser.v | 143 +++++++++++++++++- .../tests/array_of_tables_edge_case_1_test.v | 18 +++ .../array_of_tables_edge_case_1_test.out | 1 + .../array_of_tables_edge_case_1_test.toml | 33 ++++ 4 files changed, 190 insertions(+), 5 deletions(-) create mode 100644 vlib/toml/tests/array_of_tables_edge_case_1_test.v create mode 100644 vlib/toml/tests/testdata/array_of_tables_edge_case_1_test.out create mode 100644 vlib/toml/tests/testdata/array_of_tables_edge_case_1_test.toml diff --git a/vlib/toml/parser/parser.v b/vlib/toml/parser/parser.v index e8726421fe..acf96b8702 100644 --- a/vlib/toml/parser/parser.v +++ b/vlib/toml/parser/parser.v @@ -58,9 +58,10 @@ mut: tokens []token.Token // To be able to peek more than one token ahead. skip_next bool // The root map (map is called table in TOML world) - root_map map[string]ast.Value - root_map_key DottedKey - explicit_declared []DottedKey + root_map map[string]ast.Value + root_map_key DottedKey + explicit_declared []DottedKey + explicit_declared_array_of_tables []DottedKey // Array of Tables state last_aot DottedKey last_aot_index int @@ -266,6 +267,15 @@ fn (p Parser) check_explicitly_declared(key DottedKey) ? { } } +// check_explicitly_declared_array_of_tables returns an error if `key` has been +// explicitly declared as an array of tables. +fn (p Parser) check_explicitly_declared_array_of_tables(key DottedKey) ? { + if p.explicit_declared_array_of_tables.len > 0 && p.explicit_declared_array_of_tables.has(key) { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' key `$key.str()` is already an explicitly declared array of tables. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."') + } +} + // find_table returns a reference to a map if found in the *root* table given a "dotted" key (`a.b.c`). // If some segments of the key does not exist in the root table find_table will // allocate a new map for each segment. This behavior is needed because you can @@ -338,7 +348,7 @@ pub fn (mut p Parser) find_in_table(mut table map[string]ast.Value, key DottedKe t = &(val as map[string]ast.Value) } else { return error(@MOD + '.' + @STRUCT + '.' + @FN + - ' "$k" in "$key" is not a map ($val.type_name())') + ' "$k" in "$key" is not a map but `$val.type_name()`') } } else { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'no key "$k" in "$key" found, allocating new map at key "$k" in map ${ptr_str(t)}"') @@ -352,6 +362,28 @@ pub fn (mut p Parser) find_in_table(mut table map[string]ast.Value, key DottedKe return t } +// find_array_of_tables returns an array if found in the root table based on the parser's +// last encountered "Array Of Tables" key. +// If the state key does not exist find_array_in_table will return an error. +pub fn (mut p Parser) find_array_of_tables() ?[]ast.Value { + mut t := unsafe { &p.root_map } + mut key := p.last_aot + if key.len > 1 { + key = DottedKey([key[0]]) + } + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "$key" in map ${ptr_str(t)}') + unsafe { + if val := t[key.str()] { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'found key "$key" in $t.keys()') + if val is []ast.Value { + arr := (val as []ast.Value) + return arr + } + } + } + return error(@MOD + '.' + @STRUCT + '.' + @FN + 'no key `$key` found in map ${ptr_str(t)}"') +} + // allocate_in_table allocates all tables in "dotted" `key` (`a.b.c`) in `table`. pub fn (mut p Parser) allocate_in_table(mut table map[string]ast.Value, key DottedKey) ? { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocating "$key" in map ${ptr_str(table)}') @@ -503,9 +535,39 @@ pub fn (mut p Parser) root_table() ? { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'leaving double bracket at "$p.tok.kind" "$p.tok.lit". NEXT is "$p.peek_tok.kind "$p.peek_tok.lit"') } else if peek_tok.kind == .period { // Parse `[d.e.f]` - p.ignore_while(parser.space_formatting) dotted_key := p.dotted_key() ? + // So apparently TOML is a *very* key context sensitive language... + // [[table]] <- parsed previously + // ... + // [table.key] <- parser is here + // + // `table.key` now shape shifts into being a *double array of tables* key... + // ... but with a different set of rules - making it hard to reuse the code we already have for that ... + // See `testdata/array_of_tables_edge_case_1_test.toml` for the type of construct parsed. + if p.last_aot.len == 1 && dotted_key.len == 2 + && dotted_key[0] == p.last_aot.str() { + // Disallow re-declaring the key + p.check_explicitly_declared_array_of_tables(dotted_key) ? + p.check(.rsbr) ? + p.ignore_while(parser.space_formatting) + arr := p.find_array_of_tables() ? + if val := arr[p.last_aot_index] { + if val is map[string]ast.Value { + mut m := map[string]ast.Value{} + p.table_contents(mut m) ? + unsafe { + mut mut_val := &val + mut_val[dotted_key[1].str()] = m + } + } else { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$p.last_aot_index" in array is not a map but `${typeof(val).name}`') + } + } + continue + } + // Disallow re-declaring the key p.check_explicitly_declared(dotted_key) ? p.explicit_declared << dotted_key @@ -557,6 +619,73 @@ fn (p Parser) excerpt() string { return p.scanner.excerpt(p.tok.pos, 10) } +// table_contents parses next tokens into a map of `ast.Value`s. +// The V `map` type is corresponding to a "table" in TOML. +pub fn (mut p Parser) table_contents(mut tbl map[string]ast.Value) ? { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing table contents...') + + for p.tok.kind != .eof { + if p.peek_tok.kind == .lsbr { + return + } + if !p.skip_next { + p.next() ? + } else { + p.skip_next = false + } + + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"') + match p.tok.kind { + .hash { + c := p.comment() + p.ast_root.comments << c + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"') + } + .whitespace, .tab, .nl, .cr { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping formatting "$p.tok.kind" "$p.tok.lit"') + continue + } + .bare, .quoted, .boolean, .number, .underscore { // NOTE .boolean allows for use of "true" and "false" as table keys + // Peek forward as far as we can skipping over space formatting tokens. + peek_tok, _ := p.peek_over(1, parser.keys_and_space_formatting) ? + + if peek_tok.kind == .period { + dotted_key, val := p.dotted_key_value() ? + + sub_table, key := p.sub_table_key(dotted_key) + + t := p.find_in_table(mut tbl, sub_table) ? + unsafe { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key" = $val in table ${ptr_str(t)}') + t[key.str()] = val + } + } else { + p.ignore_while(parser.space_formatting) + key, val := p.key_value() ? + + unsafe { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val in table ${ptr_str(tbl)}') + key_str := key.str() + if _ := tbl[key_str] { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' key "$key" is already initialized with a value. At "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."') + } + tbl[key_str] = val + } + } + p.peek_for_correct_line_ending_or_fail() ? + } + .eof { + break + } + else { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' could not parse "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."') + } + } + } +} + // inline_table parses next tokens into a map of `ast.Value`s. // The V map type is corresponding to a "table" in TOML. pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? { @@ -747,6 +876,10 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? { ' nested array of tables does not support more than 2 levels. (excerpt): "...${p.excerpt()}..."') } + if !p.explicit_declared_array_of_tables.has(dotted_key) { + p.explicit_declared_array_of_tables << dotted_key + } + first := DottedKey([dotted_key[0]]) // The array that holds the entries last := DottedKey([dotted_key[1]]) // The key the parsed array data should be added to diff --git a/vlib/toml/tests/array_of_tables_edge_case_1_test.v b/vlib/toml/tests/array_of_tables_edge_case_1_test.v new file mode 100644 index 0000000000..f4f18dac29 --- /dev/null +++ b/vlib/toml/tests/array_of_tables_edge_case_1_test.v @@ -0,0 +1,18 @@ +import os +import toml +import toml.to + +fn test_array_of_tables_edge_case_file() { + toml_file := + os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) + + '.toml' + toml_doc := toml.parse(toml_file) or { panic(err) } + + toml_json := to.json(toml_doc) + out_file := + os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) + + '.out' + out_file_json := os.read_file(out_file) or { panic(err) } + println(toml_json) + assert toml_json == out_file_json +} diff --git a/vlib/toml/tests/testdata/array_of_tables_edge_case_1_test.out b/vlib/toml/tests/testdata/array_of_tables_edge_case_1_test.out new file mode 100644 index 0000000000..8225fe1b97 --- /dev/null +++ b/vlib/toml/tests/testdata/array_of_tables_edge_case_1_test.out @@ -0,0 +1 @@ +{ "Toml": [ { "ID": "1", "Index": 0, "Range": [ 0, 1, 2, 3 ], "Greeting": "Hello!", "Name": { "First": "Dolores", "Last": "Alvarado" }, "Friends": [ { "ID": 0, "Name": "Tom" }, { "ID": 1, "Name": "Dollie" } ] }, { "ID": "2", "Index": 1, "Range": [ 0, 1, 2, 3 ], "Greeting": "Hep!", "Name": { "First": "Rush", "Last": "Washington" }, "Friends": [ { "ID": 0, "Name": "Staci" }, { "ID": 1, "Name": "Dollie" } ] } ] } \ No newline at end of file diff --git a/vlib/toml/tests/testdata/array_of_tables_edge_case_1_test.toml b/vlib/toml/tests/testdata/array_of_tables_edge_case_1_test.toml new file mode 100644 index 0000000000..baea5e3c97 --- /dev/null +++ b/vlib/toml/tests/testdata/array_of_tables_edge_case_1_test.toml @@ -0,0 +1,33 @@ +[[Toml]] + ID = "1" + Index = 0 + Range = [0, 1, 2, 3] + Greeting = "Hello!" + [Toml.Name] + First = "Dolores" + Last = "Alvarado" + + [[Toml.Friends]] + ID = 0 + Name = "Tom" + + [[Toml.Friends]] + ID = 1 + Name = "Dollie" + +[[Toml]] + ID = "2" + Index = 1 + Range = [0, 1, 2, 3] + Greeting = "Hep!" + [Toml.Name] + First = "Rush" + Last = "Washington" + + [[Toml.Friends]] + ID = 0 + Name = "Staci" + + [[Toml.Friends]] + ID = 1 + Name = "Dollie"