diff --git a/vlib/toml/parser/parser.v b/vlib/toml/parser/parser.v index 565da99c03..be3cffebf2 100644 --- a/vlib/toml/parser/parser.v +++ b/vlib/toml/parser/parser.v @@ -9,6 +9,11 @@ import toml.util import toml.token import toml.scanner +pub const ( + all_formatting = [token.Kind.whitespace, .tab, .cr, .nl] + space_formatting = [token.Kind.whitespace, .tab] +) + // Parser contains the necessary fields for keeping the state of the parsing process. pub struct Parser { pub: @@ -127,7 +132,8 @@ fn (mut p Parser) check(check_token token.Kind) ? { } } -// check_one_of returns true if the current token's `Kind` is equal that of `expected_token`. +// check_one_of forwards the parser to the next token if the current +// token's `Kind` can be found in `tokens`. Otherwise it returns an error. fn (mut p Parser) check_one_of(tokens []token.Kind) ? { if p.tok.kind in tokens { p.next() ? @@ -137,6 +143,45 @@ fn (mut p Parser) check_one_of(tokens []token.Kind) ? { } } +// ignore_while forwards the parser to the next token as long as the current +// token's `Kind` can be found in `tokens`. This is helpful for ignoring +// a stream of formatting tokens. +fn (mut p Parser) ignore_while(tokens []token.Kind) { + if p.tok.kind in tokens { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignoring "$p.tok.kind" ...') + p.next() or { return } + p.ignore_while(tokens) + } +} + +// ignore_while_peek forwards the parser to the next token as long as `peek_tok` +// token's `Kind` can be found in `tokens`. This is helpful for ignoring +// a stream of formatting tokens. +// In contrast to `ignore_while`, `ignore_while_peek` compares on `peek_tok` this is +// sometimes necessary since not all parser calls forward using the `next()` call. +fn (mut p Parser) ignore_while_peek(tokens []token.Kind) { + for p.peek_tok.kind in tokens { + util.printdbg(@MOD + '.' + @STRUCT + '.' 
+ @FN, 'eating "$p.tok.kind" ...') + p.next() or { return } + } +} + +// peek_over peeks ahead from token starting at `i` skipping over +// any `token.Kind`s found in `tokens`. `peek_over` returns the next token *not* +// found in `tokens`. +fn (mut p Parser) peek_over(i int, tokens []token.Kind) ?token.Token { + mut peek_tok := p.peek_tok + + // Peek ahead as far as we can from token at `i` while the peeked + // token is found in `tokens`. + mut peek_i := i + for peek_tok.kind in tokens { + peek_tok = p.peek(peek_i) ? + peek_i++ + } + return peek_tok +} + // is_at returns true if the token kind is equal to `expected_token`. fn (mut p Parser) is_at(expected_token token.Kind) bool { return p.tok.kind == expected_token @@ -251,12 +296,15 @@ pub fn (mut p Parser) find_in_table(mut table map[string]ast.Value, key string) pub fn (mut p Parser) sub_key() ?string { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing nested key...') key := p.key() ? + p.ignore_while_peek(parser.space_formatting) mut text := key.str() for p.peek_tok.kind == .period { p.next() ? // . p.check(.period) ? + p.ignore_while(parser.space_formatting) next_key := p.key() ? text += '.' + next_key.text + p.ignore_while_peek(parser.space_formatting) } p.next() ? util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed nested key `$text` now at "$p.tok.kind" "$p.tok.lit"') @@ -282,23 +330,22 @@ pub fn (mut p Parser) root_table() ? { p.ast_root.comments << c util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"') } - //.whitespace, .tab, .nl { - // util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind "$p.tok.lit"') - //} + .whitespace, .tab, .nl, .cr { + util.printdbg(@MOD + '.' + @STRUCT + '.' 
+ @FN, 'skipping formatting "$p.tok.kind" "$p.tok.lit"') + continue + } .bare, .quoted, .boolean, .number, .underscore { // NOTE .boolean allows for use of "true" and "false" as table keys - if p.peek_tok.kind == .assign - || (p.tok.kind == .number && p.peek_tok.kind == .minus) { - key, val := p.key_value() ? + mut peek_tok := p.peek_tok - t := p.find_table() ? - unsafe { - util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val.to_json() in table ${ptr_str(t)}') - t[key.str()] = val - } - } else if p.peek_tok.kind == .period { + // Peek forward as far as we can skipping over space formatting tokens. + peek_tok = p.peek_over(1, parser.space_formatting) ? + + if peek_tok.kind == .period { + p.ignore_while(parser.space_formatting) subkey := p.sub_key() ? - + p.ignore_while(parser.space_formatting) p.check(.assign) ? + p.ignore_while(parser.space_formatting) val := p.value() ? sub_table, key := p.sub_table_key(subkey) @@ -309,19 +356,32 @@ pub fn (mut p Parser) root_table() ? { t[key] = val } } else { - return error(@MOD + '.' + @STRUCT + '.' + @FN + - ' dead end at "$p.tok.kind" "$p.tok.lit"') + p.ignore_while(parser.space_formatting) + key, val := p.key_value() ? + + t := p.find_table() ? + unsafe { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val.to_json() in table ${ptr_str(t)}') + t[key.str()] = val + } } } .lsbr { p.check(.lsbr) ? // '[' bracket + p.ignore_while(parser.space_formatting) + + mut peek_tok := p.peek_tok + // Peek forward as far as we can skipping over space formatting tokens. + peek_tok = p.peek_over(1, parser.space_formatting) ? if p.tok.kind == .lsbr { p.array_of_tables(mut &p.root_map) ? p.skip_next = true // skip calling p.next() in coming iteration - util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'leaving double bracket at "$p.tok.kind "$p.tok.lit". NEXT is "$p.peek_tok.kind "$p.peek_tok.lit"') - } else if p.peek_tok.kind == .period { + util.printdbg(@MOD + '.' + @STRUCT + '.' 
+ @FN, 'leaving double bracket at "$p.tok.kind" "$p.tok.lit". NEXT is "$p.peek_tok.kind" "$p.peek_tok.lit"') + } else if peek_tok.kind == .period { + p.ignore_while(parser.space_formatting) p.root_map_key = p.sub_key() ? + p.ignore_while(parser.space_formatting) util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$p.root_map_key` at "$p.tok.kind" "$p.tok.lit"') p.expect(.rsbr) ? } else { @@ -359,6 +419,7 @@ pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind"') if previous_token_was_value { + p.ignore_while(parser.space_formatting) if p.tok.kind != .rcbr { p.expect(.comma) ? } @@ -366,10 +427,17 @@ pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? { } match p.tok.kind { - //.whitespace, .tab, .nl { - // util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind "$p.tok.lit"') - //} + .whitespace, .tab { + /* + if !p.scanner.config.tokenize_formatting { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind" "$p.tok.lit"') + continue + }*/ + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping formatting "$p.tok.kind" "$p.tok.lit"') + continue + } .comma { + p.ignore_while_peek(parser.space_formatting) if p.peek_tok.kind == .rcbr { p.next() ? // Forward to the peek_tok return error(@MOD + '.' + @STRUCT + '.' + @FN + @@ -388,13 +456,16 @@ pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? { return } .bare, .quoted, .boolean, .number, .underscore { - if p.peek_tok.kind == .assign { - key, val := p.key_value() ? - util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @5 "$key.str()" = $val.to_json() into ${ptr_str(tbl)}') - tbl[key.str()] = val - } else if p.peek_tok.kind == .period { + mut peek_tok := p.peek_tok + // Peek forward as far as we can skipping over space formatting tokens. + peek_tok = p.peek_over(1, parser.space_formatting) ? 
+ + if peek_tok.kind == .period { + p.ignore_while(parser.space_formatting) subkey := p.sub_key() ? + p.ignore_while(parser.space_formatting) p.check(.assign) ? + p.ignore_while(parser.space_formatting) val := p.value() ? sub_table, key := p.sub_table_key(subkey) @@ -405,8 +476,10 @@ pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? { t[key] = val } } else { - return error(@MOD + '.' + @STRUCT + '.' + @FN + - ' dead end at "$p.tok.kind" "$p.tok.lit"') + p.ignore_while(parser.space_formatting) + key, val := p.key_value() ? + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @5 "$key.str()" = $val.to_json() into ${ptr_str(tbl)}') + tbl[key.str()] = val } previous_token_was_value = true } @@ -438,6 +511,8 @@ pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Value) ? { p.check(.rsbr) ? p.check(.rsbr) ? + p.ignore_while(parser.all_formatting) + key_str := key.str() unsafe { if key_str in table.keys() { @@ -448,7 +523,7 @@ pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Value) ? { { if val is []ast.Value { arr := &(table[key_str] as []ast.Value) - arr << p.double_bracket_array() ? + arr << p.array_of_tables_contents() ? table[key_str] = arr } else { return error(@MOD + '.' + @STRUCT + '.' + @FN + @@ -456,7 +531,7 @@ pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Value) ? { } } } else { - table[key_str] = p.double_bracket_array() ? + table[key_str] = p.array_of_tables_contents() ? } } p.last_aot = key_str @@ -475,6 +550,7 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? { next_key := p.key() ? key_str += '.' + next_key.text } + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed nested key `$key_str` now at "$p.tok.kind" "$p.tok.lit"') p.next() ? p.check(.rsbr) ? @@ -501,7 +577,10 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? 
{ } mut t_arr := &(table[p.last_aot] as []ast.Value) - mut t_map := t_arr[p.last_aot_index] + mut t_map := ast.Value(map[string]ast.Value{}) + if t_arr.len > 0 { + t_map = t_arr[p.last_aot_index] + } mut t := &(t_map as map[string]ast.Value) if last in t.keys() { @@ -512,7 +591,7 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? { { if val is []ast.Value { arr := &(val as []ast.Value) - arr << p.double_bracket_array() ? + arr << p.array_of_tables_contents() ? t[last] = arr } else { return error(@MOD + '.' + @STRUCT + '.' + @FN + @@ -520,21 +599,43 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? { } } } else { - t[last] = p.double_bracket_array() ? + t[last] = p.array_of_tables_contents() ? + } + if t_arr.len == 0 { + t_arr << t + p.last_aot_index = 0 } } } -// array parses next tokens into an array of `ast.Value`s. +// array_of_tables_contents parses the key/value pairs that follow an array of tables header into a single table and returns that table wrapped in an array of `ast.Value`s. -pub fn (mut p Parser) double_bracket_array() ?[]ast.Value { - mut arr := []ast.Value{} - for p.tok.kind in [.bare, .quoted, .boolean, .number] && p.peek_tok.kind == .assign { - mut tbl := map[string]ast.Value{} - key, val := p.key_value() ? - tbl[key.str()] = val - arr << tbl +pub fn (mut p Parser) array_of_tables_contents() ?[]ast.Value { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array of tables contents from "$p.tok.kind" "$p.tok.lit"') + mut tbl := map[string]ast.Value{} + for p.tok.kind in [.bare, .quoted, .boolean, .number] { + if p.peek_tok.kind == .period { + subkey := p.sub_key() ? + p.check(.assign) ? + val := p.value() ? + + sub_table, key := p.sub_table_key(subkey) + + mut t := p.find_in_table(mut tbl, sub_table) ? + unsafe { + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @6 "$key" = $val.to_json() into ${ptr_str(t)}') + t[key] = val + } + } else { + key, val := p.key_value() ? + tbl[key.str()] = val + } p.next() ? + p.ignore_while(parser.all_formatting) } + mut arr := []ast.Value{} + arr << tbl + util.printdbg(@MOD + '.' + @STRUCT + '.' 
+ @FN, 'parsing array of tables ${arr.str().replace('\n', + ' ')}. leaving at "$p.tok.kind" "$p.tok.lit"') return arr } @@ -549,6 +650,7 @@ pub fn (mut p Parser) array() ?[]ast.Value { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"') if previous_token_was_value { + p.ignore_while(parser.all_formatting) if p.tok.kind != .rsbr && p.tok.kind != .hash { p.expect(.comma) ? } @@ -581,6 +683,7 @@ pub fn (mut p Parser) array() ?[]ast.Value { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"') } .lcbr { + p.ignore_while(parser.space_formatting) mut t := map[string]ast.Value{} p.inline_table(mut t) ? arr << ast.Value(t) @@ -680,7 +783,9 @@ pub fn (mut p Parser) key_value() ?(ast.Key, ast.Value) { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing key value pair...') key := p.key() ? p.next() ? + p.ignore_while(parser.space_formatting) p.check(.assign) ? // Assignment operator + p.ignore_while(parser.space_formatting) value := p.value() ? util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed key value pair. "$key" = $value.to_json()') return key, value @@ -711,6 +816,7 @@ pub fn (mut p Parser) value() ?ast.Value { ast.Value(p.array() ?) } .lcbr { + p.ignore_while(parser.space_formatting) mut t := map[string]ast.Value{} p.inline_table(mut t) ? // table[key_str] = ast.Value(t) diff --git a/vlib/toml/scanner/scanner.v b/vlib/toml/scanner/scanner.v index d60284e540..a97cf131c7 100644 --- a/vlib/toml/scanner/scanner.v +++ b/vlib/toml/scanner/scanner.v @@ -40,8 +40,8 @@ pub: // Only one of the fields `text` and `file_path` is allowed to be set at time of configuration. pub struct Config { pub: - input input.Config - tokenize_formating bool // if true, generate tokens for `\n`, ` `, `\t`, `\r` etc. + input input.Config + tokenize_formatting bool = true // if true, generate tokens for `\n`, ` `, `\t`, `\r` etc. } // new_scanner returns a new *heap* allocated `Scanner` instance. 
@@ -136,14 +136,16 @@ pub fn (mut s Scanner) scan() ?token.Token { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified, what could be, a space between a RFC 3339 date and time ("$ascii") ($ascii.len)') return s.new_token(token.Kind.whitespace, ascii, ascii.len) } - if s.config.tokenize_formating { + if s.config.tokenize_formatting { mut kind := token.Kind.whitespace if c == `\t` { kind = token.Kind.tab + } else if c == `\r` { + kind = token.Kind.cr } else if c == `\n` { kind = token.Kind.nl } - util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified one of " ", "\\t" or "\\n" ("$ascii") ($ascii.len)') + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified formatting character ("$ascii") ($ascii.len)') return s.new_token(kind, ascii, ascii.len) } else { util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping " ", "\\t" or "\\n" ("$ascii") ($ascii.len)') @@ -230,7 +232,7 @@ pub fn (s &Scanner) remaining() int { } // next returns the next character code from the input text. -// next returns `-1` if it can't reach the next character. +// next returns `end_of_text` if it can't reach the next character. [direct_array_access; inline] pub fn (mut s Scanner) next() int { if s.pos < s.text.len { @@ -240,7 +242,7 @@ pub fn (mut s Scanner) next() int { c := s.text[opos] return c } - return -1 + return scanner.end_of_text } // skip skips one character ahead. @@ -265,14 +267,14 @@ pub fn (mut s Scanner) skip_n(n int) { } // at returns the *current* character code from the input text. -// at returns `-1` if it can't get the current character. +// at returns `end_of_text` if it can't get the current character. // unlike `next()`, `at()` does not change the state of the scanner. 
[direct_array_access; inline] pub fn (s &Scanner) at() int { if s.pos < s.text.len { return s.text[s.pos] } - return -1 + return scanner.end_of_text } // at_crlf returns `true` if the scanner is at a `\r` character @@ -282,7 +284,7 @@ fn (s Scanner) at_crlf() bool { } // peek returns the character code from the input text at position + `n`. -// peek returns `-1` if it can't peek `n` characters ahead. +// peek returns `end_of_text` if it can't peek `n` characters ahead. [direct_array_access; inline] pub fn (s &Scanner) peek(n int) int { if s.pos + n < s.text.len { @@ -293,7 +295,7 @@ pub fn (s &Scanner) peek(n int) int { } return s.text[s.pos + n] } - return -1 + return scanner.end_of_text } // reset resets the internal state of the scanner. diff --git a/vlib/toml/tests/array_of_tables_1_level_test.v b/vlib/toml/tests/array_of_tables_1_level_test.v new file mode 100644 index 0000000000..92eefca3c3 --- /dev/null +++ b/vlib/toml/tests/array_of_tables_1_level_test.v @@ -0,0 +1,28 @@ +import os +import toml + +const ( + toml_table_text = ' +[[products]] +name = "Hammer" +sku = 738594937 + +[[products]] # empty table within the array + +[[products]] +name = "Nail" +sku = 284758393 + +color = "gray"' +) + +fn test_tables() { + mut toml_doc := toml.parse(toml_table_text) or { panic(err) } + + toml_json := toml_doc.to_json() + + eprintln(toml_json) + assert toml_json == os.read_file( + os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) + + '.out') or { panic(err) } +} diff --git a/vlib/toml/tests/burntsushi.toml-test_test.v b/vlib/toml/tests/burntsushi.toml-test_test.v index f1a73c2c99..92a6c3265a 100644 --- a/vlib/toml/tests/burntsushi.toml-test_test.v +++ b/vlib/toml/tests/burntsushi.toml-test_test.v @@ -13,7 +13,6 @@ const ( ] invalid_exceptions = [ // Table - 'table/rrbrace.toml', 'table/duplicate-table-array2.toml', 'table/duplicate.toml', 'table/array-implicit.toml', @@ -23,7 +22,6 @@ const ( 
'table/duplicate-table-array.toml', // Array 'array/tables-1.toml', - //'array/missing-separator.toml', 'array/text-after-array-entries.toml', 'array/text-before-array-separator.toml', // Date / Time diff --git a/vlib/toml/tests/table_test.v b/vlib/toml/tests/table_test.v index 13b1d8f47b..c11b6962d4 100644 --- a/vlib/toml/tests/table_test.v +++ b/vlib/toml/tests/table_test.v @@ -69,19 +69,23 @@ fn test_tables() { arr := toml_doc.value('arr') as []toml.Any + for i := 0; i < arr.len; i++ { + entry := (arr[i] as map[string]toml.Any) + value = entry.value('t.a.b') or { panic(err) } + assert value.int() == i + 1 + value = entry.value('T.a.b') or { panic(err) } + assert value.int() == i + 1 + } + arr0 := arr[0] as map[string]toml.Any value = arr0.value('t.a.b') or { panic(err) } assert value.int() == 1 - - arr1 := arr[1] as map[string]toml.Any - value = arr1.value('T.a.b') or { panic(err) } + value = arr0.value('T.a.b') or { panic(err) } assert value.int() == 1 - arr2 := arr[2] as map[string]toml.Any - value = arr2.value('t.a.b') or { panic(err) } + arr1 := arr[1] as map[string]toml.Any + value = arr1.value('t.a.b') or { panic(err) } assert value.int() == 2 - - arr3 := arr[3] as map[string]toml.Any - value = arr3.value('T.a.b') or { panic(err) } + value = arr1.value('T.a.b') or { panic(err) } assert value.int() == 2 } diff --git a/vlib/toml/tests/testdata/array_of_tables_1_level_test.out b/vlib/toml/tests/testdata/array_of_tables_1_level_test.out new file mode 100644 index 0000000000..08d331f66c --- /dev/null +++ b/vlib/toml/tests/testdata/array_of_tables_1_level_test.out @@ -0,0 +1 @@ +{ "products": [ { "name": "Hammer", "sku": 738594937 }, { }, { "name": "Nail", "sku": 284758393, "color": "gray" } ] } \ No newline at end of file