// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module parser

import toml.ast
import toml.checker
import toml.decoder
import toml.util
import toml.token
import toml.scanner

// Token kind sets used when skipping over formatting (and key-like) tokens.
pub const (
	all_formatting            = [token.Kind.whitespace, .tab, .cr, .nl]
	space_formatting          = [token.Kind.whitespace, .tab]
	keys_and_space_formatting = [token.Kind.whitespace, .tab, .minus, .bare, .quoted, .boolean,
		.number, .underscore]
)

// DottedKey holds the individual segments of a "dotted" key (`a.b.c`).
type DottedKey = []string

// str returns the segments joined with `.`.
pub fn (dk DottedKey) str() string {
	return dk.join('.')
}

// starts_with reports whether the leading entries of the dotted key
// are equal to all the entries of `target`.
fn (dk DottedKey) starts_with(target DottedKey) bool {
	if dk.len < target.len {
		return false
	}
	for i, entry in target {
		if dk[i] != entry {
			return false
		}
	}
	return true
}

// has reports whether `target` is one of the entries in the array.
fn (a []DottedKey) has(target DottedKey) bool {
	for i := 0; i < a.len; i++ {
		if a[i] == target {
			return true
		}
	}
	return false
}

// Parser holds all state needed while parsing a token stream.
pub struct Parser {
pub:
	config Config
mut:
	scanner   &scanner.Scanner
	prev_tok  token.Token
	tok       token.Token
	peek_tok  token.Token
	tokens    []token.Token // Look-ahead buffer, makes it possible to peek more than one token ahead.
	skip_next bool
	// The root map (a map is called a table in TOML terms)
	root_map          map[string]ast.Value
	root_map_key      DottedKey
	explicit_declared []DottedKey
	// Array of Tables state
	last_aot       DottedKey
	last_aot_index int
	// Root of the tree
	ast_root &ast.Root = &ast.Root{}
}

// Config is used to configure a Parser instance.
// `run_checks` en- or disables running of the strict `checker.Checker` type checks.
// `decode_values` en- or disables decoding of values with the `decoder.Decoder`.
pub struct Config {
pub:
	scanner       &scanner.Scanner
	run_checks    bool = true
	decode_values bool = true
}

// new_parser returns a new, stack allocated, `Parser`.
pub fn new_parser(config Config) Parser {
	p := Parser{
		config: config
		scanner: config.scanner
	}
	return p
}

// init prepares the parser for use: it resets the root map, buffers the
// first scanned token and forwards the parser once.
pub fn (mut p Parser) init() ? {
	p.root_map = map[string]ast.Value{}
	first_tok := p.scanner.scan() ?
	p.tokens << first_tok
	p.next() ?
}

// run_checker validates the root map and all collected comments with a
// `checker.Checker`. It does nothing when `config.run_checks` is false.
fn (mut p Parser) run_checker() ? {
	if !p.config.run_checks {
		return
	}
	chckr := checker.Checker{
		scanner: p.scanner
	}
	chckr.check(p.root_map) ?
	for comment in p.ast_root.comments {
		chckr.check_comment(comment) ?
	}
}

// run_decoder decodes the values in the root map with a `decoder.Decoder`.
// It does nothing when `config.decode_values` is false.
fn (mut p Parser) run_decoder() ? {
	if !p.config.decode_values {
		return
	}
	dcoder := decoder.Decoder{
		scanner: p.scanner
	}
	dcoder.decode(mut p.root_map) ?
}

// parse parses the input and returns the root of the generated AST.
// The steps are: init, parse the root table, run the checker, run the decoder.
pub fn (mut p Parser) parse() ?&ast.Root {
	p.init() ?
	p.root_table() ?
	p.run_checker() ?
	p.run_decoder() ?
	// Publish the parsed root map on the AST root.
	p.ast_root.table = p.root_map
	return p.ast_root
}

// next forwards the parser one token: `tok` becomes `prev_tok`, `peek_tok`
// becomes `tok` and the first buffered token becomes the new `peek_tok`.
fn (mut p Parser) next() ? {
	p.prev_tok = p.tok
	p.tok = p.peek_tok
	buffer_was_empty := p.tokens.len == 0
	if buffer_was_empty {
		// Nothing buffered, fetch a token before consuming it.
		p.peek(1) ?
	}
	p.peek_tok = p.tokens.first()
	p.tokens.delete(0)
	if !buffer_was_empty {
		// Make sure there is a token buffered after consuming one.
		p.peek(1) ?
	}
}

// peek peeks forward `n` tokens, buffering newly scanned tokens as needed.
// `n == 0` returns the current `peek_tok`, a negative `n` is an error.
// Buffering stops early when an `.eof` token has been scanned.
fn (mut p Parser) peek(n int) ?token.Token {
	if n < 0 {
		return error(@MOD + '.' + @STRUCT + '.' + @FN + ' peeking backwards is not supported.')
	}
	if n == 0 {
		return p.peek_tok
	}
	// n >= 1
	if n <= p.tokens.len {
		return p.tokens[n - 1]
	}
	mut scanned := token.Token{}
	mut count := n - p.tokens.len
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'buffering $count tokens...')
	for scanned.kind != .eof && count != 0 {
		scanned = p.scanner.scan() ?
		p.tokens << scanned
		count--
	}
	return scanned
}

// check forwards the parser to the next token if the current
// token's `Kind` is equal to that of `check_token`.
fn (mut p Parser) check(check_token token.Kind) ? {
	if p.tok.kind != check_token {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected token "$check_token" but found "$p.tok.kind" in this (excerpt): "...${p.excerpt()}..."')
	}
	p.next() ?
}

// peek_for_correct_line_ending_or_fail looks past any space formatting tokens
// and returns an error unless the token found is one of [.cr, .nl, .hash, .eof].
fn (mut p Parser) peek_for_correct_line_ending_or_fail() ? {
	// Only [.cr, .nl, .hash, .eof] may follow the space formatting.
	peek_tok, _ := p.peek_over(1, parser.space_formatting) ?
	if peek_tok.kind in [.cr, .nl, .hash, .eof] {
		return
	}
	p.next() ? // Forward to the peek_tok
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' unexpected EOL "$p.tok.kind" "$p.tok.lit" expected one of [.cr, .nl, .hash, .eof] at this (excerpt): "...${p.excerpt()}..."')
}

// check_one_of forwards the parser to the next token if the current
// token's `Kind` is found in `tokens`, otherwise an error is returned.
fn (mut p Parser) check_one_of(tokens []token.Kind) ? {
	if p.tok.kind !in tokens {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected one of $tokens but found "$p.tok.kind" in this (excerpt): "...${p.excerpt()}..."')
	}
	p.next() ?
}

// ignore_while keeps forwarding the parser for as long as the current
// token's `Kind` is found in `tokens`. Useful for skipping a run of
// formatting tokens. A failing `next()` silently ends the skipping.
fn (mut p Parser) ignore_while(tokens []token.Kind) {
	for p.tok.kind in tokens {
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignoring "$p.tok.kind" ...')
		p.next() or { return }
	}
}

// ignore_while_peek forwards the parser to the next token as long as `peek_tok`
// token's `Kind` can be found in `tokens`. This is helpful for ignoring
// a stream of formatting tokens.
// In contrast to `ignore_while`, `ignore_while_peek` compares on `peek_tok` this is
// sometimes necessary since not all parser calls forward using the `next()` call.
fn (mut p Parser) ignore_while_peek(tokens []token.Kind) {
	for {
		if p.peek_tok.kind !in tokens {
			break
		}
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'eating "$p.tok.kind" ...')
		p.next() or { return }
	}
}

// peek_over starts out with the current `peek_tok` and, while the token at hand
// has a `token.Kind` found in `tokens`, replaces it with the token peeked at an
// offset that starts at `i` and grows by one per step.
// It returns the first token *not* found in `tokens` together with the next
// offset that would have been peeked at.
fn (mut p Parser) peek_over(i int, tokens []token.Kind) ?(token.Token, int) {
	mut candidate := p.peek_tok
	mut offset := i
	for candidate.kind in tokens {
		candidate = p.peek(offset) ?
		offset++
	}
	return candidate, offset
}

// is_at reports whether the current token is of kind `expected_token`.
fn (mut p Parser) is_at(expected_token token.Kind) bool {
	return expected_token == p.tok.kind
}

// expect returns an error if the current token is not of kind `expected_token`.
// In contrast to `check`, the parser is not forwarded.
fn (mut p Parser) expect(expected_token token.Kind) ? {
	if p.tok.kind != expected_token {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected token "$expected_token" but found "$p.tok.kind" in this text "...${p.excerpt()}..."')
	}
}

// check_explicitly_declared returns an error if `key` is already registered
// in the list of explicitly declared keys.
fn (p Parser) check_explicitly_declared(key DottedKey) ? {
	if p.explicit_declared.len == 0 || !p.explicit_declared.has(key) {
		return
	}
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' key `$key.str()` is already explicitly declared. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
}

// find_table returns a reference to a map if found in the *root* table given a "dotted" key (`a.b.c`).
// If some segments of the key does not exist in the root table find_table will
// allocate a new map for each segment.
// This behavior is needed because you can
// reference maps by multiple keys "dotted" (separated by "." periods) in TOML documents.
// The key looked up is the parser's current `root_map_key`.
// See also `find_in_table`.
pub fn (mut p Parser) find_table() ?&map[string]ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "$p.root_map_key" in map ${ptr_str(p.root_map)}')
	mut t := unsafe { &p.root_map }
	// An empty root map key refers to the root map itself.
	if p.root_map_key.len == 0 {
		return t
	}
	return p.find_in_table(mut t, p.root_map_key)
}

// allocate_table allocates all tables in "dotted" `key` (`a.b.c`) in the *root* table.
// An empty `key` is a no-op.
pub fn (mut p Parser) allocate_table(key DottedKey) ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocating "$key" in map ${ptr_str(p.root_map)}')
	if key.len == 0 {
		return
	}
	mut t := unsafe { &p.root_map }
	p.allocate_in_table(mut t, key) ?
}

// sub_table_key splits a dotted key (`a.b.c`) into its logic parts:
// everything but the last segment (`a.b`) and the last segment (`c`),
// for use with the `find_sub_table` method.
pub fn (mut p Parser) sub_table_key(key DottedKey) (DottedKey, DottedKey) {
	leaf := [key.last()]
	parent := key[..key.len - 1]
	return parent, leaf
}

// find_sub_table returns a reference to a map if found in the *root* table given a "dotted" key (`a.b.c`).
// The lookup is done relative to the parser's current `root_map_key`.
// If some segments of the key does not exist in the input map find_sub_table will
// allocate a new map for the segment. This behavior is needed because you can
// reference maps by multiple keys "dotted" (separated by "." periods) in TOML documents.
// See also `find_in_table`.
pub fn (mut p Parser) find_sub_table(key DottedKey) ?&map[string]ast.Value {
	mut ky := DottedKey([]string{})
	if p.root_map_key.len == 0 {
		ky = key
	} else {
		// Prefix `key` with the current root map key.
		ky << p.root_map_key
		ky << key
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "$ky" in map ${ptr_str(p.root_map)}')
	mut t := unsafe { &p.root_map }
	if ky.len == 0 {
		return t
	}
	return p.find_in_table(mut t, ky)
}

// find_in_table returns a reference to a map if found in `table` given a "dotted" key (`a.b.c`).
// If some segments of the key do not exist in the input map find_in_table will
// allocate a new map for the segment. This behavior is needed because you can
// reference maps by multiple keys "dotted" (separated by "." periods) in TOML documents.
// An error is returned if a segment exists but does not hold a map.
pub fn (mut p Parser) find_in_table(mut table map[string]ast.Value, key DottedKey) ?&map[string]ast.Value {
	// NOTE This code is the result of much trial and error.
	// I'm still not quite sure *exactly* why it works. All I can leave here is a hope
	// that this kind of minefield someday will be easier in V :)
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "$key" in map ${ptr_str(table)}')
	// `t` walks down the nested maps, one key segment at a time.
	mut t := unsafe { &table }
	unsafe {
		for k in key {
			if val := t[k] {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'found key "$k" in $t.keys()')
				if val is map[string]ast.Value {
					t = &(val as map[string]ast.Value)
				} else {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' "$k" in "$key" is not a map ($val.type_name())')
				}
			} else {
				// Segment is missing: allocate an empty map for it and descend into it.
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'no key "$k" in "$key" found, allocating new map at key "$k" in map ${ptr_str(t)}"')
				t[k] = map[string]ast.Value{}
				t = &(t[k] as map[string]ast.Value)
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocated new map ${ptr_str(t)}"')
			}
		}
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning map ${ptr_str(t)}"')
	return t
}

// allocate_in_table allocates all tables in "dotted" `key` (`a.b.c`) in `table`.
// It performs the same walk as `find_in_table` but does not return the map found.
// An error is returned if a segment exists but does not hold a map.
pub fn (mut p Parser) allocate_in_table(mut table map[string]ast.Value, key DottedKey) ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocating "$key" in map ${ptr_str(table)}')
	mut t := unsafe { &table }
	unsafe {
		for k in key {
			if val := t[k] {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'found key "$k" in $t.keys()')
				if val is map[string]ast.Value {
					t = &(val as map[string]ast.Value)
				} else {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' "$k" in "$key" is not a map ($val.type_name())')
				}
			} else {
				// Segment is missing: allocate an empty map for it and descend into it.
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'no key "$k" in "$key" found, allocating new map at key "$k" in map ${ptr_str(t)}"')
				t[k] = map[string]ast.Value{}
				t = &(t[k] as map[string]ast.Value)
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocated new map ${ptr_str(t)}"')
			}
		}
	}
}

// dotted_key returns the segments of the next tokens parsed as
// sub/nested/path keys (e.g. `a.b.c`). In TOML, this form of key is referred to as a "dotted" key.
// Space formatting around the `.` separators is skipped.
// On return the parser has been forwarded one token past the last key segment.
pub fn (mut p Parser) dotted_key() ?DottedKey {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing dotted key...')
	mut dotted_key := DottedKey([]string{})
	key := p.key() ?
	p.ignore_while_peek(parser.space_formatting)
	dotted_key << key.str()
	for p.peek_tok.kind == .period {
		p.next() ? // .
		p.check(.period) ?
		p.ignore_while(parser.space_formatting)
		next_key := p.key() ?
		dotted_key << next_key.text
		p.ignore_while_peek(parser.space_formatting)
	}
	p.next() ?
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed dotted key `$dotted_key` now at "$p.tok.kind" "$p.tok.lit"')
	return dotted_key
}

// root_table parses next tokens into the root map of `ast.Value`s.
// The V `map` type is corresponding to a "table" in TOML.
// It loops until `.eof`, handling comments, formatting, key/value pairs
// (plain and dotted) and `[table]` / `[[array of tables]]` headers.
pub fn (mut p Parser) root_table() ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing root table...')

	for p.tok.kind != .eof {
		// `skip_next` is set by the `[[table]]` branch below, which leaves the
		// parser at a token that has not been handled yet.
		if !p.skip_next {
			p.next() ?
		} else {
			p.skip_next = false
		}

		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')
		match p.tok.kind {
			.hash {
				c := p.comment()
				p.ast_root.comments << c
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
			}
			.whitespace, .tab, .nl, .cr {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping formatting "$p.tok.kind" "$p.tok.lit"')
				continue
			}
			.bare, .quoted, .boolean, .number, .underscore { // NOTE .boolean allows for use of "true" and "false" as table keys
				// Peek forward as far as we can skipping over key and space formatting tokens.
				peek_tok, _ := p.peek_over(1, parser.keys_and_space_formatting) ?

				if peek_tok.kind == .period {
					// Dotted key/value pair, e.g. `a.b.c = 1`
					dotted_key, val := p.dotted_key_value() ?

					sub_table, key := p.sub_table_key(dotted_key)

					// Check for "table injection":
					// https://github.com/BurntSushi/toml-test/blob/576db8523df1b8705ef18c526b4a6ba9c271bbbc/tests/invalid/table/injection-1.toml
					// https://github.com/BurntSushi/toml-test/blob/576db8523df1b8705ef18c526b4a6ba9c271bbbc/tests/invalid/table/injection-2.toml
					// NOTE this is a *relatively* costly check. In general - and by specification,
					// TOML documents are expected to be "small" so this shouldn't be a problem. Famous last words.
					for explicit_key in p.explicit_declared {
						// Single segment keys and the current root map key are exempt.
						if explicit_key.len == 1 || explicit_key == p.root_map_key {
							continue
						}
						// Absolute key of the sub table: current root map key + sub table key.
						mut abs_dotted_key := DottedKey([]string{})
						abs_dotted_key << p.root_map_key
						abs_dotted_key << sub_table
						if abs_dotted_key.starts_with(explicit_key) {
							return error(@MOD + '.' + @STRUCT + '.' + @FN +
								' key `$dotted_key` has already been explicitly declared. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
						}
					}

					t := p.find_sub_table(sub_table) ?
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key" = $val in table ${ptr_str(t)}')
						t[key.str()] = val
					}
				} else {
					// Plain key/value pair, e.g. `a = 1`
					p.ignore_while(parser.space_formatting)
					key, val := p.key_value() ?

					// Check and register explicitly declared arrays
					if val is []ast.Value {
						dotted_key := DottedKey([key.str()])
						// Disallow re-declaring the key
						p.check_explicitly_declared(dotted_key) ?
						p.explicit_declared << dotted_key
					}

					t := p.find_table() ?
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val in table ${ptr_str(t)}')
						key_str := key.str()
						// A key may only be assigned once per table.
						if _ := t[key_str] {
							return error(@MOD + '.' + @STRUCT + '.' + @FN +
								' key "$key" is already initialized with a value. At "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
						}
						t[key_str] = val
					}
				}
				p.peek_for_correct_line_ending_or_fail() ?
			}
			.lsbr {
				p.check(.lsbr) ? // '[' bracket
				mut peek_tok := p.peek_tok

				// Disallow `[ [table]]`
				if p.tok.kind in parser.space_formatting {
					peek_tok, _ = p.peek_over(1, parser.space_formatting) ?
					if peek_tok.kind == .lsbr {
						return error(@MOD + '.' + @STRUCT + '.' + @FN +
							' unexpected "$p.tok.kind" "$p.tok.lit" at this (excerpt): "...${p.excerpt()}..."')
					}
				}

				// Allow `[ d.e.f]`
				p.ignore_while(parser.space_formatting)

				// Peek forward as far as we can skipping over key and space formatting tokens.
				peek_tok, _ = p.peek_over(1, parser.keys_and_space_formatting) ?

				if p.tok.kind == .lsbr {
					// Parse `[[table]]`
					p.array_of_tables(mut &p.root_map) ?
					p.skip_next = true // skip calling p.next() in coming iteration
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'leaving double bracket at "$p.tok.kind" "$p.tok.lit". NEXT is "$p.peek_tok.kind "$p.peek_tok.lit"')
				} else if peek_tok.kind == .period {
					// Parse `[d.e.f]`
					p.ignore_while(parser.space_formatting)
					dotted_key := p.dotted_key() ?

					// Disallow re-declaring the key
					p.check_explicitly_declared(dotted_key) ?
					p.explicit_declared << dotted_key

					p.ignore_while(parser.space_formatting)

					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$dotted_key` at "$p.tok.kind" "$p.tok.lit"')
					p.root_map_key = dotted_key
					p.allocate_table(p.root_map_key) ?
					p.expect(.rsbr) ?
					p.peek_for_correct_line_ending_or_fail() ?
				} else {
					// Parse `[key]`
					key := p.key() ?
					dotted_key := DottedKey([key.str()])

					// Disallow re-declaring the key
					p.check_explicitly_declared(dotted_key) ?
					p.explicit_declared << dotted_key

					// Check for footgun re-declaration in this odd way:
					// [[tbl]]
					// [tbl]
					if p.last_aot == dotted_key {
						return error(@MOD + '.' + @STRUCT + '.' + @FN +
							' key `$dotted_key` has already been explicitly declared. Unexpected redeclaration at "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
					}

					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$dotted_key` at "$p.tok.kind" "$p.tok.lit"')
					p.root_map_key = dotted_key
					p.allocate_table(p.root_map_key) ?
					p.next() ?
					p.expect(.rsbr) ?
					p.peek_for_correct_line_ending_or_fail() ?
				}
			}
			.eof {
				return
			}
			else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not parse "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
			}
		}
	}
}

// excerpt returns the string the scanner's `excerpt` produces for
// `Parser.tok.pos` with an argument of 10. It is used in error messages.
fn (p Parser) excerpt() string {
	return p.scanner.excerpt(p.tok.pos, 10)
}

// inline_table parses next tokens into a map of `ast.Value`s.
// The V map type is corresponding to a "table" in TOML.
// Parsed entries are inserted into `tbl`. The function returns when the closing `}`
// is reached and returns an error if the loop ends without reaching it.
pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing inline table into ${ptr_str(tbl)}...')

	mut previous_token_was_value := false
	for p.tok.kind != .eof {
		p.next() ?
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind"')

		// After a value only a `,` or the closing `}` is accepted.
		if previous_token_was_value {
			p.ignore_while(parser.space_formatting)
			if p.tok.kind != .rcbr {
				p.expect(.comma) ?
			}
			previous_token_was_value = false
		}

		match p.tok.kind {
			.whitespace, .tab {
				/*
				if !p.scanner.config.tokenize_formatting {
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind" "$p.tok.lit"')
					continue
				}*/
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping formatting "$p.tok.kind" "$p.tok.lit"')
				continue
			}
			.comma {
				p.ignore_while_peek(parser.space_formatting)
				// A comma directly followed by another comma or by `}` is an error here.
				if p.peek_tok.kind in [.comma, .rcbr] {
					p.next() ? // Forward to the peek_tok
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' unexpected "$p.tok.kind" "$p.tok.lit" at this (excerpt): "...${p.excerpt()}..."')
				}
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comma table value seperator "$p.tok.lit"')
				continue
			}
			.rcbr {
				// '}' bracket
				return
			}
			.bare, .quoted, .boolean, .number, .underscore {
				// Peek forward as far as we can skipping over space formatting tokens.
				peek_tok, _ := p.peek_over(1, parser.space_formatting) ?

				if peek_tok.kind == .period {
					// Dotted key: insert the value into the (possibly new) nested table of `tbl`.
					dotted_key, val := p.dotted_key_value() ?

					sub_table, key := p.sub_table_key(dotted_key)

					mut t := p.find_in_table(mut tbl, sub_table) ?
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @6 "$key" = $val into ${ptr_str(t)}')
						t[key.str()] = val
					}
				} else {
					p.ignore_while(parser.space_formatting)
					key, val := p.key_value() ?
					key_str := key.str()
					// A key may only be assigned once per table.
					if _ := tbl[key_str] {
						return error(@MOD + '.' + @STRUCT + '.' + @FN +
							' key "$key_str" is already initialized with a value. At "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
					}
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @5 "$key_str" = $val into ${ptr_str(tbl)}')
					tbl[key_str] = val
				}
				previous_token_was_value = true
			}
			else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' unexpected "$p.tok.kind" "$p.tok.lit" at this (excerpt): "...${p.excerpt()}..."')
			}
		}
	}
	// Make sure the inline-table actually uses the return at the .rcbr match branch.
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' unexpected end of inline-table "$p.tok.kind" "$p.tok.lit" at this (excerpt): "...${p.excerpt()}..."')
}

// array_of_tables parses next tokens into an array of `ast.Value`s.
// It handles the `[[key]]` header form and hands `[[key.key]]` headers over to
// `double_array_of_tables`. The parsed table is appended to the array stored
// at the key in `table`, and `last_aot` / `last_aot_index` are updated.
pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Value) ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array of tables "$p.tok.kind" "$p.tok.lit"')
	// NOTE this is starting to get ugly. TOML isn't simple at this point
	p.check(.lsbr) ? // '[' bracket

	p.ignore_while(parser.space_formatting)
	peek_tok, _ := p.peek_over(1, parser.space_formatting) ?
	p.ignore_while(parser.space_formatting)

	// [[key.key]] horror
	if peek_tok.kind == .period {
		p.double_array_of_tables(mut table) ?
		return
	}

	key := p.key() ?
	p.next() ?

	// Expect the two closing brackets `]]`
	p.check(.rsbr) ?
	p.peek_for_correct_line_ending_or_fail() ?
	p.expect(.rsbr) ?

	p.ignore_while(parser.all_formatting)

	dotted_key := DottedKey([key.str()])
	dotted_key_str := dotted_key.str()

	// Disallow re-declaring the key
	p.check_explicitly_declared(dotted_key) ?

	unsafe {
		if val := table[dotted_key_str] {
			// The key exists: it must hold an array, which the new entry is appended to.
			if val is []ast.Value {
				arr := &(table[dotted_key_str] as []ast.Value)
				arr << p.array_of_tables_contents() ?
				table[dotted_key_str] = arr
			} else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' table[$dotted_key_str] is not an array. (excerpt): "...${p.excerpt()}..."')
			}
		} else {
			table[dotted_key_str] = p.array_of_tables_contents() ?
		}
	}
	p.last_aot = dotted_key

	// Remember the index of the entry just added.
	unsafe {
		arr := &(table[p.last_aot.str()] as []ast.Value)
		p.last_aot_index = arr.len - 1
	}
}

// array_of_tables_contents parses next tokens into an array of `ast.Value`s.
// Key/value pairs are collected into one table until a token that can not
// start a key is met. The returned array holds that single table.
pub fn (mut p Parser) array_of_tables_contents() ?[]ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing contents from "$p.tok.kind" "$p.tok.lit"')
	mut tbl := map[string]ast.Value{}

	for p.tok.kind != .eof {
		p.next() ?
		p.ignore_while(parser.all_formatting)
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind"')

		match p.tok.kind {
			.bare, .quoted, .boolean, .number, .underscore {
				// Peek forward as far as we can skipping over space formatting tokens.
				peek_tok, _ := p.peek_over(1, parser.space_formatting) ?

				if peek_tok.kind == .period {
					dotted_key, val := p.dotted_key_value() ?

					sub_table, key := p.sub_table_key(dotted_key)

					mut t := p.find_in_table(mut tbl, sub_table) ?
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @6 "$key" = $val into ${ptr_str(t)}')
						t[key.str()] = val
					}
				} else {
					key, val := p.key_value() ?
					tbl[key.str()] = val
				}
			}
			else {
				// Any other token ends the contents of this table.
				break
			}
		}
	}
	mut arr := []ast.Value{}
	arr << tbl
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed array of tables ${ast.Value(arr)}. leaving at "$p.tok.kind" "$p.tok.lit"')
	return arr
}

// double_array_of_tables parses next tokens into an array of tables of arrays of `ast.Value`s...
// It handles `[[a.b]]` headers. Only dotted keys with exactly 2 segments are accepted.
// The first segment must be equal to `last_aot`, unless no array of tables has been
// seen yet, in which case the array is allocated implicitly.
pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array of tables of arrays "$p.tok.kind" "$p.tok.lit"')

	dotted_key := p.dotted_key() ?
	p.ignore_while(parser.space_formatting)

	// Expect the two closing brackets `]]`
	p.check(.rsbr) ?
	p.expect(.rsbr) ?

	p.ignore_while(parser.all_formatting)

	if dotted_key.len != 2 {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' nested array of tables does not support more than 2 levels. (excerpt): "...${p.excerpt()}..."')
	}

	first := DottedKey([dotted_key[0]]) // The array that holds the entries
	last := DottedKey([dotted_key[1]]) // The key the parsed array data should be added to

	// Placeholders, both are assigned in the unsafe block below before use.
	mut t_arr := &[]ast.Value(0)
	mut t_map := ast.Value(ast.Null{})

	unsafe {
		// NOTE this is starting to get EVEN uglier. TOML is not *at all* simple at this point...
		if first != p.last_aot {
			// Implicit allocation
			if p.last_aot.len == 0 {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'implicit allocation of array for dotted key `$dotted_key`.')
				p.last_aot = first
				// We register this implicit allocation as *explicit* to be able to catch
				// special cases like:
				// https://github.com/BurntSushi/toml-test/blob/576db852/tests/invalid/table/array-implicit.toml
				p.explicit_declared << first

				mut nm := map[string]ast.Value{}
				nm[last.str()] = []ast.Value{}
				table[first.str()] = ast.Value(nm)

				t_arr = &(nm[last.str()] as []ast.Value)
				t_arr << p.array_of_tables_contents() ?
				return
			} else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' nested array of tables key "$first" does not match "$p.last_aot". (excerpt): "...${p.excerpt()}..."')
			}
		}

		// Locate the table entry at `last_aot_index` in the array stored at `last_aot`.
		// A fresh map is used if the index is outside the array.
		t_arr = &(table[p.last_aot.str()] as []ast.Value)
		t_map = ast.Value(map[string]ast.Value{})
		if p.last_aot_index < t_arr.len {
			t_map = t_arr[p.last_aot_index]
		}

		mut t := &(t_map as map[string]ast.Value)

		if val := t[last.str()] {
			// The key exists: it must hold an array, which the new entry is appended to.
			if val is []ast.Value {
				arr := &(val as []ast.Value)
				arr << p.double_array_of_tables_contents(dotted_key) ?
				t[last.str()] = arr
			} else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' t[$last.str()] is not an array. (excerpt): "...${p.excerpt()}..."')
			}
		} else {
			t[last.str()] = p.double_array_of_tables_contents(dotted_key) ?
		}
		if t_arr.len == 0 {
			t_arr << t
			p.last_aot_index = t_arr.len - 1
		}
	}
}

// double_array_of_tables_contents parses next tokens into an array of `ast.Value`s.
// Key/value pairs are collected into one table. A `[d.e.f]` header inside the
// contents changes the key prefix the following pairs are inserted under.
// Parsing stops when a `[[` is peeked or when a token is met that is not handled here.
// `target_key` is only used for debug output.
pub fn (mut p Parser) double_array_of_tables_contents(target_key DottedKey) ?[]ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing contents from "$p.tok.kind" "$p.tok.lit"')
	mut tbl := map[string]ast.Value{}

	// Key prefix set by the most recent `[d.e.f]` header, empty if none was met.
	mut implicit_allocation_key := DottedKey([]string{})
	mut peeked_over := 0
	mut peek_tok := p.peek_tok

	for p.tok.kind != .eof {
		p.next() ?
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind"')
		p.ignore_while(parser.all_formatting)

		// Peek forward as far as we can skipping over space formatting tokens.
		peek_tok, peeked_over = p.peek_over(1, parser.space_formatting) ?
		// Peek for occurrence of `[[`
		if peek_tok.kind == .lsbr {
			peek_tok, peeked_over = p.peek_over(peeked_over + 1, parser.space_formatting) ?
			if peek_tok.kind == .lsbr {
				// A new `[[...]]` header follows, the contents end here.
				mut arr := []ast.Value{}
				arr << tbl
				return arr
			}
		}

		match p.tok.kind {
			.bare, .quoted, .boolean, .number, .underscore {
				// Peek forward as far as we can skipping over space formatting tokens.
				peek_tok, _ = p.peek_over(1, parser.space_formatting) ?

				if peek_tok.kind == .period {
					mut dotted_key, val := p.dotted_key_value() ?

					// Prefix the key with the key of the most recent `[d.e.f]` header, if any.
					if implicit_allocation_key.len > 0 {
						dotted_key.insert(0, implicit_allocation_key)
					}
					sub_table, key := p.sub_table_key(dotted_key)

					mut t := p.find_in_table(mut tbl, sub_table) ?
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @6 "$key" = $val into ${ptr_str(t)}')
						t[key.str()] = val
					}
				} else {
					key, val := p.key_value() ?

					// Insert into `tbl` itself or, if a `[d.e.f]` header was met, into its sub table.
					mut t := unsafe { &tbl }
					if implicit_allocation_key.len > 0 {
						t = p.find_in_table(mut tbl, implicit_allocation_key) ?
					}
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @7 "$key" = $val into ${ptr_str(t)}')
						t[key.str()] = val
					}
				}
			}
			.lsbr {
				p.check(.lsbr) ? // '[' bracket
				peek_tok = p.peek_tok

				// Allow `[ d.e.f]`
				p.ignore_while(parser.space_formatting)

				// Peek forward as far as we can skipping over space formatting tokens.
				peek_tok, _ = p.peek_over(1, parser.space_formatting) ?

				if peek_tok.kind == .period {
					// Parse `[d.e.f]`
					p.ignore_while(parser.space_formatting)
					dotted_key := p.dotted_key() ?
					implicit_allocation_key = dotted_key
					// Keys longer than 2 segments: only the segments after the first two are kept.
					if dotted_key.len > 2 {
						implicit_allocation_key = dotted_key[2..]
					}
					p.ignore_while(parser.space_formatting)
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'keys are: dotted `$dotted_key`, target `$target_key`, implicit `$implicit_allocation_key` at "$p.tok.kind" "$p.tok.lit"')
					p.expect(.rsbr) ?
					p.peek_for_correct_line_ending_or_fail() ?
					continue
				} else {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' could not parse "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
				}
			}
			else {
				// Any other token ends the contents of this table.
				break
			}
		}
	}
	mut arr := []ast.Value{}
	arr << tbl
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed array of tables ${ast.Value(arr)}. leaving at "$p.tok.kind" "$p.tok.lit"')
	return arr
}

// array parses next tokens into an array of `ast.Value`s.
// The current token must be the opening `[`. Parsing runs until the matching `]`.
// Comments met inside the array are collected in `ast_root.comments`.
pub fn (mut p Parser) array() ?[]ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array...')
	mut arr := []ast.Value{}
	p.expect(.lsbr) ? // '[' bracket
	mut previous_token_was_value := false
	for p.tok.kind != .eof {
		p.next() ?
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')

		// After a value only a `,`, a comment or the closing `]` is accepted.
		if previous_token_was_value {
			p.ignore_while(parser.all_formatting)
			if p.tok.kind != .rsbr && p.tok.kind != .hash {
				p.expect(.comma) ?
			}
			previous_token_was_value = false
		}

		match p.tok.kind {
			.boolean {
				arr << ast.Value(p.boolean() ?)
				previous_token_was_value = true
			}
			.comma {
				p.ignore_while_peek(parser.space_formatting)
				// Trailing commas before array close is allowed
				// so we do not do `if p.peek_tok.kind == .rsbr { ... }`

				// Check for known errors:
				if p.peek_tok.kind in [.comma, .bare] {
					p.next() ? // Forward to the peek_tok
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' unexpected "$p.tok.kind" "$p.tok.lit" at this (excerpt): "...${p.excerpt()}..."')
				}
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comma table value seperator "$p.tok.lit"')
				continue
			}
			.eof {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not parse array. Reached EOF "$p.tok.kind" "$p.tok.lit" ("$p.tok.lit") in this (excerpt): "...${p.excerpt()}..."')
			}
			.hash {
				c := p.comment()
				p.ast_root.comments << c
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
			}
			.lcbr {
				// Inline table as array element
				p.ignore_while(parser.space_formatting)
				mut t := map[string]ast.Value{}
				p.inline_table(mut t) ?
				arr << ast.Value(t)
				previous_token_was_value = true
			}
			.number {
				val := p.number_or_date() ?
				arr << val
				previous_token_was_value = true
			}
			.quoted {
				arr << ast.Value(p.quoted())
				previous_token_was_value = true
			}
			.lsbr {
				// Nested array
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array in array "$p.tok.kind" "$p.tok.lit"')
				arr << ast.Value(p.array() ?)
				previous_token_was_value = true
			}
			.rsbr {
				break
			}
			else {
				// NOTE(review): the error below is constructed but *not* returned, so every
				// token kind without a branch of its own is skipped. Formatting tokens
				// (whitespace, newlines) have no branch above and thus pass through here;
				// confirm which kinds are meant to be rejected before adding a `return`.
				error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not parse "$p.tok.kind" "$p.tok.lit" ("$p.tok.lit") in this (excerpt): "...${p.excerpt()}..."')
			}
		}
	}
	p.expect(.rsbr) ? // ']' bracket
	$if debug {
		flat := arr.str().replace('\n', r'\n')
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed array: $flat . Currently @ token "$p.tok.kind"')
	}
	return arr
}

// comment returns an `ast.Comment` type built from the literal
// and position of the current token.
pub fn (mut p Parser) comment() ast.Comment {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed hash comment "#$p.tok.lit"')
	return ast.Comment{
		text: p.tok.lit
		pos: p.tok.position()
	}
}

// key parse and returns an `ast.Key` type.
// Keys are the token(s) appearing before an assignment operator (=).
// The current token is turned into a key: `.bare` and `.underscore` tokens give
// an `ast.Bare`, `.boolean` an `ast.Bool`, `.quoted` an `ast.Quoted` and `.number`
// the result of `p.number()`. A number directly followed by a `.minus` token
// (e.g. `1-2-3`) is collected into one `ast.Bare` key.
// An error is returned for any other token kind and, when `config.run_checks`
// is set, for quoted keys that are multiline or fail `checker.check_quoted`.
pub fn (mut p Parser) key() ?ast.Key {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing key from "$p.tok.lit" ...')

	mut key := ast.Key(ast.Null{})
	if p.tok.kind == .number {
		if p.peek_tok.kind == .minus {
			mut lits := p.tok.lit
			pos := p.tok.position()
			// Collect tokens until the key ends. The `.eof` condition is required:
			// at end of input `peek_tok` stays `.eof`, so without it truncated
			// input like `1-2` would make this loop spin forever. Stopping here
			// lets the caller report the missing token instead.
			for p.peek_tok.kind != .assign && p.peek_tok.kind != .period && p.peek_tok.kind != .rsbr && p.peek_tok.kind != .eof {
				p.next() ?
				if p.tok.kind !in parser.space_formatting {
					lits += p.tok.lit
				}
			}
			return ast.Key(ast.Bare{
				text: lits
				pos: pos
			})
		}
		// number := p.number() as ast.Number
		key = ast.Key(p.number())
	} else {
		key = match p.tok.kind {
			.bare, .underscore {
				ast.Key(p.bare())
			}
			.boolean {
				ast.Key(p.boolean() ?)
			}
			.quoted {
				ast.Key(p.quoted())
			}
			else {
				ast.Key(ast.Null{})
			}
		}
	}

	// NOTE kept for eased debugging
	// util.printdbg(@MOD +'.' + @STRUCT + '.' + @FN, 'parsed key "$p.tok.lit"')
	// panic(@MOD + '.' + @STRUCT + '.' + @FN + ' could not parse ${p.tok.kind} ("${p.tok.lit}") token \n$p.tok')
	// return ast.Key(ast.Bare{})

	// `ast.Null` is only left in `key` when the token kind is not usable as a key.
	if key is ast.Null {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' key expected .bare, .underscore, .number, .quoted or .boolean but got "$p.tok.kind"')
	}

	// A few small exceptions that can't easily be done via `checker` or `decoder` *after* the
	// main table has been built since information like `is_multiline` is lost when using the key.text as a
	// V `map` key directly.
	if key is ast.Quoted {
		if p.config.run_checks {
			quoted := key as ast.Quoted
			if quoted.is_multiline {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' multiline string as key is not allowed. (excerpt): "...${p.excerpt()}..."')
			}
			chckr := checker.Checker{
				scanner: p.scanner
			}
			chckr.check_quoted(quoted) ?
		}
		if p.config.decode_values {
			mut quoted := key as ast.Quoted
			decoder.decode_quoted_escapes(mut quoted) ?
			key = ast.Key(quoted)
		}
	}

	return key
}

// key_value parse and returns a pair `ast.Key` and `ast.Value` type.
// See also `key()` and `value()`.
// The key is parsed from the current token, the assignment operator is required next
// (spaces and tabs around it are skipped) and the value is parsed after it.
pub fn (mut p Parser) key_value() ?(ast.Key, ast.Value) {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing key value pair...')
	key := p.key() ?
	p.next() ?
	p.ignore_while(parser.space_formatting)
	p.check(.assign) ? // Assignment operator
	p.ignore_while(parser.space_formatting)
	value := p.value() ?
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed key value pair. `$key = $value`')
	return key, value
}

// dotted_key_value parses and returns a pair of `DottedKey` and `ast.Value` types.
// It works like `key_value()` but reads the key via `dotted_key()`.
// See also `key()` and `value()`.
pub fn (mut p Parser) dotted_key_value() ?(DottedKey, ast.Value) {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing dotted key value pair...')
	p.ignore_while(parser.space_formatting)
	dotted_key := p.dotted_key() ?
	p.ignore_while(parser.space_formatting)
	p.check(.assign) ?
	p.ignore_while(parser.space_formatting)
	value := p.value() ?
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed dotted key value pair `$dotted_key = $value`...')
	return dotted_key, value
}

// value parses and returns an `ast.Value` type.
// Values are the token(s) appearing after an assignment operator (=).
// The kind of the current token selects what is parsed: a number or date/time,
// a quoted string, a boolean, an array (`[`) or an inline table (`{`).
// Any other token kind results in an error.
pub fn (mut p Parser) value() ?ast.Value {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing value...')
	mut value := ast.Value(ast.Null{})

	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')
	if p.tok.kind == .number {
		number_or_date := p.number_or_date() ?
		value = number_or_date
	} else {
		value = match p.tok.kind {
			.quoted {
				ast.Value(p.quoted())
			}
			.boolean {
				ast.Value(p.boolean() ?)
			}
			.lsbr {
				ast.Value(p.array() ?)
			}
			.lcbr {
				p.ignore_while(parser.space_formatting)
				mut t := map[string]ast.Value{}
				p.inline_table(mut t) ?
				ast.Value(t)
			}
			else {
				// Left as `ast.Null` so the check below reports the error.
				ast.Value(ast.Null{})
			}
		}
		if value is ast.Null {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' value expected .boolean, .quoted, .lsbr, .lcbr or .number got "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
		}
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed "$p.tok.kind" as value $value')
	return value
}

// number_or_date parses and returns an `ast.Value` type as
// one of [`ast.Date`, `ast.Time`, `ast.DateTime`, `ast.Number`].
// The decision is made on the peeked token: a minus or a colon after the
// current number token means date/time, anything else means a plain number.
pub fn (mut p Parser) number_or_date() ?ast.Value {
	// Handle Date/Time
	if p.peek_tok.kind == .minus || p.peek_tok.kind == .colon {
		date_time_type := p.date_time() ?
		match date_time_type {
			ast.Date {
				return ast.Value(date_time_type as ast.Date)
			}
			ast.Time {
				return ast.Value(date_time_type as ast.Time)
			}
			ast.DateTime {
				return ast.Value(date_time_type as ast.DateTime)
			}
		}
	}
	return ast.Value(p.number())
}

// bare returns an `ast.Bare` type built from the current token's literal and position.
pub fn (mut p Parser) bare() ast.Bare {
	return ast.Bare{
		text: p.tok.lit
		pos: p.tok.position()
	}
}

// quoted returns an `ast.Quoted` type built from the current token.
// The surrounding quotes (one, or three for multi-line strings) are removed from the text,
// the quote character is kept in `quote`, and `is_multiline` tells which form was used.
pub fn (mut p Parser) quoted() ast.Quoted {
	// To get more info about the quote type and enable better checking,
	// the scanner is returning the literal *with* single- or double-quotes.
	quote := p.tok.lit[0]
	// Shortest multi-line literal is the empty one: three opening + three closing quotes.
	is_multiline := p.tok.lit.len >= 6 && p.tok.lit[1] == quote && p.tok.lit[2] == quote
	mut lit := p.tok.lit[1..p.tok.lit.len - 1]
	if is_multiline {
		lit = p.tok.lit[3..p.tok.lit.len - 3]
		// From https://toml.io/en/v1.0.0#string
		// "Multi-line literal strings [...] A newline immediately following the opening
		// delimiter will be trimmed. All other content between the delimiters
		// is interpreted as-is without modification."
		// A TOML newline is either LF or CRLF, so both forms are trimmed.
		if lit.starts_with('\r\n') {
			lit = lit[2..]
		} else if lit.starts_with('\n') {
			lit = lit[1..]
		}
	}
	return ast.Quoted{
		text: lit
		pos: p.tok.position()
		quote: quote
		is_multiline: is_multiline
	}
}

// boolean returns an `ast.Bool` type built from the current token.
// An error is returned if the token's literal is neither `true` nor `false`.
pub fn (mut p Parser) boolean() ?ast.Bool {
	if p.tok.lit !in ['true', 'false'] {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected literal to be either `true` or `false` got "$p.tok.kind"')
	}
	return ast.Bool{
		text: p.tok.lit
		pos: p.tok.position()
	}
}

// number returns an `ast.Number` type built from the current token's literal and position.
pub fn (mut p Parser) number() ast.Number {
	return ast.Number{
		text: p.tok.lit
		pos: p.tok.position()
	}
}

// date_time parses dates and time in RFC 3339 format.
// https://datatracker.ietf.org/doc/html/rfc3339
// Depending on the tokens found it returns an `ast.DateTime` (date followed by a time),
// an `ast.Time` (current number followed by a colon) or an `ast.Date`.
pub fn (mut p Parser) date_time() ?ast.DateTimeType {
	// Date and/or Time
	mut lit := ''
	pos := p.tok.position()
	mut date := ast.Date{}
	mut time := ast.Time{}

	if p.peek_tok.kind == .minus {
		date = p.date() ?
		lit += date.text
		// Look for any THH:MM:SS or <whitespace>HH:MM:SS
		// NOTE(review): any whitespace after the date is taken as the date/time separator,
		// so a date followed by whitespace and something that is not a time makes `time()`
		// fail - confirm whether the scanner can produce that token sequence here.
		if (p.peek_tok.kind == .bare && (p.peek_tok.lit.starts_with('T')
			|| p.peek_tok.lit.starts_with('t'))) || p.peek_tok.kind == .whitespace {
			p.next() ? // Advance to token with Txx or whitespace special case
			if p.tok.lit.starts_with('T') || p.tok.lit.starts_with('t') {
				// Only the separator is added here, `time()` handles the rest of the token.
				lit += p.tok.lit[0].ascii_str() //'T' or 't'
			} else {
				lit += p.tok.lit
				p.next() ?
			}
			time = p.time() ?
			lit += time.text

			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed date-time: "$lit"')
			return ast.DateTime{
				text: lit
				pos: pos
				date: date
				time: time
			}
		}
	} else if p.peek_tok.kind == .colon {
		time = p.time() ?
		return time
	}

	return ast.Date{
		text: lit
		pos: pos
	}
}

// date parses and returns an `ast.Date` type.
// It reads `number - number - number` starting at the current token and
// concatenates the literals into the returned `text`.
pub fn (mut p Parser) date() ?ast.Date {
	// Date
	mut lit := p.tok.lit
	pos := p.tok.position()

	// Each `check` forwards to the next token when the current kind matches,
	// so the literal appended after it belongs to the following token.
	p.check(.number) ?
	lit += p.tok.lit
	p.check(.minus) ?
	lit += p.tok.lit
	p.check(.number) ?
	lit += p.tok.lit
	p.check(.minus) ?
	lit += p.tok.lit
	p.expect(.number) ?

	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed date: "$lit"')
	return ast.Date{
		text: lit
		pos: pos
	}
}

// time parses and returns an `ast.Time` type.
// It reads `number : number : number`, then an optional `.number` fraction and an
// optional offset (`+`/`-` followed by `number : number`, or a bare `Z`/`z`).
// The current token may be a bare token starting with `T` or `t`, in which case that
// prefix is left out of the returned `text`.
pub fn (mut p Parser) time() ?ast.Time {
	mut lit := p.tok.lit
	pos := p.tok.position()

	has_t_prefix := lit.starts_with('T') || lit.starts_with('t')
	if p.is_at(.bare) && has_t_prefix {
		// Drop the date/time separator, keep what follows it in the same token.
		separator := if lit.starts_with('T') { 'T' } else { 't' }
		lit = lit.all_after(separator)
		p.next() ?
	} else {
		p.check(.number) ?
	}

	// ':' and minutes
	lit += p.tok.lit
	p.check(.colon) ?
	lit += p.tok.lit
	p.check(.number) ?

	// ':' and seconds. These are not treated as optional here.
	lit += p.tok.lit
	p.check(.colon) ?
	lit += p.tok.lit
	p.expect(.number) ?

	// Optional fractional seconds
	if p.peek_tok.kind == .period {
		p.next() ?
		lit += p.tok.lit // the '.'
		p.check(.period) ?
		lit += p.tok.lit
		p.expect(.number) ?
	}

	// Optional offset
	if p.peek_tok.kind in [.minus, .plus] {
		p.next() ?
		lit += p.tok.lit // the sign
		p.check_one_of([.minus, .plus]) ?
		lit += p.tok.lit
		p.check(.number) ?
		lit += p.tok.lit
		p.check(.colon) ?
		lit += p.tok.lit
		p.expect(.number) ?
	} else if p.peek_tok.kind == .bare && p.peek_tok.lit in ['Z', 'z'] {
		p.next() ?
		lit += p.tok.lit
		p.expect(.bare) ?
	}

	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed time: "$lit"')
	return ast.Time{
		text: lit
		pos: pos
	}
}

// eof returns an `ast.EOF` type positioned at the current token.
pub fn (mut p Parser) eof() ast.EOF {
	pos := p.tok.position()
	return ast.EOF{
		pos: pos
	}
}