From a894a6cf360d148f68230f268bd30f9767c968d4 Mon Sep 17 00:00:00 2001 From: Larpon Date: Fri, 19 Nov 2021 19:34:21 +0100 Subject: [PATCH] toml: fix parsing of alphanumeric keys (#12517) --- vlib/toml/parser/parser.v | 30 ++++++++++++--------- vlib/toml/scanner/scanner.v | 7 ++++- vlib/toml/tests/burntsushi.toml-test_test.v | 2 -- vlib/toml/tests/key_test.v | 27 +++++++++++++++++++ vlib/toml/tests/testdata/key_test.out | 1 + vlib/toml/tests/testdata/key_test.toml | 9 +++++++ 6 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 vlib/toml/tests/key_test.v create mode 100644 vlib/toml/tests/testdata/key_test.out create mode 100644 vlib/toml/tests/testdata/key_test.toml diff --git a/vlib/toml/parser/parser.v b/vlib/toml/parser/parser.v index 5e1c771cdf..51bec38469 100644 --- a/vlib/toml/parser/parser.v +++ b/vlib/toml/parser/parser.v @@ -10,8 +10,10 @@ import toml.token import toml.scanner pub const ( - all_formatting = [token.Kind.whitespace, .tab, .cr, .nl] - space_formatting = [token.Kind.whitespace, .tab] + all_formatting = [token.Kind.whitespace, .tab, .cr, .nl] + space_formatting = [token.Kind.whitespace, .tab] + keys_and_space_formatting = [token.Kind.whitespace, .tab, .minus, .bare, .quoted, .boolean, + .number, .underscore] ) type DottedKey = []string @@ -117,11 +119,13 @@ fn (mut p Parser) next() ? { p.prev_tok = p.tok p.tok = p.peek_tok if p.tokens.len > 0 { - p.peek_tok = p.tokens.pop() + p.peek_tok = p.tokens.first() + p.tokens.delete(0) p.peek(1) ? } else { p.peek(1) ? - p.peek_tok = p.tokens.pop() + p.peek_tok = p.tokens.first() + p.tokens.delete(0) } } @@ -402,10 +406,8 @@ pub fn (mut p Parser) root_table() ? { continue } .bare, .quoted, .boolean, .number, .underscore { // NOTE .boolean allows for use of "true" and "false" as table keys - mut peek_tok := p.peek_tok - // Peek forward as far as we can skipping over space formatting tokens. - peek_tok, _ = p.peek_over(1, parser.space_formatting) ? + peek_tok, _ := p.peek_over(1, parser.keys_and_space_formatting) ? if peek_tok.kind == .period { p.ignore_while(parser.space_formatting) @@ -482,7 +484,7 @@ pub fn (mut p Parser) root_table() ? { p.ignore_while(parser.space_formatting) // Peek forward as far as we can skipping over space formatting tokens. - peek_tok, _ = p.peek_over(1, parser.space_formatting) ? + peek_tok, _ = p.peek_over(1, parser.keys_and_space_formatting) ? if p.tok.kind == .lsbr { // Parse `[[table]]` @@ -690,7 +692,7 @@ pub fn (mut p Parser) array_of_tables_contents() ?[]ast.Value { p.ignore_while(parser.all_formatting) match p.tok.kind { - .bare, .quoted, .boolean, .number { + .bare, .quoted, .boolean, .number, .underscore { if p.peek_tok.kind == .period { dotted_key := p.dotted_key() ? p.ignore_while(parser.space_formatting) @@ -829,7 +831,7 @@ pub fn (mut p Parser) double_array_of_tables_contents(target_key DottedKey) ?[]a } match p.tok.kind { - .bare, .quoted, .boolean, .number { + .bare, .quoted, .boolean, .number, .underscore { if p.peek_tok.kind == .period { mut dotted_key := p.dotted_key() ? p.ignore_while(parser.space_formatting) @@ -1001,9 +1003,11 @@ pub fn (mut p Parser) key() ?ast.Key { if p.peek_tok.kind == .minus { mut lits := p.tok.lit pos := p.tok.position() - for p.peek_tok.kind != .assign { + for p.peek_tok.kind != .assign && p.peek_tok.kind != .period && p.peek_tok.kind != .rsbr { p.next() ? - lits += p.tok.lit + if p.tok.kind !in parser.space_formatting { + lits += p.tok.lit + } } return ast.Key(ast.Bare{ text: lits @@ -1036,7 +1040,7 @@ pub fn (mut p Parser) key() ?ast.Key { if key is ast.Null { return error(@MOD + '.' + @STRUCT + '.' + @FN + - ' key expected .bare, .number, .quoted or .boolean but got "$p.tok.kind"') + ' key expected .bare, .underscore, .number, .quoted or .boolean but got "$p.tok.kind"') } // A small exception that can't easily be done via `checker` diff --git a/vlib/toml/scanner/scanner.v b/vlib/toml/scanner/scanner.v index f1aa696566..ce35869dc5 100644 --- a/vlib/toml/scanner/scanner.v +++ b/vlib/toml/scanner/scanner.v @@ -25,6 +25,8 @@ mut: line_nr int = 1 // current line number (y coordinate) pos int // current flat/index position in the `text` field header_len int // Length, how many bytes of header was found + // Quirks + is_left_of_assign bool = true // indicates if the scanner is on the *left* side of an assignment } // State is a read-only copy of the scanner's internal state. @@ -165,6 +167,7 @@ pub fn (mut s Scanner) scan() ?token.Token { return s.new_token(.plus, ascii, ascii.len) } `=` { + s.is_left_of_assign = false util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified assignment "$ascii" ($ascii.len)') return s.new_token(.assign, ascii, ascii.len) } @@ -345,6 +348,7 @@ fn (mut s Scanner) ignore_line() ?string { fn (mut s Scanner) inc_line_number() { s.col = 0 s.line_nr++ + s.is_left_of_assign = true } // extract_key parses and returns a TOML key as a string. @@ -568,7 +572,8 @@ fn (mut s Scanner) extract_number() ?string { s.col += 2 } c = s.at() - if !(byte(c).is_hex_digit() || c in scanner.digit_extras) { + if !(byte(c).is_hex_digit() || c in scanner.digit_extras) + || (c == `.` && s.is_left_of_assign) { break } s.pos++ diff --git a/vlib/toml/tests/burntsushi.toml-test_test.v b/vlib/toml/tests/burntsushi.toml-test_test.v index 4a0dcf1033..73831231d4 100644 --- a/vlib/toml/tests/burntsushi.toml-test_test.v +++ b/vlib/toml/tests/burntsushi.toml-test_test.v @@ -35,8 +35,6 @@ const ( // Inline-table 'inline-table/multiline.toml', // Key - 'key/numeric-dotted.toml', - 'key/alphanum.toml', 'key/escapes.toml', ] diff --git a/vlib/toml/tests/key_test.v b/vlib/toml/tests/key_test.v new file mode 100644 index 0000000000..d624c4d4fd --- /dev/null +++ b/vlib/toml/tests/key_test.v @@ -0,0 +1,27 @@ +import os +import toml +import toml.to + +fn test_keys() { + toml_file := + os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) + + '.toml' + toml_doc := toml.parse(toml_file) or { panic(err) } + + mut value := toml_doc.value('34-11') + assert value.int() == 23 + + value = toml_doc.value('1.2') + assert value.int() == 3 + + value = toml_doc.value('34-12.2') + assert value.int() == 42 + + toml_json := to.json(toml_doc) + out_file := + os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) + + '.out' + out_file_json := os.read_file(out_file) or { panic(err) } + println(toml_json) + assert toml_json == out_file_json +} diff --git a/vlib/toml/tests/testdata/key_test.out b/vlib/toml/tests/testdata/key_test.out new file mode 100644 index 0000000000..0df512f8e6 --- /dev/null +++ b/vlib/toml/tests/testdata/key_test.out @@ -0,0 +1 @@ +{ "34-11": 23, "1": { "2": 3 }, "34-12": { "2": 42 }, "34-20": { }, "5": { "6": { } }, "34-13": { "2": { } } } \ No newline at end of file diff --git a/vlib/toml/tests/testdata/key_test.toml b/vlib/toml/tests/testdata/key_test.toml new file mode 100644 index 0000000000..0c88fe1584 --- /dev/null +++ b/vlib/toml/tests/testdata/key_test.toml @@ -0,0 +1,9 @@ +34-11 = 23 # came out as "34-11 " = 23 +1.2 = 3 # came out as "1.2" = 3 +34-12.2 = 42 # came out as "34-12.2" = 42 + +[34-20] + +[5.6] + +[34-13.2]