toml: fix parsing of alphanumeric keys (#12517)

pull/12546/head
Larpon 2021-11-19 19:34:21 +01:00 committed by GitHub
parent eec8788333
commit a894a6cf36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 60 additions and 16 deletions

View File

@ -12,6 +12,8 @@ import toml.scanner
pub const ( pub const (
all_formatting = [token.Kind.whitespace, .tab, .cr, .nl] all_formatting = [token.Kind.whitespace, .tab, .cr, .nl]
space_formatting = [token.Kind.whitespace, .tab] space_formatting = [token.Kind.whitespace, .tab]
keys_and_space_formatting = [token.Kind.whitespace, .tab, .minus, .bare, .quoted, .boolean,
.number, .underscore]
) )
type DottedKey = []string type DottedKey = []string
@ -117,11 +119,13 @@ fn (mut p Parser) next() ? {
p.prev_tok = p.tok p.prev_tok = p.tok
p.tok = p.peek_tok p.tok = p.peek_tok
if p.tokens.len > 0 { if p.tokens.len > 0 {
p.peek_tok = p.tokens.pop() p.peek_tok = p.tokens.first()
p.tokens.delete(0)
p.peek(1) ? p.peek(1) ?
} else { } else {
p.peek(1) ? p.peek(1) ?
p.peek_tok = p.tokens.pop() p.peek_tok = p.tokens.first()
p.tokens.delete(0)
} }
} }
@ -402,10 +406,8 @@ pub fn (mut p Parser) root_table() ? {
continue continue
} }
.bare, .quoted, .boolean, .number, .underscore { // NOTE .boolean allows for use of "true" and "false" as table keys .bare, .quoted, .boolean, .number, .underscore { // NOTE .boolean allows for use of "true" and "false" as table keys
mut peek_tok := p.peek_tok
// Peek forward as far as we can skipping over space formatting tokens. // Peek forward as far as we can skipping over space formatting tokens.
peek_tok, _ = p.peek_over(1, parser.space_formatting) ? peek_tok, _ := p.peek_over(1, parser.keys_and_space_formatting) ?
if peek_tok.kind == .period { if peek_tok.kind == .period {
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
@ -482,7 +484,7 @@ pub fn (mut p Parser) root_table() ? {
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
// Peek forward as far as we can skipping over space formatting tokens. // Peek forward as far as we can skipping over space formatting tokens.
peek_tok, _ = p.peek_over(1, parser.space_formatting) ? peek_tok, _ = p.peek_over(1, parser.keys_and_space_formatting) ?
if p.tok.kind == .lsbr { if p.tok.kind == .lsbr {
// Parse `[[table]]` // Parse `[[table]]`
@ -690,7 +692,7 @@ pub fn (mut p Parser) array_of_tables_contents() ?[]ast.Value {
p.ignore_while(parser.all_formatting) p.ignore_while(parser.all_formatting)
match p.tok.kind { match p.tok.kind {
.bare, .quoted, .boolean, .number { .bare, .quoted, .boolean, .number, .underscore {
if p.peek_tok.kind == .period { if p.peek_tok.kind == .period {
dotted_key := p.dotted_key() ? dotted_key := p.dotted_key() ?
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
@ -829,7 +831,7 @@ pub fn (mut p Parser) double_array_of_tables_contents(target_key DottedKey) ?[]a
} }
match p.tok.kind { match p.tok.kind {
.bare, .quoted, .boolean, .number { .bare, .quoted, .boolean, .number, .underscore {
if p.peek_tok.kind == .period { if p.peek_tok.kind == .period {
mut dotted_key := p.dotted_key() ? mut dotted_key := p.dotted_key() ?
p.ignore_while(parser.space_formatting) p.ignore_while(parser.space_formatting)
@ -1001,10 +1003,12 @@ pub fn (mut p Parser) key() ?ast.Key {
if p.peek_tok.kind == .minus { if p.peek_tok.kind == .minus {
mut lits := p.tok.lit mut lits := p.tok.lit
pos := p.tok.position() pos := p.tok.position()
for p.peek_tok.kind != .assign { for p.peek_tok.kind != .assign && p.peek_tok.kind != .period && p.peek_tok.kind != .rsbr {
p.next() ? p.next() ?
if p.tok.kind !in parser.space_formatting {
lits += p.tok.lit lits += p.tok.lit
} }
}
return ast.Key(ast.Bare{ return ast.Key(ast.Bare{
text: lits text: lits
pos: pos pos: pos
@ -1036,7 +1040,7 @@ pub fn (mut p Parser) key() ?ast.Key {
if key is ast.Null { if key is ast.Null {
return error(@MOD + '.' + @STRUCT + '.' + @FN + return error(@MOD + '.' + @STRUCT + '.' + @FN +
' key expected .bare, .number, .quoted or .boolean but got "$p.tok.kind"') ' key expected .bare, .underscore, .number, .quoted or .boolean but got "$p.tok.kind"')
} }
// A small exception that can't easily be done via `checker` // A small exception that can't easily be done via `checker`

View File

@ -25,6 +25,8 @@ mut:
line_nr int = 1 // current line number (y coordinate) line_nr int = 1 // current line number (y coordinate)
pos int // current flat/index position in the `text` field pos int // current flat/index position in the `text` field
header_len int // Length, how many bytes of header was found header_len int // Length, how many bytes of header was found
// Quirks
is_left_of_assign bool = true // indicates if the scanner is on the *left* side of an assignment
} }
// State is a read-only copy of the scanner's internal state. // State is a read-only copy of the scanner's internal state.
@ -165,6 +167,7 @@ pub fn (mut s Scanner) scan() ?token.Token {
return s.new_token(.plus, ascii, ascii.len) return s.new_token(.plus, ascii, ascii.len)
} }
`=` { `=` {
s.is_left_of_assign = false
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified assignment "$ascii" ($ascii.len)') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified assignment "$ascii" ($ascii.len)')
return s.new_token(.assign, ascii, ascii.len) return s.new_token(.assign, ascii, ascii.len)
} }
@ -345,6 +348,7 @@ fn (mut s Scanner) ignore_line() ?string {
fn (mut s Scanner) inc_line_number() { fn (mut s Scanner) inc_line_number() {
s.col = 0 s.col = 0
s.line_nr++ s.line_nr++
s.is_left_of_assign = true
} }
// extract_key parses and returns a TOML key as a string. // extract_key parses and returns a TOML key as a string.
@ -568,7 +572,8 @@ fn (mut s Scanner) extract_number() ?string {
s.col += 2 s.col += 2
} }
c = s.at() c = s.at()
if !(byte(c).is_hex_digit() || c in scanner.digit_extras) { if !(byte(c).is_hex_digit() || c in scanner.digit_extras)
|| (c == `.` && s.is_left_of_assign) {
break break
} }
s.pos++ s.pos++

View File

@ -35,8 +35,6 @@ const (
// Inline-table // Inline-table
'inline-table/multiline.toml', 'inline-table/multiline.toml',
// Key // Key
'key/numeric-dotted.toml',
'key/alphanum.toml',
'key/escapes.toml', 'key/escapes.toml',
] ]

View File

@ -0,0 +1,27 @@
import os
import toml
import toml.to
// test_keys parses the `.toml` fixture that sits in `testdata/` next to this
// test file (same base name as this file) and checks that keys made of digits,
// dashes and dots can be looked up by their textual form. It then converts the
// parsed document to JSON and compares it with the matching `.out` fixture.
fn test_keys() {
	// Both fixtures share one path stem: testdata/<this file's name without its last extension>.
	fixture_stem := os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.')))

	doc := toml.parse(fixture_stem + '.toml') or { panic(err) }

	// Plain key containing a dash.
	assert doc.value('34-11').int() == 23
	// Dotted key whose segments are numeric.
	assert doc.value('1.2').int() == 3
	// Dotted key mixing a dashed segment with a numeric one.
	assert doc.value('34-12.2').int() == 42

	actual_json := to.json(doc)
	expected_json := os.read_file(fixture_stem + '.out') or { panic(err) }
	// Print the generated JSON so a mismatch is easy to inspect in the test output.
	println(actual_json)
	assert actual_json == expected_json
}

View File

@ -0,0 +1 @@
{ "34-11": 23, "1": { "2": 3 }, "34-12": { "2": 42 }, "34-20": { }, "5": { "6": { } }, "34-13": { "2": { } } }

View File

@ -0,0 +1,9 @@
34-11 = 23 # came out as "34-11 " = 23
1.2 = 3 # came out as "1.2" = 3
34-12.2 = 42 # came out as "34-12.2" = 42
[34-20]
[5.6]
[34-13.2]