vlib: add toml module + tests (#11964)

pull/11973/head
Larpon 2021-09-24 20:13:52 +02:00 committed by GitHub
parent 834cf40ab2
commit 5541ec8670
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 3459 additions and 0 deletions

27
.github/workflows/toml_ci.yml vendored 100644
View File

@ -0,0 +1,27 @@
name: toml CI
on:
push:
paths-ignore:
- "**.md"
pull_request:
paths-ignore:
- "**.md"
jobs:
toml-module-pass-external-test-suites:
runs-on: ubuntu-20.04
timeout-minutes: 121
steps:
- uses: actions/checkout@v2
- name: Build V
run: make -j2 && ./v -cc gcc -o v cmd/v
- name: Clone BurntSushi/toml-test
run: |
cd vlib/toml/tests/testdata
git clone --depth 1 https://github.com/BurntSushi/toml-test.git burntsushi/toml-test
- name: Run tests
run: ./v -stats test vlib/toml

48
examples/toml.v 100644
View File

@ -0,0 +1,48 @@
import toml
// Complete text from the example in the README.md:
// https://github.com/toml-lang/toml/blob/3b11f6921da7b6f5db37af039aa021fee450c091/README.md#Example
const toml_text = '# This is a TOML document.
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
dob = 1979-05-27T07:32:00-08:00 # First class dates
[database]
server = "192.168.1.1"
ports = [ 8000, 8001, 8002 ]
connection_max = 5000
enabled = true
[servers]
# Indentation (tabs and/or spaces) is allowed but not required
[servers.alpha]
ip = "10.0.0.1"
dc = "eqdc10"
[servers.beta]
ip = "10.0.0.2"
dc = "eqdc10"
[clients]
data = [ ["gamma", "delta"], [1, 2] ]
# Line breaks are OK when inside arrays
hosts = [
"alpha",
"omega"
]'
// main parses the TOML example document from the official README,
// queries two values by "dotted" key and dumps the document as JSON.
fn main() {
	// parse returns a Doc; any syntax error aborts the example.
	doc := toml.parse(toml_text) or { panic(err) }
	title := doc.value('title').string()
	println('title: "$title"')
	// Nested tables are addressed with '.'-separated key paths.
	ip := doc.value('servers.alpha.ip').string()
	println('Server IP: "$ip"')
	toml_json := doc.to_json()
	println(toml_json)
}

View File

@ -43,6 +43,61 @@ pub fn parse_rfc2822(s string) ?Time {
}
}
// ----- rfc3339 -----
const (
err_invalid_3339 = 'Invalid 3339 format'
)
// parse_rfc3339 returns time from a date string in RFC 3339 datetime format.
// Accepts full date-times (delegated to parse_iso8601), date-only strings
// ('YYYY-MM-DD') and time-only strings ('HH:mm:ss[.micros][offset]').
// Returns an error for empty or unrecognized input.
pub fn parse_rfc3339(s string) ?Time {
	if s == '' {
		return error(time.err_invalid_3339 + ' cannot parse empty string')
	}
	// Fast path: a complete RFC 3339 date-time is also valid ISO 8601.
	mut t := parse_iso8601(s) or { Time{} }
	// If parse_iso8601 DID NOT result in default values (i.e. date was parsed correctly)
	if t != Time{} {
		return t
	}
	// Split into date part and time part on 'T', falling back to a space separator.
	t_i := s.index('T') or { -1 }
	parts := if t_i != -1 { [s[..t_i], s[t_i + 1..]] } else { s.split(' ') }
	// Check if s is date only
	if !parts[0].contains_any(' Z') && parts[0].contains('-') {
		year, month, day := parse_iso8601_date(s) ?
		t = new_time(Time{
			year: year
			month: month
			day: day
		})
		return t
	}
	// Check if s is time only
	if !parts[0].contains('-') && parts[0].contains(':') {
		mut hour_, mut minute_, mut second_, mut microsecond_, mut unix_offset, mut is_local_time := 0, 0, 0, 0, i64(0), true
		hour_, minute_, second_, microsecond_, unix_offset, is_local_time = parse_iso8601_time(parts[0]) ?
		t = new_time(Time{
			hour: hour_
			minute: minute_
			second: second_
			microsecond: microsecond_
		})
		if is_local_time {
			return t // Time is already local time
		}
		// Apply the zone offset (seconds) to the unix timestamp to get UTC.
		mut unix_time := t.unix
		if unix_offset < 0 {
			unix_time -= (-unix_offset)
		} else if unix_offset > 0 {
			unix_time += unix_offset
		}
		t = unix2(i64(unix_time), t.microsecond)
		return t
	}
	return error(time.err_invalid_3339 + '. Could not parse "$s"')
}
// ----- iso8601 -----
const (
err_invalid_8601 = 'Invalid 8601 Format'

View File

@ -0,0 +1,55 @@
# TOML module
`toml` is a fully fledged TOML v1.0.0 compatible parser written in pure V.
## Usage
```v
import toml
// Complete text from the example in the README.md:
// https://github.com/toml-lang/toml/blob/3b11f6921da7b6f5db37af039aa021fee450c091/README.md#Example
const toml_text = '# This is a TOML document.
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
dob = 1979-05-27T07:32:00-08:00 # First class dates
[database]
server = "192.168.1.1"
ports = [ 8000, 8001, 8002 ]
connection_max = 5000
enabled = true
[servers]
# Indentation (tabs and/or spaces) is allowed but not required
[servers.alpha]
ip = "10.0.0.1"
dc = "eqdc10"
[servers.beta]
ip = "10.0.0.2"
dc = "eqdc10"
[clients]
data = [ ["gamma", "delta"], [1, 2] ]
# Line breaks are OK when inside arrays
hosts = [
"alpha",
"omega"
]'
fn main() {
doc := toml.parse(toml_text) or { panic(err) }
title := doc.value('title').string()
println('title: "$title"')
ip := doc.value('servers.alpha.ip').string()
println('Server IP: "$ip"')
toml_json := doc.to_json()
println(toml_json)
}
```

213
vlib/toml/any.v 100644
View File

@ -0,0 +1,213 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module toml
import time
// Pretty much all json2 types plus time.Time
pub type Any = Null
| []Any
| bool
| f32
| f64
| i64
| int
| map[string]Any
| string
| time.Time
| u64
// string returns `Any` as a string.
// `time.Time` values are formatted with microsecond precision; all
// other variants fall back to their generated `str()` representation.
pub fn (a Any) string() string {
	if a is string {
		return a
	}
	if a is time.Time {
		return a.format_ss_micro()
	}
	return a.str()
}
// int returns `Any` as a 32-bit integer.
// Numeric and boolean variants are cast; all other variants yield 0.
pub fn (a Any) int() int {
	match a {
		int { return a }
		i64, f32, f64, bool { return int(a) }
		// time.Time { return int(0) } // TODO
		else { return 0 }
	}
}
// i64 returns `Any` as a 64-bit integer.
// Numeric and boolean variants are cast; all other variants yield 0.
pub fn (a Any) i64() i64 {
	match a {
		i64 { return a }
		int, f32, f64, bool { return i64(a) }
		// time.Time { return i64(0) } // TODO
		else { return 0 }
	}
}
// u64 returns `Any` as a 64-bit unsigned integer.
// Numeric and boolean variants are cast; all other variants yield 0.
pub fn (a Any) u64() u64 {
	match a {
		u64 { return a }
		int, i64, f32, f64, bool { return u64(a) }
		// time.Time { return u64(0) } // TODO
		else { return 0 }
	}
}
// f32 returns `Any` as a 32-bit float.
// Numeric variants are cast; all other variants yield 0.0.
pub fn (a Any) f32() f32 {
	match a {
		f32 { return a }
		int, i64, f64 { return f32(a) }
		// time.Time { return f32(0) } // TODO
		else { return 0.0 }
	}
}
// f64 returns `Any` as a 64-bit float.
// Numeric variants are cast; all other variants yield 0.0.
pub fn (a Any) f64() f64 {
	match a {
		f64 { return a }
		int, i64, f32 { return f64(a) }
		// time.Time { return f64(0) } // TODO
		else { return 0.0 }
	}
}
// array returns `Any` as an array.
// A map variant is flattened to its values (key order); any scalar
// variant is wrapped in a one-element array.
pub fn (a Any) array() []Any {
	if a is []Any {
		return a
	}
	if a is map[string]Any {
		mut values := []Any{}
		for _, v in a {
			values << v
		}
		return values
	}
	return [a]
}
// as_map returns `Any` as a map (TOML table).
// An array variant is converted to a map keyed by element index;
// any scalar variant becomes a single-entry map under key '0'.
pub fn (a Any) as_map() map[string]Any {
	if a is map[string]Any {
		return a
	}
	if a is []Any {
		mut table := map[string]Any{}
		for i, item in a {
			table['$i'] = item
		}
		return table
	}
	return {
		'0': a
	}
}
// bool returns `Any` as a boolean.
// String variants are parsed via `string.bool()`; every other
// non-bool variant yields false.
pub fn (a Any) bool() bool {
	if a is bool {
		return a
	}
	if a is string {
		return a.bool()
	}
	return false
}
// date returns `Any` as a date encoded in a `time.Time` struct.
// Non-`time.Time` variants yield a zero `time.Time`.
pub fn (a Any) date() time.Time {
	if a is time.Time {
		return a
	}
	// string { } // TODO decode date strings
	return time.Time{}
}
// time returns `Any` as a time encoded in a `time.Time` struct.
// Non-`time.Time` variants yield a zero `time.Time`.
pub fn (a Any) time() time.Time {
	if a is time.Time {
		return a
	}
	// string { } // TODO decode time strings
	return time.Time{}
}
// datetime returns `Any` as a date+time encoded in a `time.Time` struct.
// Non-`time.Time` variants yield a zero `time.Time`.
pub fn (a Any) datetime() time.Time {
	if a is time.Time {
		return a
	}
	// string { } // TODO decode date-time strings
	return time.Time{}
}
// value queries a value from the map.
// `key` should be in "dotted" form, e.g. `"a.b.c.d"`; each segment
// descends one level into nested tables. Returns an error when any
// segment of the path does not exist.
pub fn (m map[string]Any) value(key string) ?Any {
	// return m[key] ?
	key_split := key.split('.')
	// util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' getting "${key_split[0]}"')
	if key_split[0] in m.keys() {
		value := m[key_split[0]] or {
			return error(@MOD + '.' + @STRUCT + '.' + @FN + ' key "$key" does not exist')
		}
		// `match` isn't currently very suitable for these types of sum type constructs...
		if value is map[string]Any {
			nm := (value as map[string]Any)
			next_key := key_split[1..].join('.')
			// An exhausted path ending on a table returns the table itself.
			if next_key == '' {
				return value
			}
			// Recurse into the nested table with the remaining path.
			return nm.value(next_key)
		}
		return value
	}
	return error(@MOD + '.' + @STRUCT + '.' + @FN + ' key "$key" does not exist')
}
// as_strings converts each element of the array to its string form.
pub fn (a []Any) as_strings() []string {
	return a.map(it.string())
}
// to_json returns `Any` as a JSON encoded string.
// Maps become JSON objects, arrays become JSON arrays and
// `time.Time` values are emitted as quoted strings.
// NOTE(review): string content is not JSON-escaped here — embedded
// quotes or backslashes would produce invalid JSON; confirm upstream
// guarantees or add escaping.
pub fn (a Any) to_json() string {
	match a {
		Null {
			return 'null'
		}
		string {
			return '"$a.str()"'
		}
		bool, f32, f64, i64, int, u64 {
			return a.str()
		}
		map[string]Any {
			mut str := '{'
			for key, val in a {
				str += ' "$key": $val.to_json(),'
			}
			// Drop the trailing comma left by the loop.
			str = str.trim_right(',')
			str += ' }'
			return str
		}
		[]Any {
			mut str := '['
			for val in a {
				str += ' $val.to_json(),'
			}
			str = str.trim_right(',')
			str += ' ]'
			return str
		}
		time.Time {
			return '"$a.format_ss_micro()"'
		}
	}
}

View File

@ -0,0 +1,28 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module ast
import toml.input
// Root represents the root structure of any parsed TOML text snippet or file.
[heap]
pub struct Root {
pub:
	input input.Config // User input configuration
pub mut:
	table Node // The root table; filled in by the parser.
	// errors []errors.Error // all the checker errors in the file
}
// str returns a human-readable string representation of the root,
// listing the input configuration and the parsed root table.
pub fn (r Root) str() string {
	mut s := typeof(r).name + '{\n'
	s += ' input: $r.input\n'
	s += ' table: $r.table\n'
	s += '}'
	return s
}
// to_json returns the root table as a JSON encoded string.
pub fn (r Root) to_json() string {
	return r.table.to_json()
}

View File

@ -0,0 +1,241 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module ast
import toml.token
// Key is a sumtype representing all types of keys that
// can be found in a TOML document.
pub type Key = Bare | Bool | Null | Number | Quoted

// str returns the key's literal text. Direct field access on the
// sumtype works because every variant has a `text` field.
pub fn (k Key) str() string {
	return k.text
}
// Node is a sumtype representing all possible value types
// found in a TOML document.
pub type Node = Bool | Date | DateTime | Null | Number | Quoted | Time | []Node | map[string]Node

// to_json returns the node as a JSON encoded string.
// Date/time variants are quoted; Bool/Null/Number are emitted verbatim;
// maps and arrays recurse into their members.
pub fn (v Node) to_json() string {
	match v {
		Quoted, Date, DateTime, Time {
			return '"$v.text"'
		}
		Bool, Null, Number {
			return v.text
		}
		map[string]Node {
			mut str := '{'
			for key, val in v {
				str += ' "$key": $val.to_json(),'
			}
			// Drop the trailing comma left by the loop.
			str = str.trim_right(',')
			str += ' }'
			return str
		}
		[]Node {
			mut str := '['
			for val in v {
				str += ' $val.to_json(),'
			}
			str = str.trim_right(',')
			str += ' ]'
			return str
		}
	}
}
// DateTimeType is a sumtype representing all possible date types
// found in a TOML document.
pub type DateTimeType = Date | DateTime | Time

// str returns the literal text of the date/time variant.
pub fn (dtt DateTimeType) str() string {
	return dtt.text
}
// value queries a value from the map.
// `key` should be in "dotted" form e.g.: `"a.b.c.d"`.
// Returns a reference to a `Null` node (not an error) when the path
// does not exist — see the TODOs below.
pub fn (v map[string]Node) value(key string) &Node {
	null := &Node(Null{})
	key_split := key.split('.')
	// util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' retrieving value at "$key"')
	if key_split[0] in v.keys() {
		value := v[key_split[0]] or {
			return null
			// TODO return error(@MOD + '.' + @STRUCT + '.' + @FN + ' key "$key" does not exist')
		}
		// `match` isn't currently very suitable for these types of sum type constructs...
		if value is map[string]Node {
			m := (value as map[string]Node)
			next_key := key_split[1..].join('.')
			// An exhausted path ending on a table returns the table itself.
			if next_key == '' {
				return &value
			}
			// Recurse into the nested table with the remaining path.
			return m.value(next_key)
		}
		return &value
	}
	return null
	// TODO return error(@MOD + '.' + @STRUCT + '.' + @FN + ' key "$key" does not exist')
}
// exists returns true if the "dotted" `key` path (e.g. `"a.b.c"`)
// can be fully resolved in the map.
pub fn (v map[string]Node) exists(key string) bool {
	key_split := key.split('.')
	if key_split[0] in v.keys() {
		value := v[key_split[0]] or { return false }
		// `match` isn't currently very suitable for these types of sum type constructs...
		if value is map[string]Node {
			m := (value as map[string]Node)
			next_key := key_split[1..].join('.')
			// An exhausted path ending on a table counts as existing.
			if next_key == '' {
				return true
			}
			return m.exists(next_key)
		}
		return true
	}
	return false
}
// Comment is an AST node holding the text of a TOML `# ...` comment.
pub struct Comment {
pub:
	text string
	pos token.Position
}

// str returns a human-readable string representation of the comment.
pub fn (c Comment) str() string {
	mut s := typeof(c).name + '{\n'
	s += ' text: \'$c.text\'\n'
	s += ' pos: $c.pos\n'
	s += '}'
	return s
}
// Null is used in sumtype checks as a "default" value when nothing else is possible.
pub struct Null {
pub:
	text string
	pos token.Position
}

// str returns the (normally empty) literal text of the null node.
pub fn (n Null) str() string {
	return n.text
}
// Quoted is an AST node holding a quoted TOML string value or key.
pub struct Quoted {
pub:
	text string
	pos token.Position
}

// str returns a human-readable string representation of the quoted value.
pub fn (q Quoted) str() string {
	mut str := typeof(q).name + '{\n'
	str += ' text: \'$q.text\'\n'
	str += ' pos: $q.pos\n'
	str += '}'
	return str
}
// Bare is an AST node holding an unquoted (bare) TOML key.
pub struct Bare {
pub:
	text string
	pos token.Position
}

// str returns a human-readable string representation of the bare key.
pub fn (b Bare) str() string {
	mut str := typeof(b).name + '{\n'
	str += ' text: \'$b.text\'\n'
	str += ' pos: $b.pos\n'
	str += '}'
	return str
}
// Bool is an AST node holding the literal text of a TOML boolean.
pub struct Bool {
pub:
	text string
	pos token.Position
}

// str returns a human-readable string representation of the boolean.
pub fn (b Bool) str() string {
	mut str := typeof(b).name + '{\n'
	str += ' text: \'$b.text\'\n'
	str += ' pos: $b.pos\n'
	str += '}'
	return str
}
// Number is an AST node holding the literal text of a TOML number
// (integer or float, in any supported base).
pub struct Number {
pub:
	text string
	pos token.Position
}

// str returns a human-readable string representation of the number.
pub fn (n Number) str() string {
	mut str := typeof(n).name + '{\n'
	str += ' text: \'$n.text\'\n'
	str += ' pos: $n.pos\n'
	str += '}'
	return str
}
// Date is an AST node holding the literal text of a TOML local date.
pub struct Date {
pub:
	text string
	pos token.Position
}

// str returns a human-readable string representation of the date.
pub fn (d Date) str() string {
	mut str := typeof(d).name + '{\n'
	str += ' text: \'$d.text\'\n'
	str += ' pos: $d.pos\n'
	str += '}'
	return str
}
// Time is an AST node holding the literal text of a TOML time value.
pub struct Time {
pub:
	text string
	offset int // zone offset; unit not established here — see parser. TODO confirm
	pos token.Position
}

// str returns a human-readable string representation of the time.
pub fn (t Time) str() string {
	mut str := typeof(t).name + '{\n'
	str += ' text: \'$t.text\'\n'
	str += ' offset: \'$t.offset\'\n'
	str += ' pos: $t.pos\n'
	str += '}'
	return str
}
// DateTime is an AST node combining a Date and a Time node,
// along with the full literal text of the date-time.
pub struct DateTime {
pub:
	text string
	pos token.Position
	date Date
	time Time
}

// str returns a human-readable string representation of the date-time.
pub fn (dt DateTime) str() string {
	mut str := typeof(dt).name + '{\n'
	str += ' text: \'$dt.text\'\n'
	str += ' date: \'$dt.date\'\n'
	str += ' time: \'$dt.time\'\n'
	str += ' pos: $dt.pos\n'
	str += '}'
	return str
}
// EOF is an AST node marking the end of the input.
pub struct EOF {
pub:
	pos token.Position
}

// str returns a human-readable string representation of the EOF marker.
pub fn (e EOF) str() string {
	mut str := typeof(e).name + '{\n'
	str += ' pos: $e.pos\n'
	str += '}'
	return str
}

View File

@ -0,0 +1,37 @@
module walker
import toml.ast
// Visitor defines a visit method which is invoked by the walker in each node it encounters.
// Implementations may return an error to abort the walk.
pub interface Visitor {
	visit(node &ast.Node) ?
}
// InspectorFn is the callback signature used by `inspect`.
pub type InspectorFn = fn (node &ast.Node, data voidptr) ?

// Inspector adapts an `InspectorFn` callback (plus opaque user data)
// to the `Visitor` interface.
struct Inspector {
	inspector_callback InspectorFn
mut:
	data voidptr
}
// visit invokes the stored callback with `node` and the user data,
// propagating any error the callback returns.
pub fn (i &Inspector) visit(node &ast.Node) ? {
	i.inspector_callback(node, i.data) or { return err }
}
// inspect traverses and checks the AST node on a depth-first order and based on the data given
pub fn inspect(node &ast.Node, data voidptr, inspector_callback InspectorFn) ? {
	walk(Inspector{inspector_callback, data}, node) ?
}
// walk traverses the AST using the given visitor.
// Map (table) nodes are recursed into; all other node kinds are
// passed directly to `visitor.visit`. Note that map nodes themselves
// are never visited, only their members.
pub fn walk(visitor Visitor, node &ast.Node) ? {
	if node is map[string]ast.Node {
		n := node as map[string]ast.Node
		for _, nn in n {
			walk(visitor, &nn) ?
		}
	} else {
		visitor.visit(node) ?
	}
}

View File

@ -0,0 +1,168 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module checker
import toml.ast
import toml.ast.walker
// import toml.util
import toml.token
import toml.scanner
// Checker checks a tree of TOML `ast.Node`'s for common errors.
pub struct Checker {
	scanner &scanner.Scanner // used only to produce error excerpts
}
// check walks the AST rooted at `n`, validating each node it visits.
pub fn (c Checker) check(n &ast.Node) ? {
	walker.walk(c, n) ?
}
// visit is the `walker.Visitor` callback; it dispatches each node
// kind to its dedicated check. Unhandled kinds pass silently.
fn (c Checker) visit(node &ast.Node) ? {
	match node {
		ast.Number {
			c.check_number(node) ?
		}
		ast.Bool {
			c.check_boolean(node) ?
		}
		else {
			// TODO add more checks to make BurntSushi/toml-test invalid TOML pass
		}
	}
}
// excerpt returns a string of the characters surrounding `tp.pos`
// (10 characters to each side), for use in error messages.
fn (c Checker) excerpt(tp token.Position) string {
	return c.scanner.excerpt(tp.pos, 10)
}
// is_hex_bin_oct returns true if `hbo` starts with a hex (`0x`),
// octal (`0o`) or binary (`0b`) prefix followed by at least one digit.
fn is_hex_bin_oct(hbo string) bool {
	if hbo.len <= 2 {
		return false
	}
	return hbo.starts_with('0x') || hbo.starts_with('0o') || hbo.starts_with('0b')
}
// has_repeating returns true if any character from `repeats` occurs
// twice in a row anywhere in `str`.
fn has_repeating(str string, repeats []rune) bool {
	for i := 0; i + 1 < str.len; i++ {
		if rune(str[i]) in repeats && str[i] == str[i + 1] {
			return true
		}
	}
	return false
}
// check_number returns an error if `num.text` is not a valid TOML
// number literal (integer, float, hex/octal/binary, with optional
// sign, `_` separators and exponent notation).
fn (c Checker) check_number(num ast.Number) ? {
	lit := num.text
	// Fast-accept common canonical zero spellings.
	if lit in ['0', '0.0', '+0', '-0', '+0.0', '-0.0', '0e0', '+0e0', '-0e0', '0e00'] {
		return
	}
	if lit.contains('_') {
		if lit.starts_with('_') || lit.ends_with('_') {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" can not start or end with `_` in ...${c.excerpt(num.pos)}...')
		}
		if lit.contains('__') {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" can not have more than one underscore (`_`) in ...${c.excerpt(num.pos)}...')
		}
	}
	mut hex_bin_oct := is_hex_bin_oct(lit)
	is_hex := lit.contains('0x')
	// Only a '.' before any exponent marker makes the literal a float.
	is_float := lit.to_lower().all_before('e').contains('.')
	has_exponent_notation := lit.to_lower().contains('e')
	float_decimal_index := lit.index('.') or { -1 }
	mut ascii := byte(lit[0]).ascii_str()
	is_sign_prefixed := lit[0] in [`+`, `-`]
	if is_sign_prefixed { // +/- ...
		n := lit[1..]
		hex_bin_oct = is_hex_bin_oct(n)
		if hex_bin_oct {
			// TOML forbids signed hex/octal/binary literals.
			ascii = byte(lit[0]).ascii_str()
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" (hex, octal and binary) can not start with `$ascii` in ...${c.excerpt(num.pos)}...')
		}
		if lit.len > 1 && n.starts_with('0') {
			ascii = byte(n[0]).ascii_str()
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" can not start with `$ascii` in ...${c.excerpt(num.pos)}...')
		}
	} else {
		if !hex_bin_oct {
			// Decimal literals must not have leading zeros.
			if !is_float && lit[0] == `0` {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' numbers like "$lit" can not start with a zero in ...${c.excerpt(num.pos)}...')
			}
			if is_float && lit[0] == `0` && float_decimal_index > 1 {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' numbers like "$lit" can not start with a zero in ...${c.excerpt(num.pos)}...')
			}
		}
	}
	if has_repeating(lit, [`_`, `.`, `x`, `o`, `b`]) {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' numbers like "$lit" can not have $scanner.digit_extras as repeating characters in ...${c.excerpt(num.pos)}...')
	}
	if hex_bin_oct {
		// The first character after the 0x/0o/0b prefix must be a digit.
		third := lit[2]
		if third in scanner.digit_extras {
			ascii = byte(third).ascii_str()
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" (hex, octal and binary) can not have `$ascii` in ...${c.excerpt(num.pos)}...')
		}
	}
	if has_exponent_notation {
		if lit.to_lower().all_after('e').contains('.') {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" (with exponent) can not have a decimal point in ...${c.excerpt(num.pos)}...')
		}
		if !is_hex && lit.to_lower().count('e') > 1 {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" (with exponent) can only have one exponent in ...${c.excerpt(num.pos)}...')
		}
	}
	if is_float {
		if lit.count('.') > 1 {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" (float) can only have one decimal point in ...${c.excerpt(num.pos)}...')
		}
		last := lit[lit.len - 1]
		if last in scanner.digit_extras {
			ascii = byte(last).ascii_str()
			// FIX: this guards the LAST character; the original message said "start".
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" (float) can not end with `$ascii` in ...${c.excerpt(num.pos)}...')
		}
		if lit.contains('_.') || lit.contains('._') {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" (float) can not have underscores before or after the decimal point in ...${c.excerpt(num.pos)}...')
		}
		if lit.contains('e.') || lit.contains('.e') || lit.contains('E.') || lit.contains('.E') {
			// FIX: this guards exponent/decimal-point adjacency; the original
			// message was copy-pasted from the underscore check above.
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" (float) can not have a decimal point directly before or after the exponent in ...${c.excerpt(num.pos)}...')
		}
	} else {
		if lit.len > 1 && lit.starts_with('0') && lit[1] !in [`x`, `o`, `b`] {
			ascii = byte(lit[0]).ascii_str()
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' numbers like "$lit" can not start with `$ascii` in ...${c.excerpt(num.pos)}...')
		}
	}
}
// check_boolean returns an error if `b.text` is anything other than
// the exact literals `true` or `false`.
fn (c Checker) check_boolean(b ast.Bool) ? {
	lit := b.text
	if lit == 'true' || lit == 'false' {
		return
	}
	return error(@MOD + '.' + @STRUCT + '.' + @FN +
		' boolean values like "$lit" can only be `true` or `false` literals, not `$lit` in ...${c.excerpt(b.pos)}...')
}

View File

@ -0,0 +1,22 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module input
// Config is used to configure input to the toml module.
// Only one of the fields `text` and `file_path` is allowed to be set at time of configuration.
pub struct Config {
pub:
	text string // TOML text
	file_path string // '/path/to/file.toml'
}
// validate returns an error if both `file_path` and `text` are set,
// or if neither of them is.
pub fn (c Config) validate() ? {
	if c.file_path != '' && c.text != '' {
		// BUG FIX: the original built the error but discarded it;
		// validate() could never actually fail. The error is now returned.
		return error(@MOD + '.' + @FN +
			' ${typeof(c).name} should contain only one of the fields `file_path` OR `text` filled out')
	} else if c.file_path == '' && c.text == '' {
		return error(@MOD + '.' + @FN +
			' ${typeof(c).name} must either contain a valid `file_path` OR a non-empty `text` field')
	}
}

View File

@ -0,0 +1,849 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module parser
import toml.ast
import toml.checker
import toml.util
import toml.token
import toml.scanner
// Parser contains the necessary fields for keeping the state of the parse process.
// It consumes tokens from a `scanner.Scanner` and builds the AST.
// NOTE(review): the original header comment described the Scanner;
// it was copy-pasted and has been corrected here.
pub struct Parser {
pub:
	config Config
mut:
	scanner &scanner.Scanner
	prev_tok token.Token
	tok token.Token
	peek_tok token.Token
	skip_next bool // when true, the next loop iteration must not advance the token stream
	// The root map (map is called table in TOML world)
	root_map map[string]ast.Node
	root_map_key string
	// Array of Tables state
	last_aot string
	last_aot_index int
	// Root of the tree
	ast_root &ast.Root = &ast.Root{}
}
// Config is used to configure a Parser instance.
// `run_checks` controls whether the checker validates the AST after parsing.
// NOTE(review): the original comment about `text`/`file_path` fields was
// copy-pasted from input.Config and did not match this struct.
pub struct Config {
pub:
	scanner &scanner.Scanner
	run_checks bool = true
}
// new_parser returns a new, stack allocated, `Parser` bound to the
// scanner carried by `config`.
pub fn new_parser(config Config) Parser {
	return Parser{
		config: config
		scanner: config.scanner
	}
}
// init initializes the parser: resets the root table and primes the
// token window by scanning the first token.
pub fn (mut p Parser) init() ? {
	p.root_map = map[string]ast.Node{}
	p.next() ?
}
// run_checker validates the parsed `ast.Node` nodes in the
// the generated AST. It is a no-op when `config.run_checks` is false.
fn (mut p Parser) run_checker() ? {
	if p.config.run_checks {
		chckr := checker.Checker{
			scanner: p.scanner
		}
		chckr.check(p.root_map) ?
	}
}
// parse starts parsing the input and returns the root
// of the generated AST. Checks run (if enabled) before the
// root table is attached to the returned `ast.Root`.
pub fn (mut p Parser) parse() ?&ast.Root {
	p.init() ?
	p.root_table() ?
	p.run_checker() ?
	p.ast_root.table = p.root_map
	return p.ast_root
}
// next forwards the parser to the next token, shifting the
// prev/current/peek token window by one scanned token.
fn (mut p Parser) next() ? {
	p.prev_tok = p.tok
	p.tok = p.peek_tok
	p.peek_tok = p.scanner.scan() ?
}
// check advances to the next token if the current token's `Kind`
// equals `check_token`, otherwise it returns an error.
// NOTE(review): original comment claimed it "returns true" — it returns `?`.
fn (mut p Parser) check(check_token token.Kind) ? {
	if p.tok.kind == check_token {
		p.next() ?
	} else {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected token "$check_token" but found "$p.tok.kind" in this (excerpt): "...${p.excerpt()}..."')
	}
}
// check_one_of advances to the next token if the current token's
// `Kind` is one of `tokens`, otherwise it returns an error.
// NOTE(review): original comment claimed it "returns true" — it returns `?`.
fn (mut p Parser) check_one_of(tokens []token.Kind) ? {
	if p.tok.kind in tokens {
		p.next() ?
	} else {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected one of $tokens but found "$p.tok.kind" in this (excerpt): "...${p.excerpt()}..."')
	}
}
// is_at returns true if the current token kind is equal to `expected_token`.
// Unlike `check`/`expect` it never advances and never errors.
fn (mut p Parser) is_at(expected_token token.Kind) bool {
	return p.tok.kind == expected_token
}
// expect will error if the token kind is not equal to `expected_token`.
// The parser position is never advanced.
fn (mut p Parser) expect(expected_token token.Kind) ? {
	if p.tok.kind != expected_token {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected token "$expected_token" but found "$p.tok.kind" in this text "...${p.excerpt()}..."')
	}
}
// find_table returns a reference to a map if found in the root table given a "dotted" key ('a.b.c').
// If some segments of the key does not exist in the root table find_table will
// allocate a new map for each segment. This behavior is needed because you can
// reference maps by multiple keys "dotted" (separated by "." periods) in TOML documents.
pub fn (mut p Parser) find_table() ?&map[string]ast.Node {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "$p.root_map_key" in map ${ptr_str(p.root_map)}')
	mut t := &map[string]ast.Node{}
	// Point `t` at the live root map, not a copy.
	unsafe {
		t = &p.root_map
	}
	// An empty key means the root itself.
	if p.root_map_key == '' {
		return t
	}
	return p.find_in_table(mut t, p.root_map_key)
}
// sub_table_key splits a "dotted" key into its parent path and its
// final segment, e.g. 'a.b.c' -> ('a.b', 'c').
pub fn (mut p Parser) sub_table_key(key string) (string, string) {
	segments := key.split('.')
	return segments[..segments.len - 1].join('.'), segments.last()
}
// find_sub_table returns a reference to a map if found in `table` given a "dotted" key ('aa.bb.cc').
// If some segments of the key does not exist in the input map find_in_table will
// allocate a new map for the segment. This behavior is needed because you can
// reference maps by multiple keys "dotted" (separated by "." periods) in TOML documents.
pub fn (mut p Parser) find_sub_table(key string) ?&map[string]ast.Node {
	// Qualify `key` with the current root map key unless we are at the root.
	mut ky := p.root_map_key + '.' + key
	if p.root_map_key == '' {
		ky = key
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "$ky" in map ${ptr_str(p.root_map)}')
	mut t := &map[string]ast.Node{}
	// Point `t` at the live root map, not a copy.
	unsafe {
		t = &p.root_map
	}
	if ky == '' {
		return t
	}
	return p.find_in_table(mut t, ky)
}
// find_in_table returns a reference to a map if found in `table` given a "dotted" key ('aa.bb.cc').
// If some segments of the key does not exist in the input map find_in_table will
// allocate a new map for the segment. This behavior is needed because you can
// reference maps by multiple keys "dotted" (separated by "." periods) in TOML documents.
// Returns an error if a segment resolves to a non-map node.
pub fn (mut p Parser) find_in_table(mut table map[string]ast.Node, key string) ?&map[string]ast.Node {
	// NOTE This code is the result of much trial and error.
	// I'm still not quite sure *exactly* why it works. All I can leave here is a hope
	// that this kind of minefield someday will be easier in V :)
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'locating "$key" in map ${ptr_str(table)}')
	mut t := &map[string]ast.Node{}
	unsafe {
		t = &table
	}
	ks := key.split('.')
	// Walk each key segment, descending into (or allocating) nested maps.
	unsafe {
		for k in ks {
			if k in t.keys() {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'found key "$k" in $t.keys()')
				if val := t[k] or {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' this should never happen. Key "$k" was checked before access')
				}
				{
					if val is map[string]ast.Node {
						// unsafe {
						t = &(t[k] as map[string]ast.Node)
						//}
					} else {
						return error(@MOD + '.' + @STRUCT + '.' + @FN + ' "$k" is not a map')
					}
				}
			} else {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'no key "$k" found, allocating new map "$k" in map ${ptr_str(t)}"')
				// unsafe {
				t[k] = map[string]ast.Node{}
				t = &(t[k] as map[string]ast.Node)
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'allocated new map ${ptr_str(t)}"')
				//}
			}
		}
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning map ${ptr_str(t)}"')
	return t
}
// sub_key parses a "dotted" key (e.g. `a.b.c`) starting at the current
// token and returns its joined textual form. On return the parser has
// been advanced past the last key segment.
pub fn (mut p Parser) sub_key() ?string {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing nested key...')
	key := p.key() ?
	mut text := key.str()
	// Consume '.' separated key segments as long as a period follows.
	for p.peek_tok.kind == .period {
		p.next() ? // .
		p.check(.period) ?
		next_key := p.key() ?
		text += '.' + next_key.text
	}
	p.next() ?
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed nested key `$text` now at "$p.tok.kind" "$p.tok.lit"')
	return text
}
// root_table parses next tokens into the root map of `ast.Node`s.
// The V `map` type is corresponding to a "table" in TOML.
// Handles comments, `key = value` pairs, dotted keys, `[table]` headers
// and `[[array of table]]` headers until EOF.
pub fn (mut p Parser) root_table() ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing root table...')
	for p.tok.kind != .eof {
		// `skip_next` is set after array-of-tables parsing, which has
		// already advanced the token stream.
		if !p.skip_next {
			p.next() ?
		} else {
			p.skip_next = false
		}
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')
		match p.tok.kind {
			.hash {
				// TODO table.comments << p.comment()
				c := p.comment()
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
			}
			//.whitespace, .tab, .nl {
			// util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind "$p.tok.lit"')
			//}
			.bare, .quoted, .boolean, .number, .underscore { // NOTE .boolean allows for use of "true" and "false" as table keys
				if p.peek_tok.kind == .assign
					|| (p.tok.kind == .number && p.peek_tok.kind == .minus) {
					// Plain `key = value` in the currently active table.
					key, val := p.key_value() ?
					t := p.find_table() ?
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val.to_json() in table ${ptr_str(t)}')
						t[key.str()] = val
					}
				} else if p.peek_tok.kind == .period {
					// Dotted `a.b.c = value`: resolve/allocate the sub table first.
					subkey := p.sub_key() ?
					p.check(.assign) ?
					val := p.value() ?
					sub_table, key := p.sub_table_key(subkey)
					t := p.find_sub_table(sub_table) ?
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key" = $val.to_json() in table ${ptr_str(t)}')
						t[key] = val
					}
				} else {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' dead end at "$p.tok.kind" "$p.tok.lit"')
				}
			}
			.lsbr {
				p.check(.lsbr) ? // '[' bracket
				if p.tok.kind == .lsbr {
					// `[[...]]` array of tables.
					p.array_of_tables(mut &p.root_map) ?
					p.skip_next = true // skip calling p.next() in coming iteration
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'leaving double bracket at "$p.tok.kind "$p.tok.lit". NEXT is "$p.peek_tok.kind "$p.peek_tok.lit"')
				} else if p.peek_tok.kind == .period {
					// `[a.b.c]` dotted table header.
					p.root_map_key = p.sub_key() ?
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$p.root_map_key` at "$p.tok.kind" "$p.tok.lit"')
					p.expect(.rsbr) ?
				} else {
					// `[table]` plain table header.
					key := p.key() ?
					p.root_map_key = key.str()
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$p.root_map_key` at "$p.tok.kind" "$p.tok.lit"')
					p.next() ?
					p.expect(.rsbr) ?
				}
			}
			.eof {
				return
			}
			else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not parse "$p.tok.kind" "$p.tok.lit" in this (excerpt): "...${p.excerpt()}..."')
			}
		}
	}
}
// excerpt returns a string of the characters surrounding `Parser.tok.pos`
// (10 characters to each side), for use in error messages.
fn (p Parser) excerpt() string {
	return p.scanner.excerpt(p.tok.pos, 10)
}
// inline_table parses next tokens into a map of `ast.Node`s.
// The V map type is corresponding to a "table" in TOML.
// Consumes tokens of a `{ key = value, ... }` inline table until the
// closing `}` (or EOF) is reached.
pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Node) ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing inline table into ${ptr_str(tbl)}...')
	for p.tok.kind != .eof {
		p.next() ?
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind"')
		match p.tok.kind {
			.hash {
				// TODO table.comments << p.comment()
				c := p.comment()
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
			}
			//.whitespace, .tab, .nl {
			// util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind "$p.tok.lit"')
			//}
			.comma {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comma table value seperator "$p.tok.lit"')
				continue
			}
			.rcbr {
				// '}' closes the inline table.
				return
			}
			.bare, .quoted, .boolean, .number, .underscore {
				if p.peek_tok.kind == .assign {
					// Plain `key = value` entry.
					key, val := p.key_value() ?
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @5 "$key.str()" = $val.to_json() into ${ptr_str(tbl)}')
					tbl[key.str()] = val
				} else if p.peek_tok.kind == .period {
					// Dotted `a.b = value` entry: resolve/allocate the sub table.
					subkey := p.sub_key() ?
					p.check(.assign) ?
					val := p.value() ?
					sub_table, key := p.sub_table_key(subkey)
					mut t := p.find_in_table(mut tbl, sub_table) ?
					unsafe {
						util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @6 "$key" = $val.to_json() into ${ptr_str(t)}')
						t[key] = val
					}
				} else {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' dead end at "$p.tok.kind" "$p.tok.lit"')
				}
			}
			.lsbr {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' unexpected "$p.tok.kind" "$p.tok.lit" at this (excerpt): "...${p.excerpt()}..."')
			}
			.eof {
				return
			}
			else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not parse $p.tok.kind ("$p.tok.lit") in this (excerpt): "...${p.excerpt()}..." token \n$p.tok')
			}
		}
		if p.peek_tok.kind == .lsbr {
			return
		}
	}
}
// array_of_tables parses next tokens into an array of `ast.Node`s.
// Handles a `[[key]]` header; delegates `[[key.key]]` headers to
// double_array_of_tables. Appends a new table to the array stored
// under the key, creating the array on first use.
pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Node) ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array of tables "$p.tok.kind" "$p.tok.lit"')
	// NOTE this is starting to get ugly. TOML isn't simple at this point
	p.check(.lsbr) ? // '[' bracket
	// [[key.key]] horror
	if p.peek_tok.kind == .period {
		p.double_array_of_tables(mut table) ?
		return
	}
	key := p.key() ?
	p.next() ?
	p.check(.rsbr) ?
	p.check(.rsbr) ?
	key_str := key.str()
	unsafe {
		if key_str in table.keys() {
			// The array already exists: append the next table to it.
			if val := table[key_str] or {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' this should never happen. Key "$key_str" was checked before access')
			}
			{
				if val is []ast.Node {
					arr := &(table[key_str] as []ast.Node)
					arr << p.double_bracket_array() ?
					table[key_str] = arr
				} else {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' table[$key_str] is not an array. (excerpt): "...${p.excerpt()}..."')
				}
			}
		} else {
			table[key_str] = p.double_bracket_array() ?
		}
	}
	// Remember the key so later [[key.sub]] headers can find this array.
	p.last_aot = key_str
	p.last_aot_index = 0
}
// double_array_of_tables parses next tokens into an array of tables of arrays of `ast.Node`s...
// Handles dotted `[[a.b]]` headers; only two key levels are supported.
pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Node) ? {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array of tables of arrays "$p.tok.kind" "$p.tok.lit"')
	key := p.key() ?
	mut key_str := key.str()
	// Collect the full dotted key, e.g. `a.b` from `[[a.b]]`.
	for p.peek_tok.kind == .period {
		p.next() ? // .
		p.check(.period) ?
		next_key := p.key() ?
		key_str += '.' + next_key.text
	}
	p.next() ?
	p.check(.rsbr) ?
	p.check(.rsbr) ?
	ks := key_str.split('.')
	if ks.len != 2 {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' nested array of tables does not support more than 2 levels. (excerpt): "...${p.excerpt()}..."')
	}
	first := ks[0]
	last := ks[1]
	unsafe {
		// NOTE this is starting to get EVEN uglier. TOML is not at all simple at this point...
		if p.last_aot != first {
			// New outer key: start a fresh array holding one empty table.
			table[first] = []ast.Node{}
			p.last_aot = first
			mut t_arr := &(table[p.last_aot] as []ast.Node)
			t_arr << map[string]ast.Node{}
			p.last_aot_index = 0
		}
		mut t_arr := &(table[p.last_aot] as []ast.Node)
		mut t_map := t_arr[p.last_aot_index]
		mut t := &(t_map as map[string]ast.Node)
		if last in t.keys() {
			// The `or` branch is unreachable: membership was checked just above.
			if val := t[last] or {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' this should never happen. Key "$last" was checked before access')
			}
			{
				if val is []ast.Node {
					arr := &(val as []ast.Node)
					arr << p.double_bracket_array() ?
					t[last] = arr
				} else {
					return error(@MOD + '.' + @STRUCT + '.' + @FN +
						' t[$last] is not an array. (excerpt): "...${p.excerpt()}..."')
				}
			}
		} else {
			t[last] = p.double_bracket_array() ?
		}
	}
}
// double_bracket_array parses next tokens into an array of tables of `ast.Node`s.
// Each `key = value` pair becomes its own single-entry table in the returned array.
pub fn (mut p Parser) double_bracket_array() ?[]ast.Node {
	mut arr := []ast.Node{}
	for p.tok.kind in [.bare, .quoted, .boolean, .number] && p.peek_tok.kind == .assign {
		mut tbl := map[string]ast.Node{}
		key, val := p.key_value() ?
		tbl[key.str()] = val
		arr << tbl
		p.next() ?
	}
	return arr
}
// array parses next tokens into an array of `ast.Node`s.
// Consumes everything between the enclosing `[` and `]` brackets,
// skipping commas and comments.
pub fn (mut p Parser) array() ?[]ast.Node {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array...')
	mut arr := []ast.Node{}
	p.expect(.lsbr) ? // '[' bracket
	for p.tok.kind != .eof {
		p.next() ?
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')
		match p.tok.kind {
			.boolean {
				arr << ast.Node(p.boolean() ?)
			}
			.comma {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comma array value seperator "$p.tok.lit"')
				continue
			}
			.eof {
				// End Of File
				return arr
			}
			.hash {
				// TODO array.comments << p.comment()
				c := p.comment()
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
			}
			.lcbr {
				mut t := map[string]ast.Node{}
				p.inline_table(mut t) ?
				// BUGFIX: the parsed inline table was previously discarded
				// (`ast.Node(t)` as a bare statement), dropping `{...}` values
				// that appear inside arrays.
				arr << ast.Node(t)
			}
			.number {
				val := p.number_or_date() ?
				arr << val
			}
			.quoted {
				arr << ast.Node(p.quoted())
			}
			.lsbr {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array in array "$p.tok.kind" "$p.tok.lit"')
				arr << ast.Node(p.array() ?)
			}
			.rsbr {
				break
			}
			else {
				// BUGFIX: the error value was previously constructed but never
				// returned, so unparsable tokens were silently ignored.
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not parse "$p.tok.kind" "$p.tok.lit" ("$p.tok.lit") in this (excerpt): "...${p.excerpt()}..."')
			}
		}
	}
	p.expect(.rsbr) ? // ']' bracket
	$if debug {
		flat := arr.str().replace('\n', r'\n')
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed array: $flat . Currently @ token "$p.tok.kind"')
	}
	return arr
}
// comment returns an `ast.Comment` built from the current `#` token.
pub fn (mut p Parser) comment() ast.Comment {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed hash comment "#$p.tok.lit"')
	comment := ast.Comment{
		text: p.tok.lit
		pos: p.tok.position()
	}
	return comment
}
// key parse and returns an `ast.Key` type.
// Keys are the token(s) appearing before an assignment operator (=).
pub fn (mut p Parser) key() ?ast.Key {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing key from "$p.tok.lit" ...')
	mut key := ast.Key(ast.Null{})
	if p.tok.kind == .number {
		if p.peek_tok.kind == .minus {
			// A bare key that starts with digits and contains `-` (e.g. `1979-05-27`):
			// glue tokens together until the assignment operator is reached.
			mut lits := p.tok.lit
			pos := p.tok.position()
			for p.peek_tok.kind != .assign {
				p.next() ?
				lits += p.tok.lit
			}
			return ast.Key(ast.Bare{
				text: lits
				pos: pos
			})
		}
		// number := p.number() as ast.Number
		key = ast.Key(p.number())
	} else {
		key = match p.tok.kind {
			.bare, .underscore {
				ast.Key(p.bare())
			}
			.boolean {
				ast.Key(p.boolean() ?)
			}
			.quoted {
				ast.Key(p.quoted())
			}
			else {
				// NOTE(review): this `error(...)` value is discarded (not returned);
				// the match then yields a dummy `ast.Bare{}` because of the
				// compiler workaround noted below - confirm whether this should
				// be `return error(...)` once the workaround is no longer needed.
				error(@MOD + '.' + @STRUCT + '.' + @FN +
					' key expected .bare, .number, .quoted or .boolean but got "$p.tok.kind"')
				ast.Key(ast.Bare{}) // TODO workaround bug
			}
		}
	}
	// NOTE kept for eased debugging
	// util.printdbg(@MOD +'.' + @STRUCT + '.' + @FN, 'parsed key "$p.tok.lit"')
	// panic(@MOD + '.' + @STRUCT + '.' + @FN + ' could not parse ${p.tok.kind} ("${p.tok.lit}") token \n$p.tok')
	// return ast.Key(ast.Bare{})
	return key
}
// key_value parse and returns a pair `ast.Key` and `ast.Node` type.
// see also `key()` and `value()`
pub fn (mut p Parser) key_value() ?(ast.Key, ast.Node) {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing key value pair...')
	parsed_key := p.key() ?
	p.next() ?
	// Consume the `=` between the key and its value.
	p.check(.assign) ?
	parsed_value := p.value() ?
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed key value pair. "$parsed_key" = $parsed_value.to_json()')
	return parsed_key, parsed_value
}
// value parse and returns an `ast.Node` type.
// values are the token(s) appearing after an assignment operator (=).
pub fn (mut p Parser) value() ?ast.Node {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing value...')
	// println('parsed comment "${p.tok.lit}"')
	mut value := ast.Node(ast.Null{})
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')
	// mut value := ast.Node{}
	if p.tok.kind == .number {
		// Numbers need disambiguation: they may also start a date or a time.
		number_or_date := p.number_or_date() ?
		value = number_or_date
	} else {
		value = match p.tok.kind {
			.quoted {
				ast.Node(p.quoted())
			}
			.boolean {
				ast.Node(p.boolean() ?)
			}
			.lsbr {
				ast.Node(p.array() ?)
			}
			.lcbr {
				mut t := map[string]ast.Node{}
				p.inline_table(mut t) ?
				// table[key_str] = ast.Node(t)
				ast.Node(t)
			}
			else {
				// NOTE(review): this `error(...)` value is discarded (not returned);
				// the match then yields `ast.Null{}` - see the workaround note below.
				error(@MOD + '.' + @STRUCT + '.' + @FN +
					' value expected .boolean, .quoted, .lsbr, .lcbr or .number got "$p.tok.kind" "$p.tok.lit"')
				ast.Node(ast.Null{}) // TODO workaround bug
			}
		}
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed value $value.to_json()')
	return value
}
// number_or_date parse and returns an `ast.Node` type as
// one of [`ast.Date`, `ast.Time`, `ast.DateTime`, `ast.Number`]
pub fn (mut p Parser) number_or_date() ?ast.Node {
	// Handle Date/Time
	// A following `-` indicates a date (1979-...), a `:` indicates a time (07:32:...).
	if p.peek_tok.kind == .minus || p.peek_tok.kind == .colon {
		date_time_type := p.date_time() ?
		// Re-wrap the DateTimeType sum type variant into the Node sum type.
		match date_time_type {
			ast.Date {
				return ast.Node(date_time_type as ast.Date)
			}
			ast.Time {
				return ast.Node(date_time_type as ast.Time)
			}
			ast.DateTime {
				return ast.Node(date_time_type as ast.DateTime)
			}
		}
	}
	return ast.Node(p.number())
}
// bare parse and returns an `ast.Bare` built from the current token.
pub fn (mut p Parser) bare() ast.Bare {
	tok := p.tok
	return ast.Bare{
		text: tok.lit
		pos: tok.position()
	}
}
// quoted parse and returns an `ast.Quoted` built from the current token.
pub fn (mut p Parser) quoted() ast.Quoted {
	tok := p.tok
	return ast.Quoted{
		text: tok.lit
		pos: tok.position()
	}
}
// boolean parse and returns an `ast.Bool` type.
// Returns an error if the current token literal is not exactly `true` or `false`.
pub fn (mut p Parser) boolean() ?ast.Bool {
	if p.tok.lit !in ['true', 'false'] {
		// BUGFIX: report the offending literal rather than the token kind -
		// the check above is on the literal text, so the kind was uninformative.
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' expected literal to be either `true` or `false` got "$p.tok.lit"')
	}
	return ast.Bool{
		text: p.tok.lit
		pos: p.tok.position()
	}
}
// number parse and returns an `ast.Number` built from the current token.
pub fn (mut p Parser) number() ast.Number {
	tok := p.tok
	return ast.Number{
		text: tok.lit
		pos: tok.position()
	}
}
// date_time parses dates and time in RFC 3339 format.
// https://datatracker.ietf.org/doc/html/rfc3339
// Returns one of `ast.Date`, `ast.Time` or `ast.DateTime` depending on
// what the token stream contains.
pub fn (mut p Parser) date_time() ?ast.DateTimeType {
	// Date and/or Time
	mut lit := ''
	pos := p.tok.position()
	mut date := ast.Date{}
	mut time := ast.Time{}
	if p.peek_tok.kind == .minus {
		date = p.date() ?
		lit += date.text
		// Look for any THH:MM:SS or <space>HH:MM:SS
		if (p.peek_tok.kind == .bare && (p.peek_tok.lit.starts_with('T')
			|| p.peek_tok.lit.starts_with('t'))) || p.peek_tok.kind == .whitespace {
			p.next() ? // Advance to token with Txx or whitespace special case
			if p.tok.lit.starts_with('T') || p.tok.lit.starts_with('t') {
				lit += p.tok.lit[0].ascii_str() //'T' or 't'
			} else {
				lit += p.tok.lit
				p.next() ?
			}
			time = p.time() ?
			lit += time.text
			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed date-time: "$lit"')
			return ast.DateTime{
				text: lit
				pos: pos
				date: date
				time: time
			}
		}
	} else if p.peek_tok.kind == .colon {
		// Time only (no date part).
		time = p.time() ?
		return time
	}
	// Date only - `lit` holds the parsed date text at this point.
	return ast.Date{
		text: lit
		pos: pos
	}
}
// date parse and returns an `ast.Date` type.
// Consumes the token sequence `number - number - number` (YYYY-MM-DD),
// accumulating each token's literal into the returned text.
pub fn (mut p Parser) date() ?ast.Date {
	// Date
	mut lit := p.tok.lit
	pos := p.tok.position()
	p.check(.number) ?
	lit += p.tok.lit
	p.check(.minus) ?
	lit += p.tok.lit
	p.check(.number) ?
	lit += p.tok.lit
	p.check(.minus) ?
	lit += p.tok.lit
	// expect (not check) so the final number stays the current token.
	p.expect(.number) ?
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed date: "$lit"')
	return ast.Date{
		text: lit
		pos: pos
	}
}
// time parse and returns an `ast.Time` type.
// Consumes `HH:MM:SS`, an optional `.fraction`, and an optional offset
// (`+HH:MM`, `-HH:MM`, `Z` or `z`). A leading `T`/`t` prefix is stripped.
pub fn (mut p Parser) time() ?ast.Time {
	// Time
	mut lit := p.tok.lit
	pos := p.tok.position()
	if p.is_at(.bare) && (lit.starts_with('T') || lit.starts_with('t')) {
		// Strip the RFC 3339 date/time separator from the literal.
		if p.tok.lit.starts_with('T') {
			lit = lit.all_after('T')
		} else if p.tok.lit.starts_with('t') {
			lit = lit.all_after('t')
		}
		p.next() ?
	} else {
		p.check(.number) ?
	}
	lit += p.tok.lit
	p.check(.colon) ?
	lit += p.tok.lit
	p.check(.number) ?
	lit += p.tok.lit
	// TODO does TOML even have optional seconds?
	// if p.peek_tok.kind == .colon {
	p.check(.colon) ?
	lit += p.tok.lit
	p.expect(.number) ?
	//}
	// Optional milliseconds
	if p.peek_tok.kind == .period {
		p.next() ?
		lit += p.tok.lit // lit += '.'
		p.check(.period) ?
		lit += p.tok.lit
		p.expect(.number) ?
	}
	// Parse offset
	if p.peek_tok.kind == .minus || p.peek_tok.kind == .plus {
		p.next() ?
		lit += p.tok.lit // lit += '-'
		p.check_one_of([.minus, .plus]) ?
		lit += p.tok.lit
		p.check(.number) ?
		lit += p.tok.lit
		p.check(.colon) ?
		lit += p.tok.lit
		p.expect(.number) ?
	} else if p.peek_tok.kind == .bare && (p.peek_tok.lit == 'Z' || p.peek_tok.lit == 'z') {
		// UTC "Zulu" offset.
		p.next() ?
		lit += p.tok.lit
		p.expect(.bare) ?
	}
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed time: "$lit"')
	return ast.Time{
		text: lit
		pos: pos
	}
}
// eof returns an `ast.EOF` marking the current token position.
pub fn (mut p Parser) eof() ast.EOF {
	pos := p.tok.position()
	return ast.EOF{
		pos: pos
	}
}

View File

@ -0,0 +1,526 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module scanner
import os
import math.mathutil
import toml.input
import toml.token
import toml.util
// digit_extras are the characters that may appear inside a TOML number
// besides the digits themselves (separator, decimal point, base prefixes
// and exponent markers).
pub const digit_extras = [`_`, `.`, `x`, `o`, `b`, `e`, `E`]

// Scanner contains the necessary fields for the state of the scan process.
// the task the scanner does is also refered to as "lexing" or "tokenizing".
// The Scanner methods are based on much of the work in `vlib/strings/textscanner`.
pub struct Scanner {
pub:
	config Config
	text   string // the input TOML text
mut:
	col     int // current column number (x coordinate)
	line_nr int = 1 // current line number (y coordinate)
	pos     int // current flat/index position in the `text` field
	mode    Mode // sub-mode of the scanner
}
// Mode is the sub-mode the scanner can be in while scanning.
enum Mode {
	normal
	inside_string
}
// Config is used to configure a Scanner instance.
// Only one of the fields `text` and `file_path` is allowed to be set at time of configuration.
pub struct Config {
pub:
	input input.Config
	// NOTE(review): field name has a typo ("formating") but is public API -
	// renaming it would break callers.
	tokenize_formating bool // if true, generate tokens for `\n`, ` `, `\t`, `\r` etc.
}
// new_scanner returns a new heap allocated `Scanner` instance.
pub fn new_scanner(config Config) ?&Scanner {
	config.input.validate() ?
	file_path := config.input.file_path
	mut text := config.input.text
	// A file path takes precedence: read the whole file as the scan text.
	if os.is_file(file_path) {
		text = os.read_file(file_path) or {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' Could not read "$file_path": "$err.msg"')
		}
	}
	return &Scanner{
		config: config
		text: text
	}
}
// scan returns the next token from the input.
// Whitespace is skipped unless `config.tokenize_formating` is set, with one
// exception: a single space between two digits is emitted as a `.whitespace`
// token so the parser can recognize RFC 3339 "date<space>time" values.
[direct_array_access]
pub fn (mut s Scanner) scan() ?token.Token {
	for {
		c := s.next()
		byte_c := byte(c)
		if c == -1 {
			s.inc_line_number()
			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'reached EOF')
			return s.new_token(.eof, '', 1)
		}
		ascii := byte_c.ascii_str()
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'current char "$ascii"')
		is_sign := byte_c in [`+`, `-`]
		// A sign starts a number only when followed by a digit and not preceded by one.
		is_signed_number := is_sign && byte(s.at()).is_digit() && !byte(s.peek(-1)).is_digit()
		// TODO (+/-)nan & (+/-)inf
		/*
		mut is_nan := s.peek(1) == `n` && s.peek(2) == `a` && s.peek(3) == `n`
		mut is_inf := s.peek(1) == `i` && s.peek(2) == `n` && s.peek(3) == `f`
		if is_nan || is_inf {
			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified a special number "$key" ($key.len)')
			return s.new_token(.number, key, key.len)
		}
		*/
		is_digit := byte_c.is_digit()
		if is_digit || is_signed_number {
			num := s.extract_number() ?
			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified a number "$num" ($num.len)')
			return s.new_token(.number, num, num.len)
		}
		if util.is_key_char(byte_c) {
			key := s.extract_key()
			// `true`/`false` (any case) become boolean tokens, everything else a bare key.
			if key.to_lower() in ['true', 'false'] {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified a boolean "$key" ($key.len)')
				return s.new_token(.boolean, key, key.len)
			}
			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified a bare key "$key" ($key.len)')
			return s.new_token(.bare, key, key.len)
		}
		match rune(c) {
			` `, `\t`, `\n`, `\r` {
				if c == `\n` {
					s.inc_line_number()
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'incremented line nr to $s.line_nr')
				}
				// Date-Time in RFC 3339 is allowed to have a space between the date and time in supplement to the 'T'
				// so we allow space characters to slip through to the parser if the space is between two digits...
				// util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, '"'+byte(s.peek(-1)).ascii_str()+'" < "$ascii" > "'+byte(s.at()).ascii_str()+'"')
				if c == ` ` && byte(s.peek(-1)).is_digit() && byte(s.at()).is_digit() {
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified, what could be, a space between a RFC 3339 date and time ("$ascii") ($ascii.len)')
					return s.new_token(token.Kind.whitespace, ascii, ascii.len)
				}
				if s.config.tokenize_formating {
					mut kind := token.Kind.whitespace
					if c == `\t` {
						kind = token.Kind.tab
					} else if c == `\n` {
						kind = token.Kind.nl
					}
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified one of " ", "\\t" or "\\n" ("$ascii") ($ascii.len)')
					return s.new_token(kind, ascii, ascii.len)
				} else {
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping " ", "\\t" or "\\n" ("$ascii") ($ascii.len)')
				}
				continue
			}
			`-` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified minus "$ascii" ($ascii.len)')
				return s.new_token(.minus, ascii, ascii.len)
			}
			`_` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified underscore "$ascii" ($ascii.len)')
				return s.new_token(.underscore, ascii, ascii.len)
			}
			`+` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified plus "$ascii" ($ascii.len)')
				return s.new_token(.plus, ascii, ascii.len)
			}
			`=` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified assignment "$ascii" ($ascii.len)')
				return s.new_token(.assign, ascii, ascii.len)
			}
			`"`, `'` { // ... some string "/'
				ident_string, is_multiline := s.extract_string() ?
				token_length := if is_multiline { 2 * 3 } else { 2 }
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified quoted string (multiline: $is_multiline) `$ident_string`')
				return s.new_token(.quoted, ident_string, ident_string.len + token_length) // + quote length
			}
			`#` {
				start := s.pos //+ 1
				s.ignore_line()
				hash := s.text[start..s.pos]
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified comment hash "$hash" ($hash.len)')
				return s.new_token(.hash, hash, hash.len + 1)
			}
			`{` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified left curly bracket "$ascii" ($ascii.len)')
				return s.new_token(.lcbr, ascii, ascii.len)
			}
			`}` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified right curly bracket "$ascii" ($ascii.len)')
				return s.new_token(.rcbr, ascii, ascii.len)
			}
			`[` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified left square bracket "$ascii" ($ascii.len)')
				return s.new_token(.lsbr, ascii, ascii.len)
			}
			`]` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified right square bracket "$ascii" ($ascii.len)')
				return s.new_token(.rsbr, ascii, ascii.len)
			}
			`:` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified colon "$ascii" ($ascii.len)')
				return s.new_token(.colon, ascii, ascii.len)
			}
			`,` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified comma "$ascii" ($ascii.len)')
				return s.new_token(.comma, ascii, ascii.len)
			}
			`.` {
				util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified period "$ascii" ($ascii.len)')
				return s.new_token(.period, ascii, ascii.len)
			}
			else {
				return error(@MOD + '.' + @STRUCT + '.' + @FN +
					' could not scan character `$ascii` / $c at $s.pos ($s.line_nr,$s.col) near ...${s.excerpt(s.pos, 5)}...')
			}
		}
	}
	// Unreachable in practice: the loop above always returns or errors.
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'unknown character code at $s.pos ($s.line_nr,$s.col) near ...${s.excerpt(s.pos,
		5)}...')
	return s.new_token(.unknown, '', 0)
}
// free frees all allocated resources.
// Only the input text is heap allocated by the scanner.
[unsafe]
pub fn (mut s Scanner) free() {
	unsafe {
		s.text.free()
	}
}
// remaining returns how many characters remain in the text input.
[inline]
pub fn (s &Scanner) remaining() int {
	rest := s.text.len - s.pos
	return rest
}
// next returns the next character code from the input text.
// next returns `-1` if it can't reach the next character.
[direct_array_access; inline]
pub fn (mut s Scanner) next() int {
	// Guard clause: nothing left to consume.
	if s.pos >= s.text.len {
		return -1
	}
	c := s.text[s.pos]
	s.pos++
	s.col++
	return c
}
// skip skips one character ahead.
// NOTE(review): the `s.pos + 1 < s.text.len` condition means the *last*
// character can never be skipped (unlike `next()`, which advances while
// `pos < len`) - confirm this asymmetry is intended.
[inline]
pub fn (mut s Scanner) skip() {
	if s.pos + 1 < s.text.len {
		s.pos++
		s.col++
	}
}
// skip_n skips ahead `n` characters.
// If the skip goes out of bounds from the length of `Scanner.text`,
// the scanner position will be sat to the last character possible.
[inline]
pub fn (mut s Scanner) skip_n(n int) {
	s.pos += n
	if s.pos > s.text.len {
		s.pos = s.text.len
	}
	// NOTE(review): setting `col` to the absolute position ignores any
	// newlines that were skipped - confirm column tracking is acceptable here.
	s.col = s.pos
}
// at returns the *current* character code from the input text.
// at returns `-1` if it can't get the current character.
// unlike `next()`, `at()` does not change the state of the scanner.
// NOTE(review): the return type is `byte`, so `byte(-1)` is actually 0xFF (255);
// callers comparing the result against `-1` should be double-checked.
[direct_array_access; inline]
pub fn (s &Scanner) at() byte {
	if s.pos < s.text.len {
		return s.text[s.pos]
	}
	return byte(-1)
}
// peek returns the character code from the input text at position + `n`.
// peek returns `-1` if it can't peek `n` characters ahead.
// Negative `n` peeks backwards (used for the RFC 3339 date/time space case).
[direct_array_access; inline]
pub fn (s &Scanner) peek(n int) int {
	if s.pos + n < s.text.len {
		// Allow peeking back - needed for spaces between date and time in RFC 3339 format :/
		if n - 1 < 0 && s.pos + n - 1 >= 0 {
			// util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'LOOKING BAAAA-AACK - OOOVER MY SHOOOOULDEEEER "${s.text[s.pos + n-1]}"')
			return s.text[s.pos + n - 1]
		}
		return s.text[s.pos + n]
	}
	return -1
}
// reset resets the internal state of the scanner.
pub fn (mut s Scanner) reset() {
	s.line_nr = 1
	s.pos = 0
	s.col = 0
}
// new_token returns a new `token.Token` of `kind` with literal `lit`,
// back-computing column and position from the current scanner state and `len`.
[inline]
fn (mut s Scanner) new_token(kind token.Kind, lit string, len int) token.Token {
	// line_offset := 1
	// println('new_token($lit)')
	return token.Token{
		kind: kind
		lit: lit
		col: mathutil.max(1, s.col - len + 1)
		// NOTE(review): `line_nr` starts at 1 yet is incremented again here -
		// presumably to compensate for something downstream; confirm.
		line_nr: s.line_nr + 1 //+ line_offset
		pos: s.pos - len + 1
		len: len
	}
}
// ignore_line forwards the scanner to the end of the current line.
// NOTE(review): `s.at()` returns `byte`, so `c != -1` compares 0xFF against -1
// and may never be false at EOF - confirm behavior for a comment that ends
// the input without a trailing newline.
[direct_array_access; inline]
fn (mut s Scanner) ignore_line() {
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' ignoring until EOL')
	for c := s.at(); c != -1 && c != `\n`; c = s.at() {
		s.next()
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()}"')
		continue
	}
}
// inc_line_number increases the internal line number and
// rewinds the column to the start of the new line.
[inline]
fn (mut s Scanner) inc_line_number() {
	s.line_nr++
	s.col = 0
}
// extract_key parses and returns a TOML key as a string.
// The scanner is rewound one character first, since the caller has already
// consumed the first character of the key.
[direct_array_access; inline]
fn (mut s Scanner) extract_key() string {
	s.pos--
	s.col--
	start_pos := s.pos
	for s.pos < s.text.len {
		ch := s.at()
		is_key_part := util.is_key_char(ch) || ch.is_digit() || ch in [`_`, `-`]
		if !is_key_part {
			break
		}
		s.pos++
		s.col++
	}
	return s.text[start_pos..s.pos]
}
// extract_string collects and returns a string containing
// any bytes recognized as a TOML string.
// TOML strings are everything found between two double or single quotation marks (`"`/`'`).
// Also returns whether the string was a multiline (triple-quoted) string.
[direct_array_access; inline]
fn (mut s Scanner) extract_string() ?(string, bool) {
	// extract_string is called when the scanner has already reached
	// a byte that is the start of a string so we rewind it to start at the correct
	s.pos--
	s.col--
	quote := s.at()
	start := s.pos
	mut lit := ''
	is_multiline := s.text[s.pos + 1] == quote && s.text[s.pos + 2] == quote
	// Check for escaped multiline quote
	if is_multiline {
		mls := s.extract_multiline_string() ?
		return mls, is_multiline
	}
	for {
		s.pos++
		s.col++
		if s.pos >= s.text.len {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' unfinished string literal `$quote.ascii_str()` started at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...')
		}
		c := s.at()
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c (quote type: $quote/$quote.ascii_str())')
		// Check for escaped chars
		if c == byte(92) {
			// byte(92) is the backslash `\`.
			esc, skip := s.handle_escapes(quote, is_multiline)
			lit += esc
			if skip > 0 {
				s.pos += skip
				s.col += skip
				continue
			}
		}
		if c == quote {
			// Closing quote: consume it and finish.
			s.pos++
			s.col++
			return lit, is_multiline
		}
		lit += c.ascii_str()
	}
	// Unreachable: the loop above always returns or errors.
	return lit, is_multiline
}
// extract_multiline_string collects and returns a string containing
// any bytes recognized as a TOML string.
// TOML strings are everything found between two double or single quotation marks (`"`/`'`).
// This variant handles triple-quoted (`"""`/`'''`) multiline strings.
[direct_array_access; inline]
fn (mut s Scanner) extract_multiline_string() ?string {
	// extract_multiline_string is called from extract_string so we know the 3 first
	// characters is the quotes
	quote := s.at()
	start := s.pos
	mut lit := ''
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'multiline `$quote.ascii_str()${s.text[s.pos + 1].ascii_str()}${s.text[
		s.pos + 2].ascii_str()}` string started at pos $start ($s.line_nr,$s.col) (quote type: $quote.ascii_str() / $quote)')
	s.pos += 2
	s.col += 2
	for {
		s.pos++
		s.col++
		if s.pos >= s.text.len {
			return error(@MOD + '.' + @STRUCT + '.' + @FN +
				' unfinished multiline string literal ($quote.ascii_str()$quote.ascii_str()$quote.ascii_str()) started at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...')
		}
		c := s.at()
		if c == `\n` {
			// Newlines are kept verbatim inside multiline strings.
			s.inc_line_number()
			lit += c.ascii_str()
			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `\\n` / $c')
			continue
		}
		// Check for escaped chars
		if c == byte(92) {
			// byte(92) is the backslash `\`.
			esc, skip := s.handle_escapes(quote, true)
			lit += esc
			if skip > 0 {
				s.pos += skip
				s.col += skip
				continue
			}
		}
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c')
		if c == quote {
			// A closing triple quote ends the string, unless a fourth quote
			// follows (quotes adjacent to the terminator are literal content).
			if s.peek(1) == quote && s.peek(2) == quote {
				if s.peek(3) == -1 {
					s.pos += 3
					s.col += 3
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning at $c.ascii_str() `$lit`')
					return lit
				} else if s.peek(3) != quote {
					// lit += c.ascii_str()
					// lit += quote.ascii_str()
					s.pos += 3
					s.col += 3
					util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning at $c.ascii_str() `$lit`')
					return lit
				}
			}
		}
		lit += c.ascii_str()
	}
	// Unreachable: the loop above always returns or errors.
	return lit
}
// handle_escapes examines the escape sequence starting at the current
// backslash character. It returns the literal text that should be appended
// to the string being built and the number of extra characters to skip
// (0 means "not a recognized escape - let the caller handle it").
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
	c := s.at()
	mut lit := c.ascii_str()
	if s.peek(1) == byte(92) {
		// Escaped backslash `\\`: double the literal.
		lit += lit
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
		return lit, 1
	} else if s.peek(1) == quote {
		// Escaped quote - but not when it is escaping the string terminator
		// right before a newline (that case is left to the normal string logic).
		if (!is_multiline && s.peek(2) == `\n`)
			|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
			util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignore special case escaped `$lit` at end of string')
			return '', 0
		}
		lit += quote.ascii_str()
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
		return lit, 1
	} else if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
		&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
		// Unicode escape `\uXXXX`: keep the raw sequence in the literal.
		lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
		util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
		return lit, 4
	}
	return '', 0
}
// extract_number collects and returns a string containing
// any bytes recognized as a TOML number.
// TOML numbers can include digits 0-9 and `_`.
// Hex digits, base prefixes, decimal points and exponents (see `digit_extras`)
// are accepted as well; validation is left to later stages.
[direct_array_access; inline]
fn (mut s Scanner) extract_number() ?string {
	// extract_number is called when the scanner has already reached
	// a byte that is a number or +/- - so we rewind it to start at the correct
	// position to get the complete number. Even if it's only one digit
	s.pos--
	s.col--
	start := s.pos
	mut c := s.at()
	is_digit := byte(c).is_digit()
	if !(is_digit || c in [`+`, `-`]) {
		return error(@MOD + '.' + @STRUCT + '.' + @FN +
			' ${byte(c).ascii_str()} is not a number at ${s.excerpt(s.pos, 10)}')
	}
	s.pos++
	s.col++
	for s.pos < s.text.len {
		c = s.at()
		// Handle signed exponent notation. I.e.: 3e2, 3E2, 3e-2, 3E+2, 3e0, 3.1e2, 3.1E2, -1E-1
		if c in [`e`, `E`] && s.peek(1) in [`+`, `-`] && byte(s.peek(2)).is_digit() {
			// Jump over the `e`/`E` and its sign so the sign is not treated
			// as the end of the number.
			s.pos += 2
			s.col += 2
		}
		c = s.at()
		if !(byte(c).is_hex_digit() || c in scanner.digit_extras) {
			break
		}
		s.pos++
		s.col++
	}
	key := s.text[start..s.pos]
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified number "$key" in range [$start .. $s.pos]')
	return key
}
// excerpt returns a string excerpt of the input text centered
// at `pos`. The `margin` argument defines how many chacters
// on each side of `pos` is returned
pub fn (s Scanner) excerpt(pos int, margin int) string {
	mut from := 0
	if pos > 0 && pos >= margin {
		from = pos - margin
	}
	mut to := s.text.len
	if pos + margin < s.text.len {
		to = pos + margin
	}
	return s.text[from..to].replace('\n', r'\n')
}

View File

@ -0,0 +1,82 @@
import toml.input
import toml.scanner
// Shared three-character scanner input used by all tests in this file.
const scan_input = input.Config{
	text: 'abc'
}
// test_remaining checks that `remaining()` shrinks with each `next()`,
// never goes below zero, and is restored by `reset()`.
fn test_remaining() {
	mut s := scanner.new_scanner(input: scan_input) or { panic(err) }
	assert s.remaining() == 3
	s.next()
	s.next()
	assert s.remaining() == 1
	s.next()
	assert s.remaining() == 0
	s.next()
	s.next()
	assert s.remaining() == 0
	s.reset()
	assert s.remaining() == 3
}
// test_next checks that `next()` yields each character once, then -1 forever.
fn test_next() {
	mut s := scanner.new_scanner(input: scan_input) or { panic(err) }
	assert s.next() == `a`
	assert s.next() == `b`
	assert s.next() == `c`
	assert s.next() == -1
	assert s.next() == -1
	assert s.next() == -1
}
// test_skip checks that `skip()` advances over exactly one character.
fn test_skip() {
	mut s := scanner.new_scanner(input: scan_input) or { panic(err) }
	assert s.next() == `a`
	s.skip()
	assert s.next() == `c`
	assert s.next() == -1
}
// test_skip_n checks that `skip_n(n)` advances over `n` characters.
fn test_skip_n() {
	mut s := scanner.new_scanner(input: scan_input) or { panic(err) }
	s.skip_n(2)
	assert s.next() == `c`
	assert s.next() == -1
}
// test_at checks that `at()` is idempotent (does not advance the scanner).
fn test_at() {
	mut s := scanner.new_scanner(input: scan_input) or { panic(err) }
	assert s.at() == `a`
	assert s.at() == `a`
	assert s.at() == `a`
	//
	assert s.next() == `a`
	assert s.next() == `b`
	assert s.next() == `c`
	assert s.next() == -1
}
// test_peek checks that `peek(n)` looks ahead without advancing,
// and returns -1 past the end of input.
fn test_peek() {
	mut s := scanner.new_scanner(input: scan_input) or { panic(err) }
	assert s.peek(0) == `a`
	assert s.peek(1) == `b`
	assert s.peek(2) == `c`
	assert s.peek(3) == -1
	assert s.peek(4) == -1
	//
	assert s.next() == `a`
	assert s.next() == `b`
	assert s.next() == `c`
	assert s.next() == -1
}
// test_reset checks that `reset()` rewinds an exhausted scanner to the start.
fn test_reset() {
	mut s := scanner.new_scanner(input: scan_input) or { panic(err) }
	assert s.next() == `a`
	s.next()
	s.next()
	assert s.next() == -1
	s.reset()
	assert s.next() == `a`
}

View File

@ -0,0 +1,185 @@
import os
import toml
// Instructions for developers:
// The actual tests and data can be obtained by doing:
// `cd vlib/toml/tests/testdata`
// `git clone --depth 1 https://github.com/BurntSushi/toml-test.git burntsushi/toml-test`
// See also the CI toml tests
// TODO Goal: make parsing AND value retrieval of all of https://github.com/BurntSushi/toml-test/test/ pass
const (
	// Valid TOML files that the parser can not yet handle correctly.
	valid_exceptions = [
		'float/inf-and-nan.toml',
		'table/array-table-array.toml',
	]
	// Invalid TOML files that the parser does not yet reject.
	// All entries are paths relative to the test suite's `invalid` directory.
	invalid_exceptions = [
		// String
		'string/basic-multiline-out-of-range-unicode-escape-1.toml',
		'string/basic-byte-escapes.toml',
		'string/bad-multiline.toml',
		'string/multiline-escape-space.toml',
		'string/bad-codepoint.toml',
		'string/literal-multiline-quotes-1.toml',
		'string/literal-multiline-quotes-2.toml',
		'string/multiline-quotes-1.toml',
		'string/basic-multiline-out-of-range-unicode-escape-2.toml',
		'string/bad-slash-escape.toml',
		'string/basic-out-of-range-unicode-escape-1.toml',
		'string/basic-out-of-range-unicode-escape-2.toml',
		'string/multiline-quotes-2.toml',
		'string/bad-uni-esc.toml',
		'string/bad-escape.toml',
		'string/basic-multiline-unknown-escape.toml',
		'string/missing-quotes.toml',
		'string/bad-byte-escape.toml',
		'string/basic-unknown-escape.toml',
		// Integer
		'integer/capital-bin.toml',
		'integer/invalid-bin.toml',
		'integer/invalid-oct.toml',
		// Encoding
		'encoding/bad-utf8-in-comment.toml',
		'encoding/bad-utf8-in-string.toml',
		// Float
		'float/exp-double-us.toml',
		'float/exp-leading-us.toml',
		'float/nan_underscore.toml',
		'float/nan-incomplete-1.toml',
		// BUGFIX: was 'invalid/float/exp-point-1.toml'. Every other entry is
		// relative to the `invalid` directory, so the stray 'invalid/' prefix
		// meant this exception could never match and the file was never skipped.
		'float/exp-point-1.toml',
		'float/trailing-us.toml',
		'float/us-after-point.toml',
		'float/exp-double-e-1.toml',
		'float/inf-incomplete-1.toml',
		'float/inf_underscore.toml',
		// Table
		'table/rrbrace.toml',
		'table/duplicate-table-array2.toml',
		'table/duplicate.toml',
		'table/array-implicit.toml',
		'table/injection-2.toml',
		'table/llbrace.toml',
		'table/injection-1.toml',
		'table/duplicate-table-array.toml',
		// Array
		'array/tables-1.toml',
		'array/no-close-2.toml',
		'array/missing-separator.toml',
		'array/text-after-array-entries.toml',
		'array/no-close.toml',
		'array/text-before-array-separator.toml',
		// Date / Time
		'datetime/impossible-date.toml',
		'datetime/no-leads-with-milli.toml',
		'datetime/no-leads.toml',
		// Control
		'control/string-us.toml',
		'control/comment-lf.toml',
		'control/multi-us.toml',
		'control/rawstring-del.toml',
		'control/rawmulti-del.toml',
		'control/rawstring-us.toml',
		'control/string-bs.toml',
		'control/multi-null.toml',
		'control/rawstring-lf.toml',
		'control/rawmulti-null.toml',
		'control/comment-null.toml',
		'control/multi-lf.toml',
		'control/comment-del.toml',
		'control/rawstring-null.toml',
		'control/rawmulti-lf.toml',
		'control/multi-del.toml',
		'control/string-del.toml',
		'control/rawmulti-us.toml',
		'control/comment-us.toml',
		'control/string-lf.toml',
		'control/string-null.toml',
		// Inline table
		'inline-table/empty.toml',
		'inline-table/double-comma.toml',
		'inline-table/trailing-comma.toml',
		'inline-table/linebreak-4.toml',
		'inline-table/linebreak-3.toml',
		'inline-table/linebreak-1.toml',
		'inline-table/linebreak-2.toml',
		'inline-table/no-comma.toml',
		// Key
		'key/duplicate.toml',
		'key/after-table.toml',
		'key/duplicate-keys.toml',
		'key/after-value.toml',
		'key/newline.toml',
		'key/without-value-2.toml',
		'key/no-eol.toml',
		'key/after-array.toml',
		'key/multiline.toml',
	]
)
// test_burnt_sushi_tomltest run though 'testdata/burntsushi/toml-test/*' if found.
// Valid files must parse; invalid files must fail to parse.
// Files listed in the exception constants above are skipped.
fn test_burnt_sushi_tomltest() {
	this_file := @FILE
	test_root := os.join_path(os.dir(this_file), 'testdata', 'burntsushi', 'toml-test',
		'tests')
	if os.is_dir(test_root) {
		valid_test_files := os.walk_ext(os.join_path(test_root, 'valid'), '.toml')
		println('Testing $valid_test_files.len valid TOML files...')
		mut valid := 0
		mut e := 0
		for i, valid_test_file in valid_test_files {
			// Normalize to a path relative to the `valid` directory for exception lookup.
			relative := valid_test_file.all_after(os.join_path('toml-test', 'tests', 'valid')).trim_left(os.path_separator)
			if relative !in valid_exceptions {
				println('OK [$i/$valid_test_files.len] "$valid_test_file"...')
				toml_doc := toml.parse_file(valid_test_file) or { panic(err) }
				// parsed_json := toml_doc.to_json().replace(' ','')
				// mut test_suite_json := os.read_file(valid_test_file.all_before_last('.')+'.json') or { panic(err) }
				// test_suite_json = test_suite_json.replace('\n  ','').replace(' ','')
				// println(test_suite_json.replace('\n  ','').replace(' ',''))
				// assert parsed_json == test_suite_json
				valid++
			} else {
				e++
				println('SKIP [$i/$valid_test_files.len] "$valid_test_file" EXCEPTION [$e/$valid_exceptions.len]...')
			}
		}
		println('$valid/$valid_test_files.len TOML files was parsed correctly')
		if valid_exceptions.len > 0 {
			println('TODO Skipped parsing of $valid_exceptions.len valid TOML files...')
		}
		// NOTE uncomment to see list of skipped files
		// assert false
		// TODO test cases where the parser should fail
		invalid_test_files := os.walk_ext(os.join_path(test_root, 'invalid'), '.toml')
		println('Testing $invalid_test_files.len invalid TOML files...')
		mut invalid := 0
		e = 0
		for i, invalid_test_file in invalid_test_files {
			relative := invalid_test_file.all_after(os.join_path('toml-test', 'tests',
				'invalid')).trim_left(os.path_separator)
			if relative !in invalid_exceptions {
				println('OK [$i/$invalid_test_files.len] "$invalid_test_file"...')
				// Invalid input: a successful parse is a test failure.
				if toml_doc := toml.parse_file(invalid_test_file) {
					assert false
				} else {
					println('   $err.msg')
					assert true // err.msg == 'your error'
				}
				invalid++
			} else {
				e++
				println('SKIP [$i/$invalid_test_files.len] "$invalid_test_file" EXCEPTION [$e/$invalid_exceptions.len]...')
			}
		}
		println('$invalid/$invalid_test_files.len TOML files was parsed correctly')
		if invalid_exceptions.len > 0 {
			println('TODO Skipped parsing of $invalid_exceptions.len invalid TOML files...')
		}
		// NOTE uncomment to see list of skipped files
		// assert false
	} else {
		println('No test data directory found in "$test_root"')
		assert true
	}
}

View File

@ -0,0 +1,83 @@
import toml
// Complete text from the example in the README.md:
// https://github.com/toml-lang/toml/blob/3b11f6921da7b6f5db37af039aa021fee450c091/README.md#Example
const toml_text = '# This is a TOML document.
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
dob = 1979-05-27T07:32:00-08:00 # First class dates
[database]
server = "192.168.1.1"
ports = [ 8000, 8001, 8002 ]
connection_max = 5000
enabled = true
[servers]
# Indentation (tabs and/or spaces) is allowed but not required
[servers.alpha]
ip = "10.0.0.1"
dc = "eqdc10"
[servers.beta]
ip = "10.0.0.2"
dc = "eqdc10"
[clients]
data=[["gamma","delta"],[1,2]]
# Line breaks are OK when inside arrays
hosts = [
"alpha",
"omega"
]'
// test_parse_compact_text parses the whitespace-compacted README example
// (see the `toml_text` const above) and checks representative values from
// every table, exercising both sum-type casts and the .string()/.int()
// conversion helpers.
fn test_parse_compact_text() {
	toml_doc := toml.parse(toml_text) or { panic(err) }
	// Top-level key, checked both wrapped (toml.Any) and unwrapped (string).
	title := toml_doc.value('title')
	assert title == toml.Any('TOML Example')
	assert title as string == 'TOML Example'
	// [owner] table accessed as a map of Any values.
	owner := toml_doc.value('owner') as map[string]toml.Any
	any_name := owner.value('name') or { panic(err) }
	assert any_name.string() == 'Tom Preston-Werner'
	// [database] table, indexed directly like a map.
	database := toml_doc.value('database') as map[string]toml.Any
	db_serv := database['server'] or {
		panic('could not access "server" index in "database" variable')
	}
	assert db_serv as string == '192.168.1.1'
	// TODO BUG depending on WHAT directory the tests is run from, this one assert sometimes fail?!?!
	// assert toml_doc.value('owner.name') as string == 'Tom Preston-Werner'
	// Dotted-key queries resolve through nested tables.
	assert toml_doc.value('database.server') as string == '192.168.1.1'
	// Integers are stored as i64; .int() is the convenience conversion.
	database_ports := toml_doc.value('database.ports') as []toml.Any
	assert database_ports[0] as i64 == 8000
	assert database_ports[1] as i64 == 8001
	assert database_ports[2] as i64 == 8002
	assert database_ports[0].int() == 8000
	assert database_ports[1].int() == 8001
	assert database_ports[2].int() == 8002
	assert toml_doc.value('database.connection_max') as i64 == 5000
	assert toml_doc.value('database.enabled') as bool == true
	assert toml_doc.value('servers.alpha.ip').string() == '10.0.0.1'
	assert toml_doc.value('servers.alpha.dc').string() == 'eqdc10'
	assert toml_doc.value('servers.beta.ip').string() == '10.0.0.2'
	assert toml_doc.value('servers.beta.dc').string() == 'eqdc10'
	// [clients] data is a nested (heterogeneous) array of arrays.
	clients_data := (toml_doc.value('clients.data') as []toml.Any)
	// dump(clients_data)
	// assert false
	gamma_delta_array := clients_data[0] as []toml.Any
	digits_array := clients_data[1] as []toml.Any
	assert gamma_delta_array[0].string() == 'gamma'
	assert gamma_delta_array[1].string() == 'delta'
	assert digits_array[0].int() == 1
	assert digits_array[1].int() == 2
	// as_strings() converts a whole []toml.Any into []string.
	clients_hosts := (toml_doc.value('clients.hosts') as []toml.Any).as_strings()
	assert clients_hosts[0] == 'alpha'
	assert clients_hosts[1] == 'omega'
}

View File

@ -0,0 +1,73 @@
import toml
import time
// test_dates checks parsing of every TOML date/time flavor — offset date-times,
// local date-times, local dates and local times — comparing each .datetime()
// result against time.parse_rfc3339 of the same literal, and spot-checking the
// .string() formatting.
fn test_dates() {
	toml_txt := '
# Offset Date-Time
odt1 = 1979-05-27T07:32:00Z
odt2 = 1979-05-27T00:32:00-07:00
odt3 = 1979-05-27T00:32:00.999999-07:00
odt4 = 1979-05-27 07:32:00Z
# Local Date-Time
ldt1 = 1979-05-27T07:32:00
ldt2 = 1979-05-27T00:32:00.999999
# Local Date
ld1 = 1979-05-27
# Local Time
lt1 = 07:32:00
lt2 = 00:32:00.999999
'
	toml_doc := toml.parse(toml_txt) or { panic(err) }
	// Re-use vars
	mut odt_time := time.parse_rfc3339('1979-05-27T07:32:00Z') or { panic(err) }
	mut odt_str := toml_doc.value('odt1').string()
	// odt1 test section
	// NOTE(review): stringifying shifts the day from the 27th to the 26th —
	// presumably an offset conversion of the 'Z' (UTC) suffix; confirm intended.
	assert odt_str == '1979-05-26 07:32:00.000000' // W00t?! why 26th? Z=UTC?
	odt1 := toml_doc.value('odt1')
	assert odt1.datetime() == odt_time
	// odt2 test section
	odt_time = time.parse_rfc3339('1979-05-27T00:32:00-07:00') or { panic(err) }
	odt2 := toml_doc.value('odt2')
	assert odt2.datetime() == odt_time
	// odt3 test section (fractional seconds)
	odt_time = time.parse_rfc3339('1979-05-27T00:32:00.999999-07:00') or { panic(err) }
	odt3 := toml_doc.value('odt3')
	assert odt3.datetime() == odt_time
	// odt4 test section (space instead of 'T' separator is allowed by TOML)
	odt_time = time.parse_rfc3339('1979-05-27 07:32:00Z') or { panic(err) }
	odt4 := toml_doc.value('odt4')
	assert odt4.datetime() == odt_time
	// ldt1 test section (local date-time, no offset)
	odt_time = time.parse_rfc3339('1979-05-27T07:32:00') or { panic(err) }
	ldt1 := toml_doc.value('ldt1')
	assert ldt1.datetime() == odt_time
	// ldt2 test section
	odt_time = time.parse_rfc3339('1979-05-27T00:32:00.999999') or { panic(err) }
	ldt2 := toml_doc.value('ldt2')
	assert ldt2.datetime() == odt_time
	// ld1 test section (date only — time components default to zero)
	odt_time = time.parse_rfc3339('1979-05-27') or { panic(err) }
	ld1 := toml_doc.value('ld1')
	assert ld1.datetime() == odt_time
	assert ld1.string() == '1979-05-27 00:00:00.000000'
	// lt1 test section (time only — date components are all-zero)
	odt_time = time.parse_rfc3339('07:32:00') or { panic(err) }
	lt1 := toml_doc.value('lt1')
	assert lt1.datetime() == odt_time
	assert lt1.string() == '0000-00-00 07:32:00.000000'
	// lt2 test section
	odt_time = time.parse_rfc3339('00:32:00.999999') or { panic(err) }
	lt2 := toml_doc.value('lt2')
	assert lt2.datetime() == odt_time
	assert lt2.string() == '0000-00-00 00:32:00.999999'
}

View File

@ -0,0 +1,19 @@
import os
import toml
// test_parse parses the sibling '<this test's name>.toml' testdata file and
// compares the generated JSON against the expected '.out' file next to it.
fn test_parse() {
	// Both fixture files share the same base path; compute it once.
	base_path := os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.')))
	document := toml.parse(base_path + '.toml') or { panic(err) }
	generated_json := document.to_json()
	expected_json := os.read_file(base_path + '.out') or { panic(err) }
	println(generated_json)
	assert generated_json == expected_json
	// assert false
}

View File

@ -0,0 +1,43 @@
import toml
const toml_text = '
[db]
enabled = true
[servers]
# Indentation (tabs and/or spaces) is allowed but not required
[servers.alpha]
ip = "10.0.0.1"
dc = "eqdc10"
[servers.beta]
ip = "10.0.0.2"
dc = "eqdc10"
[servers.alpha.tricky]
ip = "10.0.0.100"
[firewall.rules.limit]
ip = "10.0.0.101"
[firewall.rules]
block = true
'
// test_parse checks that dotted table headers, sub-tables declared out of
// order, and super-tables declared after their children ([firewall.rules.limit]
// before [firewall.rules]) all resolve to the correct nested values.
fn test_parse() {
	doc := toml.parse(toml_text) or { panic(err) }
	// dump(doc.ast)
	// assert false
	assert doc.value('db.enabled').bool()
	// TODO make this work
	assert doc.value('servers.alpha.ip').string() == '10.0.0.1'
	assert doc.value('servers.alpha.dc').string() == 'eqdc10'
	assert doc.value('servers.beta.ip').string() == '10.0.0.2'
	assert doc.value('servers.beta.dc').string() == 'eqdc10'
	assert doc.value('servers.alpha.tricky.ip').string() == '10.0.0.100'
	assert doc.value('firewall.rules.limit.ip').string() == '10.0.0.101'
	assert doc.value('firewall.rules.block').bool() == true
}

View File

@ -0,0 +1,67 @@
import os
import toml
const (
toml_multiline_text_1 = 'multi1 = """one"""
multi2 = """one
two"""
multi3 = """
one
two
three"""
multi4 = """
one
two
three
four
"""'
toml_multiline_text_2 = "multi1 = '''one'''
multi2 = '''one
two'''
multi3 = '''
one
two
three'''
multi4 = '''
one
two
three
four
'''"
)
// test_multiline_strings verifies multi-line basic (""") and literal (''')
// strings from two inline documents, then quote-at-the-edge cases loaded from
// the sibling testdata file.
fn test_multiline_strings() {
	mut doc := toml.parse(toml_multiline_text_1) or { panic(err) }
	assert doc.value('multi1').string() == 'one'
	assert doc.value('multi2').string() == 'one\ntwo'
	assert doc.value('multi3').string() == '\none\ntwo\nthree'
	assert doc.value('multi4').string() == '\none\ntwo\nthree\nfour\n'
	// Literal-string flavor of the same shapes.
	doc = toml.parse(toml_multiline_text_2) or { panic(err) }
	assert doc.value('multi1').string() == 'one'
	assert doc.value('multi2').string() == 'one\ntwo'
	assert doc.value('multi3').string() == '\none\ntwo\nthree'
	assert doc.value('multi4').string() == '\none\ntwo\nthree\nfour\n'
	// Quotes adjacent to the opening/closing delimiters, from testdata.
	toml_path := os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +
		'.toml'
	doc = toml.parse(toml_path) or { panic(err) }
	assert doc.value('lit_one').string() == "'one quote'"
	assert doc.value('lit_two').string() == "''two quotes''"
	assert doc.value('mismatch1').string() == 'aaa' + "'''" + 'bbb'
	assert doc.value('mismatch2').string() == 'aaa' + '"""' + 'bbb'
}

View File

@ -0,0 +1,87 @@
import toml
const (
toml_table_text = 'inline = {a.b = 42}
many.dots.here.dot.dot.dot = {a.b.c = 1, a.b.d = 2}
a = { a.b = 1 }
b = { "a"."b" = 1 }
c = { a . b = 1 }
d = { \'a\' . "b" = 1 }
e = {a.b=1}
[tbl]
a.b.c = {d.e=1}
[tbl.x]
a.b.c = {d.e=1}
[[arr]]
t = {a.b=1}
T = {a.b=1}
[[arr]]
t = {a.b=2}
T = {a.b=2}'
)
// test_tables checks inline tables with dotted keys, dotted table headers,
// sub-tables and arrays of tables — queried both through Doc.value() dotted
// lookups and through the raw map/array sum-type values.
fn test_tables() {
	mut toml_doc := toml.parse(toml_table_text) or { panic(err) }
	mut value := toml_doc.value('inline.a.b')
	assert value.int() == 42
	value = toml_doc.value('many.dots.here.dot.dot.dot.a.b.c')
	assert value.int() == 1
	value = toml_doc.value('many.dots.here.dot.dot.dot.a.b.d')
	assert value.int() == 2
	// Various whitespace/quoting styles of dotted keys inside inline tables
	// ('a' through 'e' in the input) must all parse identically.
	value = toml_doc.value('a.a.b')
	assert value.int() == 1
	value = toml_doc.value('b.a.b')
	assert value.int() == 1
	value = toml_doc.value('c.a.b')
	assert value.int() == 1
	value = toml_doc.value('d.a.b')
	assert value.int() == 1
	value = toml_doc.value('e.a.b')
	assert value.int() == 1
	// Inline tables under [tbl] headers.
	value = toml_doc.value('tbl.a.b.c.d.e')
	assert value.int() == 1
	value = toml_doc.value('tbl.x.a.b.c.d.e')
	assert value.int() == 1
	// The same values must also be reachable from the raw map for 'tbl'.
	mut m := toml_doc.value('tbl') as map[string]toml.Any
	value = m.value('a.b.c.d.e') or { panic(err) }
	assert value.int() == 1
	value = m.value('x.a.b.c.d.e') or { panic(err) }
	assert value.int() == 1
	// [[arr]] array-of-tables entries.
	// NOTE(review): the input declares two [[arr]] tables with keys 't' and 'T'
	// each, yet four array elements are asserted here — each key appears to get
	// its own element. Confirm this flattening is the intended representation.
	arr := toml_doc.value('arr') as []toml.Any
	arr0 := arr[0] as map[string]toml.Any
	value = arr0.value('t.a.b') or { panic(err) }
	assert value.int() == 1
	arr1 := arr[1] as map[string]toml.Any
	value = arr1.value('T.a.b') or { panic(err) }
	assert value.int() == 1
	arr2 := arr[2] as map[string]toml.Any
	value = arr2.value('t.a.b') or { panic(err) }
	assert value.int() == 2
	arr3 := arr[3] as map[string]toml.Any
	value = arr3.value('T.a.b') or { panic(err) }
	assert value.int() == 2
}

View File

@ -0,0 +1 @@
{ "v": true, "animal": { "type": { "name": "pug" } }, "inline": { "a": 4, "b.c": 6, "b": { "c": 7 } }, "db": { "t": true }, "ij": { "a": { "i": 1, "j": 2 }, "b": { "i": "3", "j": "4" } }, "fruit": { "apple": { "color": "red", "taste": { "sweet": true }, "texture": { "smooth": true } } } }

View File

@ -0,0 +1,25 @@
v = true
animal = { type.name = "pug" }
inline = { "a" = 4, "b.c" = 6, b.c = 7 }
[db]
t = true
[ij]
[ij.a]
i = 1
j = 2
[ij.b]
i = "3"
j = "4"
[fruit]
apple.color = "red"
apple.taste.sweet = true
[fruit.apple.texture]
smooth = true

View File

@ -0,0 +1,15 @@
# Make sure that quotes inside multiline strings are allowed, including right
# after the opening '''/""" and before the closing '''/"""
lit_one = ''''one quote''''
lit_two = '''''two quotes'''''
lit_one_space = ''' 'one quote' '''
lit_two_space = ''' ''two quotes'' '''
one = """"one quote""""
two = """""two quotes"""""
one_space = """ "one quote" """
two_space = """ ""two quotes"" """
mismatch1 = """aaa'''bbb"""
mismatch2 = '''aaa"""bbb'''

View File

@ -0,0 +1 @@
{ "title": "TOML Example", "owner": { "name": "Tom Preston-Werner", "dob": "1979-05-27T07:32:00-08:00" }, "database": { "server": "192.168.1.1", "ports": [ 8000, 8001, 8002 ], "connection_max": 5000, "enabled": true }, "servers": { "alpha": { "ip": "10.0.0.1", "dc": "eqdc10" }, "beta": { "ip": "10.0.0.2", "dc": "eqdc10" } }, "clients": { "data": [ [ "gamma", "delta" ], [ 1, 2 ] ], "hosts": [ "alpha", "omega" ] } }

View File

@ -0,0 +1,33 @@
# This is a TOML document.
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
dob = 1979-05-27T07:32:00-08:00 # First class dates
[database]
server = "192.168.1.1"
ports = [ 8000, 8001, 8002 ]
connection_max = 5000
enabled = true
[servers]
# Indentation (tabs and/or spaces) is allowed but not required
[servers.alpha]
ip = "10.0.0.1"
dc = "eqdc10"
[servers.beta]
ip = "10.0.0.2"
dc = "eqdc10"
[clients]
data = [ ["gamma", "delta"], [1, 2] ]
# Line breaks are OK when inside arrays
hosts = [
"alpha",
"omega"
]

View File

@ -0,0 +1,110 @@
import os
import toml
const toml_text = os.read_file(
os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +
'.toml') or { panic(err) }
// test_toml parses the README example loaded from testdata (`toml_text` const
// above), compares the JSON serialization against the stored '.out' fixture,
// and then checks representative values from every table in the document.
fn test_toml() {
	// File containing the complete text from the example in the official TOML project README.md:
	// https://github.com/toml-lang/toml/blob/3b11f6921da7b6f5db37af039aa021fee450c091/README.md#Example
	toml_doc := toml.parse(toml_text) or { panic(err) }
	toml_json := toml_doc.to_json()
	// NOTE Kept for easier debugging:
	// dump(toml_doc.ast)
	// println(toml_json)
	// assert false
	// Full-document JSON comparison against the expected fixture.
	assert toml_json == os.read_file(
		os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +
		'.out') or { panic(err) }
	// Top-level key, checked both wrapped (toml.Any) and unwrapped (string).
	title := toml_doc.value('title')
	assert title == toml.Any('TOML Example')
	assert title as string == 'TOML Example'
	// [owner] table accessed as a map of Any values.
	owner := toml_doc.value('owner') as map[string]toml.Any
	any_name := owner.value('name') or { panic(err) }
	assert any_name.string() == 'Tom Preston-Werner'
	database := toml_doc.value('database') as map[string]toml.Any
	db_serv := database['server'] or {
		panic('could not access "server" index in "database" variable')
	}
	assert db_serv as string == '192.168.1.1'
	// TODO BUG depending on WHAT directory the tests is run from, this one assert sometimes fail?!?!
	// assert toml_doc.value('owner.name') as string == 'Tom Preston-Werner'
	// Dotted-key queries resolve through nested tables.
	assert toml_doc.value('database.server') as string == '192.168.1.1'
	// Integers are stored as i64; .int() is the convenience conversion.
	database_ports := toml_doc.value('database.ports') as []toml.Any
	assert database_ports[0] as i64 == 8000
	assert database_ports[1] as i64 == 8001
	assert database_ports[2] as i64 == 8002
	assert database_ports[0].int() == 8000
	assert database_ports[1].int() == 8001
	assert database_ports[2].int() == 8002
	assert toml_doc.value('database.connection_max') as i64 == 5000
	assert toml_doc.value('database.enabled') as bool == true
	assert toml_doc.value('servers.alpha.ip').string() == '10.0.0.1'
	assert toml_doc.value('servers.alpha.dc').string() == 'eqdc10'
	assert toml_doc.value('servers.beta.ip').string() == '10.0.0.2'
	assert toml_doc.value('servers.beta.dc').string() == 'eqdc10'
	// [clients] data is a nested (heterogeneous) array of arrays.
	clients_data := (toml_doc.value('clients.data') as []toml.Any)
	// dump(clients_data)
	// assert false
	gamma_delta_array := clients_data[0] as []toml.Any
	digits_array := clients_data[1] as []toml.Any
	assert gamma_delta_array[0].string() == 'gamma'
	assert gamma_delta_array[1].string() == 'delta'
	assert digits_array[0].int() == 1
	assert digits_array[1].int() == 2
	// as_strings() converts a whole []toml.Any into []string.
	clients_hosts := (toml_doc.value('clients.hosts') as []toml.Any).as_strings()
	assert clients_hosts[0] == 'alpha'
	assert clients_hosts[1] == 'omega'
}
// test_toml_file writes the example document to a temporary file, parses it
// back through parse_file, and compares the JSON output with the '.out' fixture.
fn test_toml_file() {
	tmp_dir := os.join_path(os.temp_dir(), 'v_toml_tests')
	tmp_file := os.join_path(tmp_dir, 'toml_example.toml')
	os.mkdir_all(tmp_dir) or { assert false }
	// Clean up the temporary directory no matter how the test exits.
	defer {
		os.rmdir_all(tmp_dir) or {}
	}
	os.write_file(tmp_file, toml_text) or { assert false }
	doc := toml.parse_file(tmp_file) or { panic(err) }
	json_output := doc.to_json()
	// NOTE Kept for easier debugging:
	// dump(doc.ast)
	// println(json_output)
	// assert false
	expected := os.read_file(
		os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +
		'.out') or { panic(err) }
	assert json_output == expected
}
// test_toml_parse_text feeds the example text through parse_text explicitly
// and compares the JSON output with the '.out' fixture.
fn test_toml_parse_text() {
	doc := toml.parse_text(toml_text) or { panic(err) }
	expected := os.read_file(
		os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +
		'.out') or { panic(err) }
	assert doc.to_json() == expected
}
// test_toml_parse feeds the example text through the auto-detecting parse()
// entry point and compares the JSON output with the '.out' fixture.
fn test_toml_parse() {
	doc := toml.parse(toml_text) or { panic(err) }
	expected := os.read_file(
		os.real_path(os.join_path(os.dir(@FILE), 'testdata', os.file_name(@FILE).all_before_last('.'))) +
		'.out') or { panic(err) }
	assert doc.to_json() == expected
}

View File

@ -0,0 +1,70 @@
import toml
// test_string checks string value retrieval in all three access styles:
// Any equality, `as string` cast, and the .string() helper.
fn test_string() {
	expected := 'test string'
	doc := toml.parse('string = "test string"') or { panic(err) }
	value := doc.value('string')
	assert value == toml.Any(expected)
	assert value as string == expected
	assert value.string() == expected
}
// test_i64 checks that an integer value parses as i64 and is retrievable via
// Any equality, `as i64` cast, and the .i64() helper.
fn test_i64() {
	doc := toml.parse('i64 = 120') or { panic(err) }
	num := doc.value('i64')
	assert num == toml.Any(i64(120))
	assert num as i64 == 120
	assert num.i64() == i64(120)
}
// test_bool checks that `true` and `false` literals parse into boolean values
// and compare correctly in both wrapped (toml.Any) and unwrapped (bool) form.
fn test_bool() {
	doc := toml.parse('
bool_true = true
bool_false = false') or { panic(err) }
	truthy := doc.value('bool_true')
	assert truthy == toml.Any(true)
	assert truthy as bool == true
	assert truthy != toml.Any(false)
	assert truthy as bool != false
	assert truthy.bool() == true
	falsy := doc.value('bool_false')
	assert falsy == toml.Any(false)
	assert falsy as bool == false
	assert falsy != toml.Any(true)
	assert falsy as bool != true
	assert falsy.bool() == false
}
// test_bool_key_is_not_value checks that bare keys literally named `true` and
// `false` are treated as keys, not as boolean values.
fn test_bool_key_is_not_value() {
	doc := toml.parse('true = true
false = false') or { panic(err) }
	true_val := doc.value('true')
	assert true_val == toml.Any(true)
	assert true_val as bool == true
	assert true_val != toml.Any(false)
	assert true_val as bool != false
	false_val := doc.value('false')
	assert false_val == toml.Any(false)
	assert false_val as bool == false
	assert false_val != toml.Any(true)
	assert false_val as bool != true
}
// test_single_letter_key checks that a one-character table name ('[v]')
// resolves correctly in a dotted-key query.
fn test_single_letter_key() {
	document := toml.parse('[v]
open_sourced = "Jun 22 2019 20:20:28"') or { panic(err) }
	assert document.value('v.open_sourced').string() == 'Jun 22 2019 20:20:28'
}

View File

@ -0,0 +1,13 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module token
// Position represents a position in a TOML document.
pub struct Position {
pub:
	len     int // length of the literal in the source
	line_nr int // the line number in the source where the token occurred
	pos     int // the position of the token in scanner text
	col     int // the column in the source where the token occurred
}

View File

@ -0,0 +1,52 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module token
// Token holds information about the current scan of bytes.
pub struct Token {
pub:
	kind    Kind   // the token number/enum; for quick comparisons
	lit     string // literal representation of the token
	col     int    // the column in the source where the token occurred
	line_nr int    // the line number in the source where the token occurred
	pos     int    // the position of the token in scanner text
	len     int    // length of the literal
}
// Kind represents a logical type of entity found in any given TOML document.
pub enum Kind {
	unknown
	eof
	bare // user
	boolean // true or false
	number // 123
	quoted // 'foo', "foo", """foo""" or '''foo'''
	plus // +
	minus // -
	underscore // _
	comma // ,
	colon // :
	hash // # comment
	assign // =
	lcbr // {
	rcbr // }
	lsbr // [
	rsbr // ]
	nl // \n linefeed / newline character
	cr // \r carriage return
	tab // \t character
	whitespace // ` `
	period // .
	_end_
}
// position returns this token's location as a Position value.
// NOTE(review): line_nr and col are decremented here, converting from the
// scanner's 1-based counters to 0-based positions — confirm callers expect this.
[inline]
pub fn (tok &Token) position() Position {
	return Position{
		len: tok.len
		line_nr: tok.line_nr - 1
		pos: tok.pos
		col: tok.col - 1
	}
}

217
vlib/toml/toml.v 100644
View File

@ -0,0 +1,217 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module toml
import os
import toml.ast
import toml.util
import toml.input
import toml.scanner
import toml.parser
import time
// Null is used in sumtype checks as a "default" value when nothing else is possible.
pub struct Null {
}

// Config is used to configure the toml parser.
// Only one of the fields `text` or `file_path`, is allowed to be set at time of configuration.
pub struct Config {
pub:
	text           string // TOML text
	file_path      string // '/path/to/file.toml'
	parse_comments bool // NOTE(review): presumably keeps comments in the parse — confirm against the parser
}

// Doc is a representation of a TOML document.
// A document can be constructed from a `string` buffer or from a file path
pub struct Doc {
pub:
	ast &ast.Root // the parsed abstract syntax tree of the document
}
// parse_file parses the TOML file in `path`.
// Returns the parsed document, or an error if scanning/parsing fails.
pub fn parse_file(path string) ?Doc {
	// Build the scanner directly from a file-backed input config.
	scanner_config := scanner.Config{
		input: input.Config{
			file_path: path
		}
	}
	parser_config := parser.Config{
		scanner: scanner.new_scanner(scanner_config) ?
	}
	mut p := parser.new_parser(parser_config)
	ast := p.parse() ?
	return Doc{
		ast: ast
	}
}
// parse_text parses the TOML document provided in `text`.
// Returns the parsed document, or an error if scanning/parsing fails.
pub fn parse_text(text string) ?Doc {
	// Build the scanner directly from a text-backed input config.
	scanner_config := scanner.Config{
		input: input.Config{
			text: text
		}
	}
	parser_config := parser.Config{
		scanner: scanner.new_scanner(scanner_config) ?
	}
	mut p := parser.new_parser(parser_config)
	ast := p.parse() ?
	return Doc{
		ast: ast
	}
}
// parse parses the TOML document provided in `toml`.
// parse automatically tries to determine if `toml` is a file path or TOML text:
// a single-line string naming an existing file is read from disk, anything else
// is treated as TOML text. For explicit parsing see `parse_file` or `parse_text`.
pub fn parse(toml string) ?Doc {
	// A multi-line string can never be a file path; otherwise check the filesystem.
	// Delegating avoids triplicating the scanner/parser setup done in
	// parse_file/parse_text (the configs built here were identical).
	if !toml.contains('\n') && os.is_file(toml) {
		return parse_file(toml)
	}
	return parse_text(toml)
}
// to_json returns a compact json string of the complete document.
// Serialization is delegated to the AST's own to_json().
pub fn (d Doc) to_json() string {
	return d.ast.to_json()
}
// value queries a value from the TOML document.
// `key` is a dotted path, e.g. 'database.server'. Keys that can not be
// resolved yield Any(Null{}) (see get_map_value_as_any).
pub fn (d Doc) value(key string) Any {
	values := d.ast.table as map[string]ast.Node
	// any_values := d.ast_to_any(values) as map[string]Any
	return d.get_map_value_as_any(values, key)
}
// ast_to_any converts `value` from an ast.Node to a toml.Any value.
// Tables and arrays are converted recursively; unconvertible nodes and
// date/time strings that fail RFC 3339 parsing become Any(Null{}).
fn (d Doc) ast_to_any(value ast.Node) Any {
	// `match` isn't currently very suitable for further unwrapping sumtypes in the if's...
	// All three date/time node kinds funnel through time.parse_rfc3339 on their text.
	if value is ast.Date || value is ast.Time || value is ast.DateTime {
		mut tim := time.Time{}
		if value is ast.Date {
			date_str := (value as ast.Date).text
			tim = time.parse_rfc3339(date_str) or {
				return Any(Null{})
				// TODO decide this
				// panic(@MOD + '.' + @STRUCT + '.' + @FN +
				// ' failed converting "$date_str" to iso8601: $err')
			}
		} else if value is ast.Time {
			time_str := (value as ast.Time).text
			tim = time.parse_rfc3339(time_str) or {
				return Any(Null{})
				// TODO decide this
				// panic(@MOD + '.' + @STRUCT + '.' + @FN +
				// ' failed converting "$time_str" to rfc3339: $err')
			}
		} else {
			// value is ast.DateTime
			datetime_str := (value as ast.DateTime).text
			tim = time.parse_rfc3339(datetime_str) or {
				return Any(Null{})
				// TODO decide this
				// panic(@MOD + '.' + @STRUCT + '.' + @FN +
				// ' failed converting "$datetime_str" to rfc3339: $err')
			}
		}
		return Any(tim)
	}
	match value {
		ast.Quoted {
			return Any((value as ast.Quoted).text)
		}
		ast.Number {
			// Number text containing '.' becomes f64, everything else i64.
			str := (value as ast.Number).text
			if str.contains('.') {
				return Any(str.f64())
			}
			return Any(str.i64())
		}
		ast.Bool {
			str := (value as ast.Bool).text
			if str == 'true' {
				return Any(true)
			}
			return Any(false)
		}
		map[string]ast.Node {
			// Recursively convert every value of a table node.
			m := (value as map[string]ast.Node)
			mut am := map[string]Any{}
			for k, v in m {
				am[k] = d.ast_to_any(v)
			}
			return am
			// return d.get_map_value(m, key_split[1..].join('.'))
		}
		[]ast.Node {
			// Recursively convert every element of an array node.
			a := (value as []ast.Node)
			mut aa := []Any{}
			for val in a {
				aa << d.ast_to_any(val)
			}
			return aa
		}
		else {
			return Any(Null{})
		}
	}
	// Unreachable in practice (the match above covers all branches), kept as a safe fallback.
	return Any(Null{})
	// TODO decide this
	// panic(@MOD + '.' + @STRUCT + '.' + @FN + ' can\'t convert "$value"')
	// return Any('')
}
// get_map_value_as_any returns the value found at `key` in the map `values` as `Any` type.
// `key` is a dotted path; each segment descends one table level recursively.
// Keys that can not be resolved yield Any(Null{}).
fn (d Doc) get_map_value_as_any(values map[string]ast.Node, key string) Any {
	key_split := key.split('.')
	util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' getting "${key_split[0]}"')
	if key_split[0] in values.keys() {
		value := values[key_split[0]] or {
			return Any(Null{})
			// TODO decide this
			// panic(@MOD + '.' + @STRUCT + '.' + @FN + ' key "$key" does not exist')
		}
		// `match` isn't currently very suitable for these types of sum type constructs...
		if value is map[string]ast.Node {
			m := (value as map[string]ast.Node)
			next_key := key_split[1..].join('.')
			if next_key == '' {
				// Path exhausted: convert and return the whole sub-table.
				return d.ast_to_any(value)
			}
			// Descend into the sub-table with the remaining dotted path.
			return d.get_map_value_as_any(m, next_key)
		}
		return d.ast_to_any(value)
	}
	return Any(Null{})
	// TODO decide this
	// panic(@MOD + '.' + @STRUCT + '.' + @FN + ' key "$key" does not exist')
}

View File

@ -0,0 +1,14 @@
// Copyright (c) 2021 Lars Pontoppidan. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module util
// is_key_char returns true if the byte `c` is an ASCII letter (A-Z or a-z)
// usable in a bare TOML key.
[inline]
pub fn is_key_char(c byte) bool {
	return (c >= `a` && c <= `z`) || (c >= `A` && c <= `Z`) // || c == `_` || c == `-` <- these are identified when tokenizing
}
// printdbg prints `id` and `message` to stderr.
// Compiled in only when the `trace_toml` flag is defined (the `[if trace_toml ?]` attribute).
[if trace_toml ?]
pub fn printdbg(id string, message string) {
	eprintln(id + ' ' + message)
}