From 69fa87ad244b3c167c1a06bb17825cb4e169a56c Mon Sep 17 00:00:00 2001 From: Larpon Date: Thu, 11 Nov 2021 06:27:41 +0100 Subject: [PATCH] toml: add date and time checks (#12427) --- vlib/time/parse.c.v | 25 +++-- vlib/time/parse_test.v | 2 + vlib/toml/checker/checker.v | 104 ++++++++++++++++++++ vlib/toml/tests/burntsushi.toml-test_test.v | 4 - 4 files changed, 125 insertions(+), 10 deletions(-) diff --git a/vlib/time/parse.c.v b/vlib/time/parse.c.v index 3bcd3422dc..b3f94ebd56 100644 --- a/vlib/time/parse.c.v +++ b/vlib/time/parse.c.v @@ -4,22 +4,26 @@ module time // parse_rfc3339 returns time from a date string in RFC 3339 datetime format. +// See also https://ijmacd.github.io/rfc3339-iso8601/ for a visual reference of +// the differences between ISO-8601 and RFC 3339. pub fn parse_rfc3339(s string) ?Time { if s == '' { return error_invalid_time(0) } - mut t := parse_iso8601(s) or { Time{} } + // Normalize the input before parsing. Good since iso8601 doesn't permit lower case `t` and `z`. + sn := s.replace_each(['t', 'T', 'z', 'Z']) + mut t := parse_iso8601(sn) or { Time{} } // If parse_iso8601 DID NOT result in default values (i.e. date was parsed correctly) if t != Time{} { return t } - t_i := s.index('T') or { -1 } - parts := if t_i != -1 { [s[..t_i], s[t_i + 1..]] } else { s.split(' ') } + t_i := sn.index('T') or { -1 } + parts := if t_i != -1 { [sn[..t_i], sn[t_i + 1..]] } else { sn.split(' ') } - // Check if s is date only + // Check if sn is date only if !parts[0].contains_any(' Z') && parts[0].contains('-') { - year, month, day := parse_iso8601_date(s) ? + year, month, day := parse_iso8601_date(sn) ? 
t = new_time(Time{ year: year month: month @@ -27,7 +31,7 @@ pub fn parse_rfc3339(s string) ?Time { }) return t } - // Check if s is time only + // Check if sn is time only if !parts[0].contains('-') && parts[0].contains(':') { mut hour_, mut minute_, mut second_, mut microsecond_, mut unix_offset, mut is_local_time := 0, 0, 0, 0, i64(0), true hour_, minute_, second_, microsecond_, unix_offset, is_local_time = parse_iso8601_time(parts[0]) ? @@ -173,6 +177,15 @@ fn parse_iso8601_date(s string) ?(int, int, int) { if count != 3 { return error_invalid_time(10) } + if year > 9999 { + return error_invalid_time(13) + } + if month > 12 { + return error_invalid_time(14) + } + if day > 31 { + return error_invalid_time(15) + } return year, month, day } diff --git a/vlib/time/parse_test.v b/vlib/time/parse_test.v index cf637ca1d0..f5740057cf 100644 --- a/vlib/time/parse_test.v +++ b/vlib/time/parse_test.v @@ -112,6 +112,8 @@ fn test_parse_iso8601_invalid() { '2020-06-05Z', '2020-06-05+00:00', '15:38:06', + '2020-06-32T15:38:06.015959', + '2020-13-13T15:38:06.015959', ] for format in formats { time.parse_iso8601(format) or { diff --git a/vlib/toml/checker/checker.v b/vlib/toml/checker/checker.v index 003b731cf9..3a4bc0942f 100644 --- a/vlib/toml/checker/checker.v +++ b/vlib/toml/checker/checker.v @@ -9,6 +9,7 @@ import toml.util import toml.token import toml.scanner import encoding.utf8 +import time pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`] @@ -37,6 +38,15 @@ fn (c Checker) visit(value &ast.Value) ? { ast.Quoted { c.check_quoted(value) ? } + ast.DateTime { + c.check_date_time(value) ? + } + ast.Date { + c.check_date(value) ? + } + ast.Time { + c.check_time(value) ? + } else { // TODO add more checks to make BurntSushi/toml-test invalid TOML pass } @@ -260,6 +270,100 @@ fn (c Checker) check_boolean(b ast.Bool) ? 
{ ' boolean values like "$lit" can only be `true` or `false` literals, not `$lit` in ...${c.excerpt(b.pos)}...') } +// check_date_time returns an error if `dt` is not a valid TOML date-time string (RFC 3339). +// See also https://ijmacd.github.io/rfc3339-iso8601 for a more +// visual representation of the RFC 3339 format. +fn (c Checker) check_date_time(dt ast.DateTime) ? { + lit := dt.text + mut split := []string{} + // RFC 3339 Date-Times can be split via 4 separators (` `, `_`, `T` and `t`). + if lit.to_lower().contains_any(' _t') { + if lit.contains(' ') { + split = lit.split(' ') + } else if lit.contains('_') { + split = lit.split('_') + } else if lit.contains('T') { + split = lit.split('T') + } else if lit.contains('t') { + split = lit.split('t') + } + // Validate the split into date and time parts. + if split.len != 2 { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" contains too many date/time separators in ...${c.excerpt(dt.pos)}...') + } + // Re-use date and time validation code for detailed testing of each part + c.check_date(ast.Date{ + text: split[0] + pos: token.Position{ + len: split[0].len + line_nr: dt.pos.line_nr + pos: dt.pos.pos + col: dt.pos.col + } + }) ? + c.check_time(ast.Time{ + text: split[1] + pos: token.Position{ + len: split[1].len + line_nr: dt.pos.line_nr + pos: dt.pos.pos + split[0].len + col: dt.pos.col + split[0].len + } + }) ? + // Use V's builtin functionality to validate the string + time.parse_rfc3339(lit) or { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" is not a valid RFC 3339 Date-Time format string "$err". In ...${c.excerpt(dt.pos)}...') + } + } else { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" is not a valid RFC 3339 Date-Time format string in ...${c.excerpt(dt.pos)}...') + } +} + +// check_date returns an error if `date` is not a valid TOML date string (RFC 3339). +fn (c Checker) check_date(date ast.Date) ? 
{ + lit := date.text + parts := lit.split('-') + if parts.len != 3 { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" is not a valid RFC 3339 Date format string in ...${c.excerpt(date.pos)}...') + } + yyyy := parts[0] + if yyyy.len != 4 { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" does not have a valid RFC 3339 year indication in ...${c.excerpt(date.pos)}...') + } + mm := parts[1] + if mm.len != 2 { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" does not have a valid RFC 3339 month indication in ...${c.excerpt(date.pos)}...') + } + dd := parts[2] + if dd.len != 2 { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" does not have a valid RFC 3339 day indication in ...${c.excerpt(date.pos)}...') + } + // Use V's builtin functionality to validate the string + time.parse_rfc3339(lit) or { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" is not a valid RFC 3339 Date format string "$err". In ...${c.excerpt(date.pos)}...') + } +} + +// check_time returns an error if `t` is not a valid TOML time string (RFC 3339). +fn (c Checker) check_time(t ast.Time) ? { + lit := t.text + // Split any offsets from the time + parts := lit.split('-') + // Use V's builtin functionality to validate the time string + time.parse_rfc3339(parts[0]) or { + return error(@MOD + '.' + @STRUCT + '.' + @FN + + ' "$lit" is not a valid RFC 3339 Time format string "$err". In ...${c.excerpt(t.pos)}...') + } +} + // check_quoted returns an error if `q` is not a valid quoted TOML string. fn (c Checker) check_quoted(q ast.Quoted) ? 
{ lit := q.text diff --git a/vlib/toml/tests/burntsushi.toml-test_test.v b/vlib/toml/tests/burntsushi.toml-test_test.v index 7d3c44f2a8..a93945b142 100644 --- a/vlib/toml/tests/burntsushi.toml-test_test.v +++ b/vlib/toml/tests/burntsushi.toml-test_test.v @@ -24,10 +24,6 @@ const ( // Array 'array/tables-1.toml', 'array/text-after-array-entries.toml', - // Date / Time - 'datetime/impossible-date.toml', - 'datetime/no-leads-with-milli.toml', - 'datetime/no-leads.toml', ] )