toml: comply with BurntSushi @eb989e5 (#12616)

pull/12618/head
Larpon 2021-11-29 21:15:22 +01:00 committed by GitHub
parent 83260e5074
commit 14424100e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 47 additions and 11 deletions

View File

@ -14,7 +14,7 @@ jobs:
timeout-minutes: 10 timeout-minutes: 10
env: env:
TOML_BS_TESTS_PATH: vlib/toml/tests/testdata/burntsushi/toml-test TOML_BS_TESTS_PATH: vlib/toml/tests/testdata/burntsushi/toml-test
TOML_BS_TESTS_PINNED_COMMIT: 8baf830 TOML_BS_TESTS_PINNED_COMMIT: eb989e5
TOML_IARNA_TESTS_PATH: vlib/toml/tests/testdata/iarna/toml-test TOML_IARNA_TESTS_PATH: vlib/toml/tests/testdata/iarna/toml-test
TOML_IARNA_TESTS_PINNED_COMMIT: 1880b1a TOML_IARNA_TESTS_PINNED_COMMIT: 1880b1a
steps: steps:

View File

@ -356,7 +356,23 @@ fn (c Checker) check_date(date ast.Date) ? {
fn (c Checker) check_time(t ast.Time) ? { fn (c Checker) check_time(t ast.Time) ? {
lit := t.text lit := t.text
// Split any offsets from the time // Split any offsets from the time
parts := lit.split('-') mut offset_splitter := if lit.contains('+') { '+' } else { '-' }
parts := lit.split(offset_splitter)
mut hhmmss := parts[0].all_before('.')
// Check for 2 digits in all fields
mut check_length := 8
if hhmmss.to_upper().ends_with('Z') {
check_length++
}
if hhmmss.len != check_length {
starts_with_zero := hhmmss.starts_with('0')
if !starts_with_zero {
return error(@MOD + '.' + @STRUCT + '.' + @FN +
' "$lit" must be zero prefixed in ...${c.excerpt(t.pos)}...')
}
return error(@MOD + '.' + @STRUCT + '.' + @FN +
' "$lit" is not a valid RFC 3339 Time format string in ...${c.excerpt(t.pos)}...')
}
// Use V's builtin functionality to validate the time string // Use V's builtin functionality to validate the time string
time.parse_rfc3339(parts[0]) or { time.parse_rfc3339(parts[0]) or {
return error(@MOD + '.' + @STRUCT + '.' + @FN + return error(@MOD + '.' + @STRUCT + '.' + @FN +
@ -397,6 +413,7 @@ fn (c Checker) check_quoted_escapes(q ast.Quoted) ? {
// See https://toml.io/en/v1.0.0#string for more info on string types. // See https://toml.io/en/v1.0.0#string for more info on string types.
is_basic := q.quote == `\"` is_basic := q.quote == `\"`
contains_newlines := q.text.contains('\n')
for { for {
ch := s.next() ch := s.next()
if ch == scanner.end_of_text { if ch == scanner.end_of_text {
@ -414,11 +431,18 @@ fn (c Checker) check_quoted_escapes(q ast.Quoted) ? {
escape := ch_byte.ascii_str() + next_ch.ascii_str() escape := ch_byte.ascii_str() + next_ch.ascii_str()
if is_basic { if is_basic {
if q.is_multiline { if q.is_multiline {
if next_ch == byte(32) && s.peek(1) == byte(92) { if next_ch == byte(32) {
if s.peek(1) == byte(92) {
st := s.state() st := s.state()
return error(@MOD + '.' + @STRUCT + '.' + @FN + return error(@MOD + '.' + @STRUCT + '.' + @FN +
' can not escape whitespaces before escapes in multi-line strings (`\\ \\`) at `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...') ' can not escape whitespaces before escapes in multi-line strings (`\\ \\`) at `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...')
} }
if !contains_newlines {
st := s.state()
return error(@MOD + '.' + @STRUCT + '.' + @FN +
' can not escape whitespaces in multi-line strings (`\\ `) at `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...')
}
}
if next_ch in [`\t`, `\n`, ` `] { if next_ch in [`\t`, `\n`, ` `] {
s.next() s.next()
continue continue
@ -518,10 +542,16 @@ pub fn (c Checker) check_comment(comment ast.Comment) ? {
mut s := scanner.new_simple(lit) ? mut s := scanner.new_simple(lit) ?
for { for {
ch := s.next() ch := s.next()
if ch == -1 { if ch == scanner.end_of_text {
break break
} }
ch_byte := byte(ch) ch_byte := byte(ch)
// Check for carriage return
if ch_byte == 0x0D {
st := s.state()
return error(@MOD + '.' + @STRUCT + '.' + @FN +
' carrige return character `$ch_byte.hex()` is not allowed ($st.line_nr,$st.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(st.pos, 10)}...')
}
// Check for control characters (allow TAB) // Check for control characters (allow TAB)
if util.is_illegal_ascii_control_character(ch_byte) { if util.is_illegal_ascii_control_character(ch_byte) {
st := s.state() st := s.state()

View File

@ -252,7 +252,7 @@ fn (d Decoder) decode_date_time(mut dt ast.DateTime) ? {
if ms.len > 1 { if ms.len > 1 {
return return
} }
ms = ms + '0'.repeat(6 - ms.len) + z ms = ms + '0'.repeat(4 - ms.len) + z
dt.text = yymmddhhmmss + '.' + ms + offset dt.text = yymmddhhmmss + '.' + ms + offset
} }
} }

View File

@ -93,6 +93,12 @@ pub fn (mut s Scanner) scan() ?token.Token {
ascii := byte_c.ascii_str() ascii := byte_c.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'current char "$ascii"') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'current char "$ascii"')
if byte_c == byte(0x0) {
s.reset()
return error(@MOD + '.' + @STRUCT + '.' + @FN +
' NULL control character `$c.hex()` is not allowed at ($s.line_nr,$s.col) "$ascii" near ...${s.excerpt(s.pos, 5)}...')
}
is_sign := c == `+` || c == `-` is_sign := c == `+` || c == `-`
// (+/-)nan & (+/-)inf // (+/-)nan & (+/-)inf
@ -340,7 +346,7 @@ fn (mut s Scanner) ignore_line() ?string {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()} / $c"') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()} / $c"')
if s.at_crlf() { if s.at_crlf() {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'letting `\\r\\n` slip through') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'letting `\\r\\n` slip through')
return s.text[start..s.pos] return s.text[start..s.pos + 1]
} }
} }
return s.text[start..s.pos] return s.text[start..s.pos]

View File

@ -2,8 +2,8 @@ import toml
fn test_crlf() { fn test_crlf() {
str_value := 'test string' str_value := 'test string'
mut toml_txt := 'crlf_string = "test string" mut toml_txt := 'crlf_string = "test string"\r\n
# Comment with CRLF\r\n' # Comment with CRLF is not allowed'
toml_doc := toml.parse(toml_txt) or { panic(err) } toml_doc := toml.parse(toml_txt) or { panic(err) }
value := toml_doc.value('crlf_string') value := toml_doc.value('crlf_string')