toml: improve comment support (#12305)
parent
52df19ef61
commit
9a3967bd7d
|
@ -11,7 +11,8 @@ pub struct Root {
|
||||||
pub:
|
pub:
|
||||||
input input.Config // User input configuration
|
input input.Config // User input configuration
|
||||||
pub mut:
|
pub mut:
|
||||||
table Value
|
comments []Comment
|
||||||
|
table Value
|
||||||
// errors []errors.Error // all the checker errors in the file
|
// errors []errors.Error // all the checker errors in the file
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,9 +5,10 @@ module checker
|
||||||
|
|
||||||
import toml.ast
|
import toml.ast
|
||||||
import toml.ast.walker
|
import toml.ast.walker
|
||||||
// import toml.util
|
import toml.util
|
||||||
import toml.token
|
import toml.token
|
||||||
import toml.scanner
|
import toml.scanner
|
||||||
|
import encoding.utf8
|
||||||
|
|
||||||
pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`]
|
pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`]
|
||||||
|
|
||||||
|
@ -255,9 +256,10 @@ fn (c Checker) check_quoted(q ast.Quoted) ? {
|
||||||
triple_quote := quote + quote + quote
|
triple_quote := quote + quote + quote
|
||||||
if q.is_multiline && lit.ends_with(triple_quote) {
|
if q.is_multiline && lit.ends_with(triple_quote) {
|
||||||
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
||||||
' string values like "$lit" is has unbalanced quote literals `q.quote` in ...${c.excerpt(q.pos)}...')
|
' string values like "$lit" has unbalanced quote literals `q.quote` in ...${c.excerpt(q.pos)}...')
|
||||||
}
|
}
|
||||||
c.check_quoted_escapes(q) ?
|
c.check_quoted_escapes(q) ?
|
||||||
|
c.check_utf8_validity(q) ?
|
||||||
}
|
}
|
||||||
|
|
||||||
// check_quoted_escapes returns an error for any disallowed escape sequences.
|
// check_quoted_escapes returns an error for any disallowed escape sequences.
|
||||||
|
@ -314,3 +316,37 @@ fn (c Checker) check_quoted_escapes(q ast.Quoted) ? {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// check_utf8_validity returns an error if the text of `q` is not valid UTF-8.
|
||||||
|
fn (c Checker) check_utf8_validity(q ast.Quoted) ? {
|
||||||
|
lit := q.text
|
||||||
|
if !utf8.validate_str(lit) {
|
||||||
|
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
||||||
|
' the string value "$lit" is not valid UTF-8 in ...${c.excerpt(q.pos)}...')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn (c Checker) check_comment(cmt ast.Comment) ? {
|
||||||
|
lit := cmt.text
|
||||||
|
// Set up a scanner in stack memory for easier navigation.
|
||||||
|
mut s := scanner.new_simple(lit) ?
|
||||||
|
for {
|
||||||
|
ch := s.next()
|
||||||
|
if ch == -1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
ch_byte := byte(ch)
|
||||||
|
// Check for control characters (allow TAB)
|
||||||
|
if util.is_illegal_ascii_control_character(ch_byte) {
|
||||||
|
st := s.state()
|
||||||
|
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
||||||
|
' control character `$ch_byte.hex()` is not allowed ($st.line_nr,$st.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(st.pos, 10)}...')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for bad UTF-8 encoding
|
||||||
|
if !utf8.validate_str(lit) {
|
||||||
|
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
||||||
|
' comment "$lit" is not valid UTF-8 in ...${c.excerpt(cmt.pos)}...')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -59,6 +59,9 @@ fn (mut p Parser) run_checker() ? {
|
||||||
scanner: p.scanner
|
scanner: p.scanner
|
||||||
}
|
}
|
||||||
chckr.check(p.root_map) ?
|
chckr.check(p.root_map) ?
|
||||||
|
for comment in p.ast_root.comments {
|
||||||
|
chckr.check_comment(comment) ?
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -240,8 +243,8 @@ pub fn (mut p Parser) root_table() ? {
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')
|
||||||
match p.tok.kind {
|
match p.tok.kind {
|
||||||
.hash {
|
.hash {
|
||||||
// TODO table.comments << p.comment()
|
|
||||||
c := p.comment()
|
c := p.comment()
|
||||||
|
p.ast_root.comments << c
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
|
||||||
}
|
}
|
||||||
//.whitespace, .tab, .nl {
|
//.whitespace, .tab, .nl {
|
||||||
|
|
|
@ -174,9 +174,7 @@ pub fn (mut s Scanner) scan() ?token.Token {
|
||||||
return s.new_token(.quoted, ident_string, ident_string.len)
|
return s.new_token(.quoted, ident_string, ident_string.len)
|
||||||
}
|
}
|
||||||
`#` {
|
`#` {
|
||||||
start := s.pos //+ 1
|
hash := s.ignore_line() ?
|
||||||
s.ignore_line() ?
|
|
||||||
hash := s.text[start..s.pos]
|
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified comment hash "$hash" ($hash.len)')
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified comment hash "$hash" ($hash.len)')
|
||||||
return s.new_token(.hash, hash, hash.len + 1)
|
return s.new_token(.hash, hash, hash.len + 1)
|
||||||
}
|
}
|
||||||
|
@ -318,18 +316,14 @@ fn (mut s Scanner) new_token(kind token.Kind, lit string, len int) token.Token {
|
||||||
|
|
||||||
// ignore_line forwards the scanner to the end of the current line.
|
// ignore_line forwards the scanner to the end of the current line.
|
||||||
[direct_array_access; inline]
|
[direct_array_access; inline]
|
||||||
fn (mut s Scanner) ignore_line() ? {
|
fn (mut s Scanner) ignore_line() ?string {
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' ignoring until EOL')
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' ignoring until EOL...')
|
||||||
|
start := s.pos
|
||||||
for c := s.at(); c != -1 && c != `\n`; c = s.at() {
|
for c := s.at(); c != -1 && c != `\n`; c = s.at() {
|
||||||
// Check for control characters (allow TAB)
|
|
||||||
if util.is_illegal_ascii_control_character(c) {
|
|
||||||
return error(@MOD + '.' + @STRUCT + '.' + @FN +
|
|
||||||
' control character `$c.hex()` is not allowed ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...')
|
|
||||||
}
|
|
||||||
s.next()
|
s.next()
|
||||||
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()}"')
|
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()}"')
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
return s.text[start..s.pos]
|
||||||
}
|
}
|
||||||
|
|
||||||
// inc_line_number increases the internal line number.
|
// inc_line_number increases the internal line number.
|
||||||
|
|
|
@ -19,9 +19,6 @@ const (
|
||||||
'string/basic-out-of-range-unicode-escape-1.toml',
|
'string/basic-out-of-range-unicode-escape-1.toml',
|
||||||
'string/basic-out-of-range-unicode-escape-2.toml',
|
'string/basic-out-of-range-unicode-escape-2.toml',
|
||||||
'string/bad-uni-esc.toml',
|
'string/bad-uni-esc.toml',
|
||||||
// Encoding
|
|
||||||
'encoding/bad-utf8-in-comment.toml',
|
|
||||||
'encoding/bad-utf8-in-string.toml',
|
|
||||||
// Table
|
// Table
|
||||||
'table/rrbrace.toml',
|
'table/rrbrace.toml',
|
||||||
'table/duplicate-table-array2.toml',
|
'table/duplicate-table-array2.toml',
|
||||||
|
|
Loading…
Reference in New Issue