x/json2: fix decoder errors (#13655)

* x/json2: fix decoder errors, refactoring

* x/json2: add error-catching tests

* x/json2: add missing docs

* x/json2: fmt
pull/13661/head
Ned 2022-03-05 19:02:43 +08:00 committed by GitHub
parent 3ef437e679
commit 0e5ae7126f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 133 additions and 39 deletions

View File

@ -11,6 +11,80 @@ pub struct Null {
is_null bool = true is_null bool = true
} }
// ValueKind names the kind of JSON value a decoder error refers to.
// UnknownTokenError stores one (defaulting to `.unknown`) and prints it
// in its message through ValueKind.str().
pub enum ValueKind {
unknown
array
object
// rendered as 'string' by str(); NOTE(review): the trailing underscore
// presumably avoids a clash with the builtin `string` name - confirm
string_
number
}
// str converts a ValueKind into its lowercase display name.
// Every variant maps to its own identifier, except `.string_`,
// which is rendered as 'string' (without the trailing underscore).
pub fn (k ValueKind) str() string {
	name := match k {
		.unknown { 'unknown' }
		.array { 'array' }
		.object { 'object' }
		.string_ { 'string' }
		.number { 'number' }
	}
	return name
}
// format_message prefixes `msg` with the module tag and appends the
// position, giving `[x.json2] <msg> (<line>:<column>)`.
fn format_message(msg string, line int, column int) string {
	position := '$line:$column'
	return '[x.json2] $msg ($position)'
}
// DecodeError is a decoding failure described by a free-form message and
// the position it refers to. Its msg() renders the three fields as
// `[x.json2] <message> (<line>:<column>)`.
pub struct DecodeError {
// line reported in the formatted message
line int
// column reported in the formatted message
column int
// description placed after the `[x.json2]` prefix
message string
}
// code returns the error code of DecodeError, which is always 3
// (InvalidTokenError uses 2 and UnknownTokenError uses 1).
pub fn (err DecodeError) code() int {
return 3
}
// msg returns the message of the DecodeError, formatted by format_message
// as `[x.json2] <message> (<line>:<column>)` from the error's own fields.
pub fn (err DecodeError) msg() string {
return format_message(err.message, err.line, err.column)
}
// InvalidTokenError reports a token that is not valid at its position,
// optionally together with the token kind that was expected instead.
pub struct InvalidTokenError {
// embedded base error; InvalidTokenError.msg() does not read its
// line/column/message fields and takes the position from `token` instead
DecodeError
// the offending token; msg() prints its kind, line and full column
token Token
// the kind that was expected; when it is `.none_`, msg() omits the
// `, expecting ...` suffix
expected TokenKind
}
// code returns the error code of the InvalidTokenError, which is always 2
// (DecodeError uses 3 and UnknownTokenError uses 1).
pub fn (err InvalidTokenError) code() int {
return 2
}
// msg returns the message of the InvalidTokenError: the kind of the
// offending token, followed by the expected kind when one was recorded
// (i.e. `expected` is not `.none_`). The position comes from the token.
pub fn (err InvalidTokenError) msg() string {
	mut text := 'invalid token `$err.token.kind`'
	if err.expected != .none_ {
		text += ', expecting `$err.expected`'
	}
	return format_message(text, err.token.line, err.token.full_col())
}
// UnknownTokenError reports a token that could not be recognised while
// decoding a value of the given kind.
pub struct UnknownTokenError {
// embedded base error; UnknownTokenError.msg() does not read its
// line/column/message fields and takes the position from `token` instead
DecodeError
// the offending token; msg() prints its literal, line and full column
token Token
// the kind of value that was being decoded; defaults to `.unknown`
kind ValueKind = .unknown
}
// code returns the error code of the UnknownTokenError, which is always 1
// (DecodeError uses 3 and InvalidTokenError uses 2).
pub fn (err UnknownTokenError) code() int {
return 1
}
// msg returns the error message of the UnknownTokenError: the literal of
// the offending token and the kind of value that was being decoded,
// followed by the token's position.
pub fn (err UnknownTokenError) msg() string {
	// `token.lit` is a `[]byte`; interpolating it directly would print the
	// array representation of the bytes instead of the token text, so it is
	// converted with bytestr() first, as the parser does for other literals.
	lit := err.token.lit.bytestr()
	return format_message("unknown token '$lit' when decoding ${err.kind}.", err.token.line,
		err.token.full_col())
}
struct Parser { struct Parser {
mut: mut:
scanner &Scanner scanner &Scanner
@ -21,14 +95,6 @@ mut:
convert_type bool = true convert_type bool = true
} }
struct InvalidTokenError {
MessageError
}
struct UnknownTokenError {
MessageError
}
fn (mut p Parser) next() { fn (mut p Parser) next() {
p.p_tok = p.tok p.p_tok = p.tok
p.tok = p.n_tok p.tok = p.n_tok
@ -38,16 +104,14 @@ fn (mut p Parser) next() {
fn (mut p Parser) next_with_err() ? { fn (mut p Parser) next_with_err() ? {
p.next() p.next()
if p.tok.kind == .error { if p.tok.kind == .error {
return error(p.emit_error(p.tok.lit.bytestr())) return IError(DecodeError{
line: p.tok.line
column: p.tok.full_col()
message: p.tok.lit.bytestr()
})
} }
} }
fn (p Parser) emit_error(msg string) string {
line := p.tok.line
column := p.tok.col + p.tok.lit.len
return '[x.json2] $msg ($line:$column)'
}
// TODO: copied from v.util to avoid the entire module and its functions // TODO: copied from v.util to avoid the entire module and its functions
// from being imported. remove later once -skip-unused is enabled by default. // from being imported. remove later once -skip-unused is enabled by default.
fn skip_bom(file_content string) string { fn skip_bom(file_content string) string {
@ -81,8 +145,8 @@ fn (mut p Parser) decode() ?Any {
p.next_with_err() ? p.next_with_err() ?
fi := p.decode_value() ? fi := p.decode_value() ?
if p.tok.kind != .eof { if p.tok.kind != .eof {
return IError(&InvalidTokenError{ return IError(InvalidTokenError{
msg: p.emit_error('invalid token `$p.tok.kind`') token: p.tok
}) })
} }
return fi return fi
@ -90,7 +154,9 @@ fn (mut p Parser) decode() ?Any {
fn (mut p Parser) decode_value() ?Any { fn (mut p Parser) decode_value() ?Any {
if p.n_level + 1 == 500 { if p.n_level + 1 == 500 {
return error(p.emit_error('reached maximum nesting level of 500')) return IError(DecodeError{
message: 'reached maximum nesting level of 500'
})
} }
match p.tok.kind { match p.tok.kind {
.lsbr { .lsbr {
@ -134,8 +200,8 @@ fn (mut p Parser) decode_value() ?Any {
return Any(str) return Any(str)
} }
else { else {
return IError(&InvalidTokenError{ return IError(InvalidTokenError{
msg: p.emit_error('invalid token `$p.tok.kind`') token: p.tok
}) })
} }
} }
@ -152,16 +218,15 @@ fn (mut p Parser) decode_array() ?Any {
items << item items << item
if p.tok.kind == .comma { if p.tok.kind == .comma {
p.next_with_err() ? p.next_with_err() ?
if p.tok.kind == .rsbr || p.tok.kind == .rcbr { if p.tok.kind == .rsbr {
return IError(&InvalidTokenError{ return IError(InvalidTokenError{
msg: p.emit_error('invalid token `$p.tok.lit') token: p.tok
}) })
} }
} else if p.tok.kind == .rsbr { } else if p.tok.kind != .rsbr {
break return IError(UnknownTokenError{
} else { token: p.tok
return IError(&UnknownTokenError{ kind: .array
msg: p.emit_error("unknown token '$p.tok.lit' when decoding array.")
}) })
} }
} }
@ -175,23 +240,31 @@ fn (mut p Parser) decode_object() ?Any {
p.next_with_err() ? p.next_with_err() ?
p.n_level++ p.n_level++
for p.tok.kind != .rcbr { for p.tok.kind != .rcbr {
is_key := p.tok.kind == .str_ && p.n_tok.kind == .colon if p.tok.kind != .str_ {
if !is_key { return IError(InvalidTokenError{
return IError(&InvalidTokenError{ token: p.tok
msg: p.emit_error('invalid token `$p.tok.kind`, expecting `str_`') expected: .str_
}) })
} }
cur_key := p.tok.lit.bytestr() cur_key := p.tok.lit.bytestr()
p.next_with_err() ? p.next_with_err() ?
p.next_with_err() ? if p.tok.kind != .colon {
fields[cur_key] = p.decode_value() ? return IError(InvalidTokenError{
if p.tok.kind == .comma { token: p.tok
p.next_with_err() ? expected: .colon
if p.tok.kind != .str_ {
return IError(&UnknownTokenError{
msg: p.emit_error("unknown token '$p.tok.lit' when decoding object.")
}) })
} }
p.next_with_err() ?
fields[cur_key] = p.decode_value() ?
if p.tok.kind != .comma && p.tok.kind != .rcbr {
return IError(UnknownTokenError{
token: p.tok
kind: .object
})
} else if p.tok.kind == .comma {
p.next_with_err() ?
} }
} }
p.next_with_err() ? p.next_with_err() ?

View File

@ -66,3 +66,19 @@ fn test_nested_array_object() ? {
decoded := parser.decode() ? decoded := parser.decode() ?
assert parser.n_level == 0 assert parser.n_level == 0
} }
// test_raw_decode_map_invalid checks that an object whose first key is
// followed by a comma instead of a colon is rejected by raw_decode, and
// pins the exact error message that is produced.
fn test_raw_decode_map_invalid() ? {
raw_decode('{"name","Bob","age":20}') or {
assert err.msg() == '[x.json2] invalid token `comma`, expecting `colon` (0:5)'
return
}
// only reached when raw_decode unexpectedly succeeded
assert false
}
// test_raw_decode_array_invalid checks that an array closed with `}` after
// a trailing comma is rejected by raw_decode, and pins the exact error
// message that is produced.
fn test_raw_decode_array_invalid() ? {
raw_decode('["Foo", 1,}') or {
assert err.msg() == '[x.json2] invalid token `rcbr` (0:5)'
return
}
// only reached when raw_decode unexpectedly succeeded
assert false
}

View File

@ -30,13 +30,18 @@ enum TokenKind {
rcbr = 125 rcbr = 125
} }
struct Token { pub struct Token {
lit []byte lit []byte
kind TokenKind kind TokenKind
line int line int
col int col int
} }
// full_col returns the token's column offset by the length of its
// literal, i.e. `col` plus the number of bytes in `lit`.
pub fn (t Token) full_col() int {
	lit_len := t.lit.len
	return t.col + lit_len
}
const ( const (
// list of characters commonly used in JSON. // list of characters commonly used in JSON.
char_list = [`{`, `}`, `[`, `]`, `,`, `:`] char_list = [`{`, `}`, `[`, `]`, `,`, `:`]