x.json2: add fast_raw_decode; support for i64 and f32 (#6586)

pull/6593/head
Ned Palacios 2020-10-09 22:11:55 +08:00 committed by GitHub
parent f734f8167b
commit fc375a40f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 200 additions and 162 deletions

View File

@ -162,7 +162,7 @@ pub fn (d Doc) get_name(stmt ast.Stmt) string {
ast.InterfaceDecl { return stmt.name } ast.InterfaceDecl { return stmt.name }
ast.TypeDecl { return d.get_type_name(stmt) } ast.TypeDecl { return d.get_type_name(stmt) }
ast.ConstDecl { return 'Constants' } ast.ConstDecl { return 'Constants' }
else { return '' } else { return typeof(stmt) }
} }
} }

View File

@ -11,7 +11,7 @@ import v.util
import v.pref import v.pref
// `Any` is a sum type that lists the possible types to be decoded and used. // `Any` is a sum type that lists the possible types to be decoded and used.
pub type Any = string | int | f64 | any_int | any_float | bool | Null | []Any | map[string]Any pub type Any = string | int | i64 | f32 | f64 | any_int | any_float | bool | Null | []Any | map[string]Any
// `Null` struct is a simple representation of the `null` value in JSON. // `Null` struct is a simple representation of the `null` value in JSON.
pub struct Null {} pub struct Null {}
@ -36,16 +36,15 @@ mut:
p_tok token.Token p_tok token.Token
tok token.Token tok token.Token
n_tok token.Token n_tok token.Token
nn_tok token.Token
mode ParseMode = .invalid mode ParseMode = .invalid
n_level int n_level int
convert_type bool = true
} }
fn (mut p Parser) next() { fn (mut p Parser) next() {
p.p_tok = p.tok p.p_tok = p.tok
p.tok = p.n_tok p.tok = p.n_tok
p.n_tok = p.nn_tok p.n_tok = p.scanner.scan()
p.nn_tok = p.scanner.scan()
} }
fn (p Parser) emit_error(msg string) string { fn (p Parser) emit_error(msg string) string {
@ -53,18 +52,20 @@ fn (p Parser) emit_error(msg string) string {
cur := p.tok cur := p.tok
mut pp := util.imax(0, util.imin(source.len - 1, cur.pos)) mut pp := util.imax(0, util.imin(source.len - 1, cur.pos))
if source.len > 0 { if source.len > 0 {
for ; pp >= 0; pp-- { for pp >= 0 {
if source[pp] == `\r` || source[pp] == `\n` { if source[pp] !in [`\r`, `\n`] {
break pp--
continue
} }
break
} }
} }
column := util.imax(0, cur.pos - pp + cur.len - 1) column := util.imax(0, cur.pos - pp + cur.len - 1)
line := cur.line_nr line := cur.line_nr
return '[jisoni] ' + msg + ' (At line $line, column $column)' return '[jisoni] $msg ($line:$column)'
} }
fn new_parser(srce string) Parser { fn new_parser(srce string, convert_type bool) Parser {
mut src := srce mut src := srce
// from v/util/util.v // from v/util/util.v
if src.len >= 3 { if src.len >= 3 {
@ -77,23 +78,35 @@ fn new_parser(srce string) Parser {
} }
} }
} }
mut p := Parser{ return Parser{
scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{}), scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{}),
convert_type: convert_type
} }
return p
} }
fn check_valid_hex(str string) ?bool { fn check_valid_hex(str string) ? {
if str.len != 4 { if str.len != 4 {
return error('Hex string must be 4 characters.') return error('hex string must be 4 characters.')
} }
for l in str { for l in str {
if l.is_hex_digit() { continue } if l.is_hex_digit() { continue }
return error('Provided string is not a hex digit.') return error('provided string is not a hex digit.')
} }
}
return true fn (mut p Parser) decode() ?Any {
p.detect_parse_mode()
if p.mode == .invalid {
return error(p.emit_error('invalid JSON.'))
}
fi := p.decode_value() or {
return error(p.emit_error(err))
}
if p.tok.kind != .eof {
return error(p.emit_error('unknown token `$p.tok.kind`.'))
}
return fi
} }
fn (p Parser) is_formfeed() bool { fn (p Parser) is_formfeed() bool {
@ -121,7 +134,6 @@ fn (mut p Parser) detect_parse_mode() {
p.tok = p.scanner.scan() p.tok = p.scanner.scan()
p.n_tok = p.scanner.scan() p.n_tok = p.scanner.scan()
p.nn_tok = p.scanner.scan()
if src.len == 1 && p.tok.kind == .string && p.n_tok.kind == .eof { if src.len == 1 && p.tok.kind == .string && p.n_tok.kind == .eof {
p.mode == .invalid p.mode == .invalid
@ -149,88 +161,76 @@ fn (mut p Parser) detect_parse_mode() {
} }
fn (mut p Parser) decode_value() ?Any { fn (mut p Parser) decode_value() ?Any {
mut fi := Any{} if p.n_level == 500 {
return error('reached maximum nesting level of 500.')
}
if (p.tok.kind == .lsbr && p.n_tok.kind == .lcbr) || (p.p_tok.kind == p.tok.kind && p.tok.kind == .lsbr) { if (p.tok.kind == .lsbr && p.n_tok.kind == .lcbr) || (p.p_tok.kind == p.tok.kind && p.tok.kind == .lsbr) {
p.n_level++ p.n_level++
} }
if p.n_level == 500 {
return error('Reached maximum nesting level of 500.')
}
match p.tok.kind { match p.tok.kind {
.lsbr { .lsbr {
item := p.decode_array()? return p.decode_array()
fi = item
} }
.lcbr { .lcbr {
item := p.decode_object()? return p.decode_object()
fi = item
} }
.number { .number {
item := p.decode_number()? return p.decode_number()
fi = item
} }
.key_true { .key_true {
fi = Any(true) p.next()
return if p.convert_type { Any(true) } else { Any('true') }
} }
.key_false { .key_false {
fi = Any(false) p.next()
return if p.convert_type { Any(false) } else { Any('false') }
} }
.name { .name {
if p.tok.lit != 'null' { if p.tok.lit != 'null' {
return error('Unknown identifier `$p.tok.lit`') return error('unknown identifier `$p.tok.lit`')
} }
fi = Any(Null{}) p.next()
return if p.convert_type { Any(Null{}) } else { Any('null') }
} }
.string { .string {
if p.is_singlequote() { if p.is_singlequote() {
return error('Strings must be in double-quotes.') return error('strings must be in double-quotes.')
} }
item := p.decode_string() or { return p.decode_string()
return error(err)
}
fi = item
} }
else { else {
if p.tok.kind == .minus && p.n_tok.kind == .number && p.n_tok.pos == p.tok.pos+1 { if p.tok.kind == .minus && p.n_tok.kind == .number && p.n_tok.pos == p.tok.pos+1 {
p.next() p.next()
d_num := p.decode_number() or { d_num := p.decode_number()?
return error(err) return d_num
}
p.next()
fi = d_num
return fi
} }
return error('[decode_value] Unknown token `$p.tok.lit`') return error('unknown token \'$p.tok.lit\' when decoding value')
} }
} }
p.next()
if p.is_formfeed() { if p.is_formfeed() {
return error(formfeed_err) return error(formfeed_err)
} }
return fi return Any{}
} }
fn (mut p Parser) decode_string() ?Any { fn (mut p Parser) decode_string() ?Any {
mut strwr := strings.new_builder(200) mut strwr := strings.new_builder(200)
mut fi := Any{}
for i := 0; i < p.tok.lit.len; i++ { for i := 0; i < p.tok.lit.len; i++ {
// s := p.tok.lit[i].str() // s := p.tok.lit[i].str()
// println('$i $s') // println('$i $s')
if ((i-1 >= 0 && p.tok.lit[i-1] != `/`) || i == 0) && int(p.tok.lit[i]) in [9, 10, 0] { if ((i-1 >= 0 && p.tok.lit[i-1] != `/`) || i == 0) && int(p.tok.lit[i]) in [9, 10, 0] {
return error('Character must be escaped with a backslash.') return error('character must be escaped with a backslash.')
} }
if i == p.tok.lit.len-1 && p.tok.lit[i] == 92 { if i == p.tok.lit.len-1 && p.tok.lit[i] == 92 {
return error('Invalid backslash escape.') return error('invalid backslash escape.')
} }
if i+1 < p.tok.lit.len && p.tok.lit[i] == 92 { if i+1 < p.tok.lit.len && p.tok.lit[i] == 92 {
@ -239,15 +239,13 @@ fn (mut p Parser) decode_string() ?Any {
if peek == `u` { if peek == `u` {
if i+5 < p.tok.lit.len { if i+5 < p.tok.lit.len {
codepoint := p.tok.lit[i+2..i+6] codepoint := p.tok.lit[i+2..i+6]
check_valid_hex(codepoint) or { check_valid_hex(codepoint)?
return error(err)
}
hex_val := strconv.parse_int(codepoint, 16, 0) hex_val := strconv.parse_int(codepoint, 16, 0)
strwr.write_b(byte(hex_val)) strwr.write_b(byte(hex_val))
i += 5 i += 5
continue continue
} else { } else {
return error('Incomplete unicode escape.') return error('incomplete unicode escape.')
} }
} }
@ -255,24 +253,29 @@ fn (mut p Parser) decode_string() ?Any {
strwr.write_b(p.tok.lit[i]) strwr.write_b(p.tok.lit[i])
continue continue
} else { } else {
return error('Invalid backslash escape.') return error('invalid backslash escape.')
} }
if peek == 85 { if int(peek) == 85 {
return error('Unicode endpoints must be in lowercase `u`.') return error('unicode endpoints must be in lowercase `u`.')
} }
if int(peek) in [9, 229] { if int(peek) in [9, 229] {
return error('Unicode endpoint not allowed.') return error('unicode endpoint not allowed.')
} }
} }
strwr.write_b(p.tok.lit[i]) strwr.write_b(p.tok.lit[i])
} }
fi = strwr.str() p.next()
return fi defer {
strwr.free()
}
str := strwr.str()
return Any(str)
} }
// Returns the number as a string instead of an int or f64 when `convert_type` is false.
fn (mut p Parser) decode_number() ?Any { fn (mut p Parser) decode_number() ?Any {
src := p.scanner.text src := p.scanner.text
mut tl := p.tok.lit mut tl := p.tok.lit
@ -280,37 +283,46 @@ fn (mut p Parser) decode_number() ?Any {
sep_by_dot := tl.to_lower().split('.') sep_by_dot := tl.to_lower().split('.')
if tl.starts_with('0x') && tl.all_after('0x').len <= 2 { if tl.starts_with('0x') && tl.all_after('0x').len <= 2 {
return error('Hex numbers should not be less than or equal to two digits.') return error('hex numbers should not be less than or equal to two digits.')
} }
if src[p.p_tok.pos + p.p_tok.len] == `0` && src[p.p_tok.pos + p.p_tok.len + 1].is_digit() { if src[p.p_tok.pos + p.p_tok.len] == `0` && src[p.p_tok.pos + p.p_tok.len + 1].is_digit() {
return error('Leading zeroes in integers are not allowed.') return error('leading zeroes in integers are not allowed.')
} }
if tl.starts_with('.') { if tl.starts_with('.') {
return error('Decimals must start with a digit followed by a dot.') return error('decimals must start with a digit followed by a dot.')
} }
if tl.ends_with('+') || tl.ends_with('-') { if tl.ends_with('+') || tl.ends_with('-') {
return error('Exponents must have a digit before the sign.') return error('exponents must have a digit before the sign.')
} }
if sep_by_dot.len > 1 { if sep_by_dot.len > 1 {
// analyze json number structure // analyze json number structure
// -[digit][dot][digit][E/e][-/+][digit] // -[digit][dot][digit][E/e][-/+][digit]
// float number
is_fl = true is_fl = true
last := sep_by_dot.last() last := sep_by_dot.last()
if last.starts_with('e') { if last.starts_with('e') {
return error('Exponents must have a digit before the exponent notation.') return error('exponents must have a digit before the exponent notation.')
} }
} }
if p.p_tok.kind == .minus && p.tok.pos == p.p_tok.pos+1 { if p.p_tok.kind == .minus && p.tok.pos == p.p_tok.pos+1 {
tl = '-' + tl tl = '-$tl'
} }
return if is_fl { Any(tl.f64()) } else { Any(tl.int()) } p.next()
if p.convert_type {
return if is_fl {
Any(tl.f64())
} else {
Any(tl.int())
}
}
return Any(tl)
} }
fn (mut p Parser) decode_array() ?Any { fn (mut p Parser) decode_array() ?Any {
@ -321,10 +333,7 @@ fn (mut p Parser) decode_array() ?Any {
return error(eof_err) return error(eof_err)
} }
item := p.decode_value() or { item := p.decode_value()?
return error(err)
}
items << item items << item
if p.tok.kind == .comma && p.n_tok.kind !in [.rsbr, .comma] { if p.tok.kind == .comma && p.n_tok.kind !in [.rsbr, .comma] {
p.next() p.next()
@ -335,16 +344,15 @@ fn (mut p Parser) decode_array() ?Any {
break break
} }
return error('Unknown token `$p.tok.lit` when decoding arrays.') return error('unknown token \'$p.tok.lit\' when decoding arrays.')
} }
p.next()
return Any(items) return Any(items)
} }
fn (mut p Parser) decode_object() ?Any { fn (mut p Parser) decode_object() ?Any {
mut fields := map[string]Any mut fields := map[string]Any
mut cur_key := '' mut cur_key := ''
p.next() p.next()
for p.tok.kind != .rcbr { for p.tok.kind != .rcbr {
@ -360,33 +368,27 @@ fn (mut p Parser) decode_object() ?Any {
} }
if p.is_singlequote() { if p.is_singlequote() {
return error('Object keys must be in single quotes.') return error('object keys must be in single quotes.')
} }
if !is_key { if !is_key {
return error('Invalid token `$p.tok.lit`, expected `string`') return error('invalid token `$p.tok.lit`, expected \'string\'')
} }
cur_key = p.tok.lit cur_key = p.tok.lit
p.next() p.next()
p.next() p.next()
item := p.decode_value() or { fields[cur_key] = p.decode_value()?
return error(err)
}
fields[cur_key] = item
if p.tok.kind == .comma && p.n_tok.kind !in [.rcbr, .comma] { if p.tok.kind == .comma && p.n_tok.kind !in [.rcbr, .comma] {
p.next() p.next()
continue continue
} } else if p.tok.kind == .rcbr {
if p.tok.kind == .rcbr {
break break
} }
return error('Unknown token `$p.tok.lit` when decoding object.') return error('unknown token \'$p.tok.lit\' when decoding object.')
} }
p.next()
return Any(fields) return Any(fields)
} }

View File

@ -4,44 +4,54 @@
module json2 module json2
import strings import strings
// write_value appends the string form of `v` to `wr`, wrapping it in double
// quotes when `v` holds a `string`. A `,` separator is written afterwards
// unless `i` is the last index (`len - 1`) or beyond.
fn write_value(v Any, i int, len int, mut wr strings.Builder) {
	text := v.str()
	if v is string {
		wr.write('"$text"')
	} else {
		wr.write(text)
	}
	// Every element except the final one is followed by a comma.
	if i < len - 1 {
		wr.write_b(`,`)
	}
}
// String representation of the `map[string]Any`. // String representation of the `map[string]Any`.
pub fn (flds map[string]Any) str() string { pub fn (flds map[string]Any) str() string {
mut wr := strings.new_builder(200) mut wr := strings.new_builder(200)
wr.write('{') wr.write_b(`{`)
mut i := 0 mut i := 0
for k, v in flds { for k, v in flds {
wr.write('"$k":') wr.write('"$k":')
if v is string { write_value(v, i, flds.len, mut wr)
wr.write('"' + *v + '"')
} else {
wr.write(v.str())
}
if i < flds.len-1 { wr.write(',') }
i++ i++
} }
wr.write('}') wr.write_b(`}`)
return wr.str() defer {
wr.free()
}
res := wr.str()
return res
} }
// String representation of the `[]Any`. // String representation of the `[]Any`.
pub fn (flds []Any) str() string { pub fn (flds []Any) str() string {
mut wr := strings.new_builder(200) mut wr := strings.new_builder(200)
wr.write('[') wr.write_b(`[`)
for i, v in flds { for i, v in flds {
if v is string { write_value(v, i, flds.len, mut wr)
wr.write('"' + *v + '"')
} else {
wr.write(v.str())
}
if i < flds.len-1 { wr.write(',') }
} }
wr.write(']') wr.write_b(`]`)
return wr.str() defer {
wr.free()
}
res := wr.str()
return res
} }
// String representation of the `Any` type. // String representation of the `Any` type.
pub fn (f Any) str() string { pub fn (f Any) str() string {
match f { match f {
string { return *f } string { return *f }
int { return (*f).str() } int { return (*f).str() }
i64 { return (*f).str() }
f32 { return (*f).str() }
f64 { return (*f).str() } f64 { return (*f).str() }
any_int { return (*f).str() } any_int { return (*f).str() }
any_float { return (*f).str() } any_float { return (*f).str() }

View File

@ -10,22 +10,14 @@ pub interface Serializable {
// Decodes a JSON string into an `Any` type. Returns an option. // Decodes a JSON string into an `Any` type. Returns an option.
pub fn raw_decode(src string) ?Any { pub fn raw_decode(src string) ?Any {
mut p := new_parser(src) mut p := new_parser(src, true)
p.detect_parse_mode() return p.decode()
}
if p.mode == .invalid { // Same as `raw_decode`, but skips the type conversion for certain types when decoding a value.
return error(p.emit_error('Invalid JSON.')) pub fn fast_raw_decode(src string) ?Any {
} mut p := new_parser(src, false)
return p.decode()
fi := p.decode_value() or {
return error(p.emit_error(err))
}
if p.tok.kind != .eof {
return error(p.emit_error('Unknown token `$p.tok.kind`.'))
}
return fi
} }
// A generic function that decodes a JSON string into the target type. // A generic function that decodes a JSON string into the target type.
// //
@ -38,8 +30,33 @@ pub fn decode<T>(src string) T {
typ.from_json(res) typ.from_json(res)
return typ return typ
} }
// TODO: decode should return an optional of the generic type (`?T`)
// pub fn decode2<T>(src string) ?T {
// res := raw_decode(src)?
// match typeof(T) {
// 'string' {
// return res.str()
// }
// 'int' {
// return res.int()
// }
// 'f64' {
// return res.f64()
// }
// else {
// mut typ := T{}
// typ.from_json(res)
// return typ
// }
// }
// }
// A generic function that encodes a type into a JSON string. // A generic function that encodes a type into a JSON string.
pub fn encode<T>(typ T) string { pub fn encode<T>(typ T) string {
// if typeof(typ) in ['string', 'int', 'f64'] {
// return Any(typ).str()
// }
return typ.to_json() return typ.to_json()
} }
// A simple function that returns `Null` struct. For use on constructing an `Any` object. // A simple function that returns `Null` struct. For use on constructing an `Any` object.
@ -48,60 +65,61 @@ pub fn null() Null {
} }
// Use `Any` as a map. // Use `Any` as a map.
pub fn (f Any) as_map() map[string]Any { pub fn (f Any) as_map() map[string]Any {
mut mp := map[string]Any if f is map[string]Any {
return *f
match f { } else if f is []Any {
map[string]Any { mut mp := map[string]Any
return *f arr := f
} for i, fi in arr {
string { mp['$i'] = fi
mp['0'] = f
return mp
}
int {
mp['0'] = f
return mp
}
bool {
mp['0'] = f
return mp
}
f64 {
mp['0'] = f
return mp
}
Null {
mp['0'] = f
return mp
}
else {
if typeof(f) == 'array_Any' {
arr := f as []Any
for i, fi in arr {
mp[i.str()] = fi
}
return mp
}
return mp
} }
return mp
} }
return { '0': f }
} }
// Use `Any` as an integer. // Use `Any` as an integer.
pub fn (f Any) int() int { pub fn (f Any) int() int {
match f { match f {
int { return *f } int { return *f }
i64 { return int(*f) }
f64 { return f.str().int() } f64 { return f.str().int() }
f32 { return f.str().int() }
bool { return int(f) }
else { return 0 } else { return 0 }
} }
} }
// Use `Any` as a 64-bit integer.
// `int` and `i64` values are returned directly (an `int` is widened to `i64`),
// `f64` and `f32` values are converted through their string form, `bool` is
// converted with `int(f)`, and every other variant yields 0.
pub fn (f Any) i64() i64 {
	match f {
		int { return *f }
		// Return the stored value unchanged: casting it through `int` first
		// would narrow it to 32 bits and lose the upper half of large values.
		i64 { return *f }
		f64 { return f.str().i64() }
		f32 { return f.str().i64() }
		bool { return int(f) }
		else { return 0 }
	}
}
// Use `Any` as a 32-bit float.
// An `f32` or `int` value is returned directly, `i64` and `f64` values are
// converted through their string form, and every other variant yields 0.0.
pub fn (f Any) f32() f32 {
	if f is f32 {
		return *f
	} else if f is int {
		return *f
	} else if f is i64 {
		return (*f).str().f32()
	} else if f is f64 {
		return (*f).str().f32()
	}
	// Remaining variants have no float conversion here.
	return 0.0
}
// Use `Any` as a float. // Use `Any` as a float.
pub fn (f Any) f64() f64 { pub fn (f Any) f64() f64 {
match f { match f {
int { return *f } int { return *f }
i64 { return *f }
f64 { return *f } f64 { return *f }
f32 { return (*f).str().f64() }
else { return 0.0 } else { return 0.0 }
} }
} }
@ -109,9 +127,7 @@ pub fn (f Any) f64() f64 {
pub fn (f Any) arr() []Any { pub fn (f Any) arr() []Any {
if f is []Any { if f is []Any {
return *f return *f
} } else if f is map[string]Any {
if f is map[string]Any {
mut arr := []Any{} mut arr := []Any{}
mp := *f mp := *f
for _, v in mp { for _, v in mp {
@ -119,7 +135,6 @@ pub fn (f Any) arr() []Any {
} }
return arr return arr
} }
return [f] return [f]
} }

View File

@ -17,7 +17,7 @@ fn (e Employee) to_json() string {
mut mp := map[string]json2.Any mut mp := map[string]json2.Any
mp['name'] = e.name mp['name'] = e.name
mp['age'] = e.age mp['age'] = e.age
mp['salary'] = f64(e.salary) mp['salary'] = e.salary
mp['title'] = int(e.title) mp['title'] = int(e.title)
/* /*
@ -58,6 +58,17 @@ fn test_simple() {
assert ym['title'].int() == 2 assert ym['title'].int() == 2
} }
// Checks the output of `fast_raw_decode`: after decoding, the object's string
// form shows every number of the input wrapped in double quotes.
fn test_fast_raw_decode() {
	input := '{"name":"Peter","age":28,"salary":95000.5,"title":2}'
	expected := '{"name":"Peter","age":"28","salary":"95000.5","title":"2"}'
	decoded := json2.fast_raw_decode(input) or {
		// Decoding this input is expected to succeed; fail the test otherwise.
		assert false
		json2.Any{}
	}
	assert decoded.str() == expected
}
/* /*
struct User2 { struct User2 {
age int age int