x.json2: add fast_raw_decode; support for i64 and f32 (#6586)

pull/6593/head
Ned Palacios 2020-10-09 22:11:55 +08:00 committed by GitHub
parent f734f8167b
commit fc375a40f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 200 additions and 162 deletions

View File

@ -162,7 +162,7 @@ pub fn (d Doc) get_name(stmt ast.Stmt) string {
ast.InterfaceDecl { return stmt.name }
ast.TypeDecl { return d.get_type_name(stmt) }
ast.ConstDecl { return 'Constants' }
else { return '' }
else { return typeof(stmt) }
}
}

View File

@ -11,7 +11,7 @@ import v.util
import v.pref
// `Any` is a sum type that lists the possible types to be decoded and used.
pub type Any = string | int | f64 | any_int | any_float | bool | Null | []Any | map[string]Any
pub type Any = string | int | i64 | f32 | f64 | any_int | any_float | bool | Null | []Any | map[string]Any
// `Null` struct is a simple representation of the `null` value in JSON.
pub struct Null {}
@ -36,16 +36,15 @@ mut:
p_tok token.Token
tok token.Token
n_tok token.Token
nn_tok token.Token
mode ParseMode = .invalid
n_level int
convert_type bool = true
}
fn (mut p Parser) next() {
p.p_tok = p.tok
p.tok = p.n_tok
p.n_tok = p.nn_tok
p.nn_tok = p.scanner.scan()
p.n_tok = p.scanner.scan()
}
fn (p Parser) emit_error(msg string) string {
@ -53,18 +52,20 @@ fn (p Parser) emit_error(msg string) string {
cur := p.tok
mut pp := util.imax(0, util.imin(source.len - 1, cur.pos))
if source.len > 0 {
for ; pp >= 0; pp-- {
if source[pp] == `\r` || source[pp] == `\n` {
break
for pp >= 0 {
if source[pp] !in [`\r`, `\n`] {
pp--
continue
}
break
}
}
column := util.imax(0, cur.pos - pp + cur.len - 1)
line := cur.line_nr
return '[jisoni] ' + msg + ' (At line $line, column $column)'
return '[jisoni] $msg ($line:$column)'
}
fn new_parser(srce string) Parser {
fn new_parser(srce string, convert_type bool) Parser {
mut src := srce
// from v/util/util.v
if src.len >= 3 {
@ -77,23 +78,35 @@ fn new_parser(srce string) Parser {
}
}
}
mut p := Parser{
return Parser{
scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{}),
convert_type: convert_type
}
return p
}
fn check_valid_hex(str string) ?bool {
fn check_valid_hex(str string) ? {
if str.len != 4 {
return error('Hex string must be 4 characters.')
return error('hex string must be 4 characters.')
}
for l in str {
if l.is_hex_digit() { continue }
return error('Provided string is not a hex digit.')
return error('provided string is not a hex digit.')
}
}
return true
// decode runs the parser over the whole source and returns the decoded value.
// Every failure is routed through `emit_error`, which prefixes the message
// and appends the position of the current token.
fn (mut p Parser) decode() ?Any {
	p.detect_parse_mode()
	if p.mode == .invalid {
		return error(p.emit_error('invalid JSON.'))
	}
	decoded := p.decode_value() or {
		return error(p.emit_error(err))
	}
	// Anything left after the top-level value is a stray token.
	if p.tok.kind == .eof {
		return decoded
	}
	return error(p.emit_error('unknown token `$p.tok.kind`.'))
}
fn (p Parser) is_formfeed() bool {
@ -121,7 +134,6 @@ fn (mut p Parser) detect_parse_mode() {
p.tok = p.scanner.scan()
p.n_tok = p.scanner.scan()
p.nn_tok = p.scanner.scan()
if src.len == 1 && p.tok.kind == .string && p.n_tok.kind == .eof {
p.mode == .invalid
@ -149,88 +161,76 @@ fn (mut p Parser) detect_parse_mode() {
}
fn (mut p Parser) decode_value() ?Any {
mut fi := Any{}
if p.n_level == 500 {
return error('reached maximum nesting level of 500.')
}
if (p.tok.kind == .lsbr && p.n_tok.kind == .lcbr) || (p.p_tok.kind == p.tok.kind && p.tok.kind == .lsbr) {
p.n_level++
}
if p.n_level == 500 {
return error('Reached maximum nesting level of 500.')
}
match p.tok.kind {
.lsbr {
item := p.decode_array()?
fi = item
return p.decode_array()
}
.lcbr {
item := p.decode_object()?
fi = item
return p.decode_object()
}
.number {
item := p.decode_number()?
fi = item
return p.decode_number()
}
.key_true {
fi = Any(true)
p.next()
return if p.convert_type { Any(true) } else { Any('true') }
}
.key_false {
fi = Any(false)
p.next()
return if p.convert_type { Any(false) } else { Any('false') }
}
.name {
if p.tok.lit != 'null' {
return error('Unknown identifier `$p.tok.lit`')
return error('unknown identifier `$p.tok.lit`')
}
fi = Any(Null{})
p.next()
return if p.convert_type { Any(Null{}) } else { Any('null') }
}
.string {
if p.is_singlequote() {
return error('Strings must be in double-quotes.')
return error('strings must be in double-quotes.')
}
item := p.decode_string() or {
return error(err)
}
fi = item
return p.decode_string()
}
else {
if p.tok.kind == .minus && p.n_tok.kind == .number && p.n_tok.pos == p.tok.pos+1 {
p.next()
d_num := p.decode_number() or {
return error(err)
}
p.next()
fi = d_num
return fi
d_num := p.decode_number()?
return d_num
}
return error('[decode_value] Unknown token `$p.tok.lit`')
return error('unknown token \'$p.tok.lit\' when decoding value')
}
}
p.next()
if p.is_formfeed() {
return error(formfeed_err)
}
return fi
return Any{}
}
fn (mut p Parser) decode_string() ?Any {
mut strwr := strings.new_builder(200)
mut fi := Any{}
for i := 0; i < p.tok.lit.len; i++ {
// s := p.tok.lit[i].str()
// println('$i $s')
if ((i-1 >= 0 && p.tok.lit[i-1] != `/`) || i == 0) && int(p.tok.lit[i]) in [9, 10, 0] {
return error('Character must be escaped with a backslash.')
return error('character must be escaped with a backslash.')
}
if i == p.tok.lit.len-1 && p.tok.lit[i] == 92 {
return error('Invalid backslash escape.')
return error('invalid backslash escape.')
}
if i+1 < p.tok.lit.len && p.tok.lit[i] == 92 {
@ -239,15 +239,13 @@ fn (mut p Parser) decode_string() ?Any {
if peek == `u` {
if i+5 < p.tok.lit.len {
codepoint := p.tok.lit[i+2..i+6]
check_valid_hex(codepoint) or {
return error(err)
}
check_valid_hex(codepoint)?
hex_val := strconv.parse_int(codepoint, 16, 0)
strwr.write_b(byte(hex_val))
i += 5
continue
} else {
return error('Incomplete unicode escape.')
return error('incomplete unicode escape.')
}
}
@ -255,24 +253,29 @@ fn (mut p Parser) decode_string() ?Any {
strwr.write_b(p.tok.lit[i])
continue
} else {
return error('Invalid backslash escape.')
return error('invalid backslash escape.')
}
if peek == 85 {
return error('Unicode endpoints must be in lowercase `u`.')
if int(peek) == 85 {
return error('unicode endpoints must be in lowercase `u`.')
}
if int(peek) in [9, 229] {
return error('Unicode endpoint not allowed.')
return error('unicode endpoint not allowed.')
}
}
strwr.write_b(p.tok.lit[i])
}
fi = strwr.str()
return fi
p.next()
defer {
strwr.free()
}
str := strwr.str()
return Any(str)
}
// returns the raw literal as a string when `convert_type` is false, otherwise an int or f64
fn (mut p Parser) decode_number() ?Any {
src := p.scanner.text
mut tl := p.tok.lit
@ -280,37 +283,46 @@ fn (mut p Parser) decode_number() ?Any {
sep_by_dot := tl.to_lower().split('.')
if tl.starts_with('0x') && tl.all_after('0x').len <= 2 {
return error('Hex numbers should not be less than or equal to two digits.')
return error('hex numbers should not be less than or equal to two digits.')
}
if src[p.p_tok.pos + p.p_tok.len] == `0` && src[p.p_tok.pos + p.p_tok.len + 1].is_digit() {
return error('Leading zeroes in integers are not allowed.')
return error('leading zeroes in integers are not allowed.')
}
if tl.starts_with('.') {
return error('Decimals must start with a digit followed by a dot.')
return error('decimals must start with a digit followed by a dot.')
}
if tl.ends_with('+') || tl.ends_with('-') {
return error('Exponents must have a digit before the sign.')
return error('exponents must have a digit before the sign.')
}
if sep_by_dot.len > 1 {
// analyze json number structure
// -[digit][dot][digit][E/e][-/+][digit]
// float number
is_fl = true
last := sep_by_dot.last()
if last.starts_with('e') {
return error('Exponents must have a digit before the exponent notation.')
return error('exponents must have a digit before the exponent notation.')
}
}
if p.p_tok.kind == .minus && p.tok.pos == p.p_tok.pos+1 {
tl = '-' + tl
tl = '-$tl'
}
return if is_fl { Any(tl.f64()) } else { Any(tl.int()) }
p.next()
if p.convert_type {
return if is_fl {
Any(tl.f64())
} else {
Any(tl.int())
}
}
return Any(tl)
}
fn (mut p Parser) decode_array() ?Any {
@ -321,10 +333,7 @@ fn (mut p Parser) decode_array() ?Any {
return error(eof_err)
}
item := p.decode_value() or {
return error(err)
}
item := p.decode_value()?
items << item
if p.tok.kind == .comma && p.n_tok.kind !in [.rsbr, .comma] {
p.next()
@ -335,16 +344,15 @@ fn (mut p Parser) decode_array() ?Any {
break
}
return error('Unknown token `$p.tok.lit` when decoding arrays.')
return error('unknown token \'$p.tok.lit\' when decoding arrays.')
}
p.next()
return Any(items)
}
fn (mut p Parser) decode_object() ?Any {
mut fields := map[string]Any
mut cur_key := ''
p.next()
for p.tok.kind != .rcbr {
@ -360,33 +368,27 @@ fn (mut p Parser) decode_object() ?Any {
}
if p.is_singlequote() {
return error('Object keys must be in single quotes.')
return error('object keys must be in single quotes.')
}
if !is_key {
return error('Invalid token `$p.tok.lit`, expected `string`')
return error('invalid token `$p.tok.lit`, expected \'string\'')
}
cur_key = p.tok.lit
p.next()
p.next()
item := p.decode_value() or {
return error(err)
}
fields[cur_key] = item
fields[cur_key] = p.decode_value()?
if p.tok.kind == .comma && p.n_tok.kind !in [.rcbr, .comma] {
p.next()
continue
}
if p.tok.kind == .rcbr {
} else if p.tok.kind == .rcbr {
break
}
return error('Unknown token `$p.tok.lit` when decoding object.')
return error('unknown token \'$p.tok.lit\' when decoding object.')
}
p.next()
return Any(fields)
}

View File

@ -4,44 +4,54 @@
module json2
import strings
// write_value appends the string form of `v` to `wr`, wrapping it in double
// quotes when `v` holds a string. A trailing comma is written unless `i` is
// the last index of a collection of length `len`.
fn write_value(v Any, i int, len int, mut wr strings.Builder) {
	text := v.str()
	if v is string {
		wr.write('"$text"')
	} else {
		wr.write(text)
	}
	if i < len - 1 {
		wr.write_b(`,`)
	}
}
// String representation of the `map[string]Any`.
pub fn (flds map[string]Any) str() string {
mut wr := strings.new_builder(200)
wr.write('{')
wr.write_b(`{`)
mut i := 0
for k, v in flds {
wr.write('"$k":')
if v is string {
wr.write('"' + *v + '"')
} else {
wr.write(v.str())
}
if i < flds.len-1 { wr.write(',') }
write_value(v, i, flds.len, mut wr)
i++
}
wr.write('}')
return wr.str()
wr.write_b(`}`)
defer {
wr.free()
}
res := wr.str()
return res
}
// String representation of the `[]Any`.
pub fn (flds []Any) str() string {
mut wr := strings.new_builder(200)
wr.write('[')
wr.write_b(`[`)
for i, v in flds {
if v is string {
wr.write('"' + *v + '"')
} else {
wr.write(v.str())
write_value(v, i, flds.len, mut wr)
}
if i < flds.len-1 { wr.write(',') }
wr.write_b(`]`)
defer {
wr.free()
}
wr.write(']')
return wr.str()
res := wr.str()
return res
}
// String representation of the `Any` type.
pub fn (f Any) str() string {
match f {
string { return *f }
int { return (*f).str() }
i64 { return (*f).str() }
f32 { return (*f).str() }
f64 { return (*f).str() }
any_int { return (*f).str() }
any_float { return (*f).str() }

View File

@ -10,22 +10,14 @@ pub interface Serializable {
// Decodes a JSON string into an `Any` type. Returns an option.
pub fn raw_decode(src string) ?Any {
mut p := new_parser(src)
p.detect_parse_mode()
if p.mode == .invalid {
return error(p.emit_error('Invalid JSON.'))
mut p := new_parser(src, true)
return p.decode()
}
fi := p.decode_value() or {
return error(p.emit_error(err))
}
if p.tok.kind != .eof {
return error(p.emit_error('Unknown token `$p.tok.kind`.'))
}
return fi
// Works like `raw_decode`, but creates the parser with type conversion turned off.
// Returns an option.
pub fn fast_raw_decode(src string) ?Any {
	mut parser := new_parser(src, false)
	return parser.decode()
}
// A generic function that decodes a JSON string into the target type.
//
@ -38,8 +30,33 @@ pub fn decode<T>(src string) T {
typ.from_json(res)
return typ
}
// TODO: decode must return an optional generic type
// pub fn decode2<T>(src string) ?T {
// res := raw_decode(src)?
// match typeof(T) {
// 'string' {
// return res.str()
// }
// 'int' {
// return res.int()
// }
// 'f64' {
// return res.f64()
// }
// else {
// mut typ := T{}
// typ.from_json(res)
// return typ
// }
// }
// }
// A generic function that encodes a type into a JSON string.
// It delegates to the `to_json()` method of `typ` and returns its result.
pub fn encode<T>(typ T) string {
// Disabled branch: would have encoded `string`, `int` and `f64` values through `Any` directly.
// if typeof(typ) in ['string', 'int', 'f64'] {
// return Any(typ).str()
// }
return typ.to_json()
}
// A simple function that returns `Null` struct. For use on constructing an `Any` object.
@ -48,60 +65,61 @@ pub fn null() Null {
}
// Use `Any` as a map.
pub fn (f Any) as_map() map[string]Any {
mut mp := map[string]Any
match f {
map[string]Any {
if f is map[string]Any {
return *f
}
string {
mp['0'] = f
return mp
}
int {
mp['0'] = f
return mp
}
bool {
mp['0'] = f
return mp
}
f64 {
mp['0'] = f
return mp
}
Null {
mp['0'] = f
return mp
}
else {
if typeof(f) == 'array_Any' {
arr := f as []Any
} else if f is []Any {
mut mp := map[string]Any
arr := f
for i, fi in arr {
mp[i.str()] = fi
mp['$i'] = fi
}
return mp
}
return mp
}
}
return { '0': f }
}
// Use `Any` as an integer.
// Matches on the variant held by `f`:
// - `int` is returned as-is,
// - `i64` is narrowed with an `int(...)` cast,
// - `f64` and `f32` go through the string form of `f` and then `.int()`,
// - `bool` is cast with `int(...)`,
// - every other variant yields 0.
pub fn (f Any) int() int {
match f {
int { return *f }
i64 { return int(*f) }
// NOTE(review): presumably drops the fractional part - confirm against `string.int()`.
f64 { return f.str().int() }
f32 { return f.str().int() }
bool { return int(f) }
else { return 0 }
}
}
// Use `Any` as a 64-bit integer.
// Matches on the variant held by `f`:
// - `int` and `i64` are returned directly,
// - `f64` and `f32` go through the string form of `f` and then `.i64()`,
// - `bool` is cast with `int(...)`,
// - every other variant yields 0.
pub fn (f Any) i64() i64 {
match f {
int { return *f }
// Return the stored value directly; casting through `int` would narrow it to 32 bits.
i64 { return *f }
f64 { return f.str().i64() }
f32 { return f.str().i64() }
bool { return int(f) }
else { return 0 }
}
}
// Use `Any` as a 32-bit float.
// Matches on the variant held by `f`:
// - `int` and `f32` are returned directly,
// - `i64` and `f64` are converted through their string form and `.f32()`,
// - every other variant yields 0.0.
pub fn (f Any) f32() f32 {
match f {
int { return *f }
i64 { return (*f).str().f32() }
f64 { return (*f).str().f32() }
f32 { return *f }
else { return 0.0 }
}
}
// Use `Any` as a float.
// Matches on the variant held by `f`:
// - `int`, `i64` and `f64` are returned directly,
// - `f32` is converted through its string form and `.f64()`,
// - every other variant yields 0.0.
pub fn (f Any) f64() f64 {
match f {
int { return *f }
i64 { return *f }
f64 { return *f }
f32 { return (*f).str().f64() }
else { return 0.0 }
}
}
@ -109,9 +127,7 @@ pub fn (f Any) f64() f64 {
pub fn (f Any) arr() []Any {
if f is []Any {
return *f
}
if f is map[string]Any {
} else if f is map[string]Any {
mut arr := []Any{}
mp := *f
for _, v in mp {
@ -119,7 +135,6 @@ pub fn (f Any) arr() []Any {
}
return arr
}
return [f]
}

View File

@ -17,7 +17,7 @@ fn (e Employee) to_json() string {
mut mp := map[string]json2.Any
mp['name'] = e.name
mp['age'] = e.age
mp['salary'] = f64(e.salary)
mp['salary'] = e.salary
mp['title'] = int(e.title)
/*
@ -58,6 +58,17 @@ fn test_simple() {
assert ym['title'].int() == 2
}
// Decodes a small object with `fast_raw_decode` and checks its re-serialized
// form: the numeric fields are expected to come back as quoted strings.
fn test_fast_raw_decode() {
	input := '{"name":"Peter","age":28,"salary":95000.5,"title":2}'
	expected := '{"name":"Peter","age":"28","salary":"95000.5","title":"2"}'
	decoded := json2.fast_raw_decode(input) or {
		assert false
		json2.Any{}
	}
	assert decoded.str() == expected
}
/*
struct User2 {
age int