x.json2: add customized JSON output capability via Encoder (#13654)

pull/13658/head
Ned 2022-03-04 19:39:23 +08:00 committed by GitHub
parent 74d5106e8f
commit 437fa02f27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 275 additions and 168 deletions

View File

@ -236,13 +236,13 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
match value {
ast.Quoted {
json_text := json2.Any(value.text).json_str()
return '{ "type": "string", "value": "$json_text" }'
return '{ "type": "string", "value": $json_text }'
}
ast.DateTime {
// Normalization for json
mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ',
'T')
typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
|| json_text.all_after('T').contains('+') {
'datetime'
} else {
@ -252,16 +252,16 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
// It seems it's implementation specific how time and
// date-time values are represented in detail. For now we follow the BurntSushi format
// that expands to 6 digits which is also a valid RFC 3339 representation.
json_text = to_alexcrichton_time(json_text)
json_text = to_alexcrichton_time(json_text[1..json_text.len - 1])
return '{ "type": "$typ", "value": "$json_text" }'
}
ast.Date {
json_text := json2.Any(value.text).json_str()
return '{ "type": "date", "value": "$json_text" }'
return '{ "type": "date", "value": $json_text }'
}
ast.Time {
mut json_text := json2.Any(value.text).json_str()
json_text = to_alexcrichton_time(json_text)
json_text = to_alexcrichton_time(json_text[1..json_text.len - 1])
return '{ "type": "time", "value": "$json_text" }'
}
ast.Bool {
@ -270,12 +270,12 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
}
ast.Null {
json_text := json2.Any(value.text).json_str()
return '{ "type": "null", "value": "$json_text" }'
return '{ "type": "null", "value": $json_text }'
}
ast.Number {
text := value.text
if text.contains('inf') || text.contains('nan') {
return '{ "type": "float", "value": "$value.text" }'
return '{ "type": "float", "value": $value.text }'
}
if !text.starts_with('0x') && (text.contains('.') || text.to_lower().contains('e')) {
mut val := ''
@ -297,7 +297,7 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
mut str := '{ '
for key, val in value {
json_key := json2.Any(key).json_str()
str += ' "$json_key": ${to_alexcrichton(val, array_type)},'
str += ' $json_key: ${to_alexcrichton(val, array_type)},'
}
str = str.trim_right(',')
str += ' }'

View File

@ -199,26 +199,30 @@ fn to_burntsushi(value ast.Value) string {
match value {
ast.Quoted {
json_text := json2.Any(value.text).json_str()
return '{ "type": "string", "value": "$json_text" }'
return '{ "type": "string", "value": $json_text }'
}
ast.DateTime {
// Normalization for json
json_text := json2.Any(value.text).json_str().to_upper().replace(' ', 'T')
typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
// NB: Since encoding strings in JSON now automatically includes quotes,
// the check below also matches the closing quote after `Z` as a workaround,
// so that the date-time type is still recognized properly. - Ned
typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
|| json_text.all_after('T').contains('+') {
'datetime'
} else {
'datetime-local'
}
return '{ "type": "$typ", "value": "$json_text" }'
return '{ "type": "$typ", "value": $json_text }'
}
ast.Date {
json_text := json2.Any(value.text).json_str()
return '{ "type": "date-local", "value": "$json_text" }'
return '{ "type": "date-local", "value": $json_text }'
}
ast.Time {
json_text := json2.Any(value.text).json_str()
return '{ "type": "time-local", "value": "$json_text" }'
return '{ "type": "time-local", "value": $json_text }'
}
ast.Bool {
json_text := json2.Any(value.text.bool()).json_str()
@ -226,7 +230,7 @@ fn to_burntsushi(value ast.Value) string {
}
ast.Null {
json_text := json2.Any(value.text).json_str()
return '{ "type": "null", "value": "$json_text" }'
return '{ "type": "null", "value": $json_text }'
}
ast.Number {
if value.text.contains('inf') || value.text.contains('nan') {
@ -251,7 +255,7 @@ fn to_burntsushi(value ast.Value) string {
mut str := '{ '
for key, val in value {
json_key := json2.Any(key).json_str()
str += ' "$json_key": ${to_burntsushi(val)},'
str += ' $json_key: ${to_burntsushi(val)},'
}
str = str.trim_right(',')
str += ' }'

View File

@ -288,15 +288,15 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
ast.Quoted {
json_text := json2.Any(value.text).json_str()
if skip_value_map {
return '"$json_text"'
return json_text
}
return '{ "type": "string", "value": "$json_text" }'
return '{ "type": "string", "value": $json_text }'
}
ast.DateTime {
// Normalization for json
mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ',
'T')
typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
|| json_text.all_after('T').contains('+') {
'datetime'
} else {
@ -306,40 +306,41 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
// It seems it's implementation specific how time and
// date-time values are represented in detail. For now we follow the BurntSushi format
// that expands to 6 digits which is also a valid RFC 3339 representation.
json_text = to_iarna_time(json_text)
json_text = to_iarna_time(json_text[1..json_text.len - 1])
if skip_value_map {
return '"$json_text"'
return json_text
}
return '{ "type": "$typ", "value": "$json_text" }'
}
ast.Date {
json_text := json2.Any(value.text).json_str()
if skip_value_map {
return '"$json_text"'
return json_text
}
return '{ "type": "date", "value": "$json_text" }'
return '{ "type": "date", "value": $json_text }'
}
ast.Time {
mut json_text := json2.Any(value.text).json_str()
json_text = to_iarna_time(json_text)
// NB: Removes the quotes of the encoded JSON string - Ned
json_text = to_iarna_time(json_text[1..json_text.len - 1])
if skip_value_map {
return '"$json_text"'
return json_text
}
return '{ "type": "time", "value": "$json_text" }'
}
ast.Bool {
json_text := json2.Any(value.text.bool()).json_str()
if skip_value_map {
return '$json_text'
return json_text
}
return '{ "type": "bool", "value": "$json_text" }'
}
ast.Null {
json_text := json2.Any(value.text).json_str()
if skip_value_map {
return '$json_text'
return json_text
}
return '{ "type": "null", "value": "$json_text" }'
return '{ "type": "null", "value": $json_text }'
}
ast.Number {
if value.text.contains('inf') {
@ -384,7 +385,7 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
mut str := '{ '
for key, val in value {
json_key := json2.Any(key).json_str()
str += ' "$json_key": ${to_iarna(val, skip_value_map)},'
str += ' $json_key: ${to_iarna(val, skip_value_map)},'
}
str = str.trim_right(',')
str += ' }'

View File

@ -27,19 +27,16 @@ fn any_to_json(a toml.Any) string {
return 'null'
}
toml.DateTime {
json_text := json2.Any(a.str())
return '"$json_text.json_str()"'
return json2.Any(a.str()).json_str()
}
toml.Date {
json_text := json2.Any(a.str())
return '"$json_text.json_str()"'
return json2.Any(a.str()).json_str()
}
toml.Time {
json_text := json2.Any(a.str())
return '"$json_text.json_str()"'
return json2.Any(a.str()).json_str()
}
string {
return '"' + json2.Any(a.str()).json_str() + '"'
return json2.Any(a.str()).json_str()
}
bool {
return json2.Any(bool(a)).json_str()
@ -63,7 +60,7 @@ fn any_to_json(a toml.Any) string {
mut str := '{'
for key, val in a {
json_key := json2.Any(key)
str += ' "$json_key.json_str()": ${any_to_json(val)},'
str += ' $json_key.json_str(): ${any_to_json(val)},'
}
str = str.trim_right(',')
str += ' }'

View File

@ -3,54 +3,125 @@
// that can be found in the LICENSE file.
module json2
import io
import strings
fn write_value(v Any, i int, len int, mut wr strings.Builder) {
str := v.json_str()
if v is string {
wr.write_string('"$str"')
} else {
wr.write_string(str)
}
if i >= len - 1 {
return
}
wr.write_byte(`,`)
// Encoder encodes an `Any` value into its JSON representation.
// It provides parameters to customize the resulting output.
pub struct Encoder {
newline byte
newline_spaces_count int
escape_unicode bool = true
}
// str returns the string representation of the `map[string]Any`.
[manualfree]
pub fn (flds map[string]Any) str() string {
mut wr := strings.new_builder(200)
wr.write_byte(`{`)
mut i := 0
for k, v in flds {
wr.write_string('"$k":')
write_value(v, i, flds.len, mut wr)
i++
}
wr.write_byte(`}`)
defer {
unsafe { wr.free() }
}
res := wr.str()
return res
// byte array versions of the most common tokens/chars
// to avoid reallocations
const null_in_bytes = 'null'.bytes()
const true_in_bytes = 'true'.bytes()
const false_in_bytes = 'false'.bytes()
const zero_in_bytes = [byte(`0`)]
const comma_bytes = [byte(`,`)]
const colon_bytes = [byte(`:`)]
const space_bytes = [byte(` `)]
const unicode_escape_chars = [byte(`\\`), `u`]
const quote_bytes = [byte(`"`)]
const escaped_chars = [(r'\b').bytes(), (r'\f').bytes(), (r'\n').bytes(),
(r'\r').bytes(), (r'\t').bytes()]
// encode_value encodes an `Any` value to the specific writer.
pub fn (e &Encoder) encode_value(f Any, mut wr io.Writer) ? {
e.encode_value_with_level(f, 1, mut wr) ?
}
// str returns the string representation of the `[]Any`.
[manualfree]
pub fn (flds []Any) str() string {
mut wr := strings.new_builder(200)
wr.write_byte(`[`)
for i, v in flds {
write_value(v, i, flds.len, mut wr)
fn (e &Encoder) encode_newline(level int, mut wr io.Writer) ? {
if e.newline != 0 {
wr.write([e.newline]) ?
for j := 0; j < level * e.newline_spaces_count; j++ {
wr.write(json2.space_bytes) ?
}
}
wr.write_byte(`]`)
defer {
unsafe { wr.free() }
}
fn (e &Encoder) encode_value_with_level(f Any, level int, mut wr io.Writer) ? {
match f {
string {
e.encode_string(f, mut wr) ?
}
bool {
if f == true {
wr.write(json2.true_in_bytes) ?
} else {
wr.write(json2.false_in_bytes) ?
}
}
int, u64, i64 {
wr.write(f.str().bytes()) ?
}
f32, f64 {
$if !nofloat ? {
str_float := f.str().bytes()
wr.write(str_float) ?
if str_float[str_float.len - 1] == `.` {
wr.write(json2.zero_in_bytes) ?
}
return
}
wr.write(json2.zero_in_bytes) ?
}
map[string]Any {
wr.write([byte(`{`)]) ?
mut i := 0
for k, v in f {
e.encode_newline(level, mut wr) ?
e.encode_string(k, mut wr) ?
wr.write(json2.colon_bytes) ?
if e.newline != 0 {
wr.write(json2.space_bytes) ?
}
e.encode_value_with_level(v, level + 1, mut wr) ?
if i < f.len - 1 {
wr.write(json2.comma_bytes) ?
}
i++
}
e.encode_newline(level - 1, mut wr) ?
wr.write([byte(`}`)]) ?
}
[]Any {
wr.write([byte(`[`)]) ?
for i, v in f {
e.encode_newline(level, mut wr) ?
e.encode_value_with_level(v, level + 1, mut wr) ?
if i < f.len - 1 {
wr.write(json2.comma_bytes) ?
}
}
e.encode_newline(level - 1, mut wr) ?
wr.write([byte(`]`)]) ?
}
Null {
wr.write(json2.null_in_bytes) ?
}
}
res := wr.str()
return res
}
// str serializes the `map[string]Any` value and returns its JSON text.
pub fn (f map[string]Any) str() string {
	as_any := Any(f)
	return as_any.json_str()
}
// str serializes the `[]Any` value and returns its JSON text.
pub fn (f []Any) str() string {
	as_any := Any(f)
	return as_any.json_str()
}
// str returns the string representation of the `Any` type. Use the `json_str` method
@ -64,113 +135,102 @@ pub fn (f Any) str() string {
}
// json_str returns the JSON string representation of the `Any` type.
pub fn (f Any) json_str() string {
match f {
string {
return json_string(f)
}
bool, int, u64, i64 {
return f.str()
}
f32 {
$if !nofloat ? {
str_f32 := f.str()
if str_f32.ends_with('.') {
return '${str_f32}0'
}
return str_f32
}
return '0'
}
f64 {
$if !nofloat ? {
str_f64 := f.str()
if str_f64.ends_with('.') {
return '${str_f64}0'
}
return str_f64
}
return '0'
}
map[string]Any {
return f.str()
}
[]Any {
return f.str()
}
Null {
return 'null'
}
}
}
// char_len_list is a modified version of builtin.utf8_str_len
// that returns an array of character lengths. (e.g "t✔" => [1,2])
fn char_len_list(s string) []int {
mut l := 1
mut ls := []int{}
for i := 0; i < s.len; i++ {
c := s[i]
if (c & (1 << 7)) != 0 {
for t := byte(1 << 6); (c & t) != 0; t >>= 1 {
l++
i++
}
}
ls << l
l = 1
}
return ls
}
const escaped_chars = [r'\b', r'\f', r'\n', r'\r', r'\t']
// json_string returns the JSON spec-compliant version of the string.
[manualfree]
fn json_string(s string) string {
// not the best implementation but will revisit it soon
char_lens := char_len_list(s)
mut sb := strings.new_builder(s.len)
mut i := 0
pub fn (f Any) json_str() string {
mut sb := strings.new_builder(4096)
defer {
unsafe {
char_lens.free()
// freeing string builder on defer after
// returning .str() still isn't working :(
// sb.free()
unsafe { sb.free() }
}
mut enc := Encoder{}
enc.encode_value(f, mut sb) or { return '' }
return sb.str()
}
// prettify_json_str encodes the `Any` value as multi-line JSON: the encoder is
// configured with a `\n` newline and 4 spaces of indentation per nesting level.
// An empty string is returned if the encoder reports an error.
[manualfree]
pub fn (f Any) prettify_json_str() string {
	pretty_encoder := Encoder{
		newline: `\n`
		newline_spaces_count: 4
	}
	mut buf := strings.new_builder(4096)
	defer {
		unsafe { buf.free() }
	}
	pretty_encoder.encode_value(f, mut buf) or { return '' }
	return buf.str()
}
// CharLengthIterator is an iterator that yields, on every iteration,
// the length in bytes of the next UTF-8 encoded character of the given text.
// (e.g.: "t✔" => [t => 1, ✔ => 3])
struct CharLengthIterator {
// text is the string whose characters are measured.
text string
mut:
// idx is the byte offset in `text` at which the next character starts.
idx int
}
// next returns the byte length of the character starting at the current
// position and advances the iterator past it. It returns `none` once the
// whole text has been consumed.
//
// The length is derived from the leading byte: one extra byte is counted for
// every consecutive set bit below the most significant one. If the text ends
// in the middle of a multi-byte sequence (malformed or truncated input), the
// length is clamped to the bytes that are actually left, so that callers which
// slice the text by the returned length never read past its end.
fn (mut iter CharLengthIterator) next() ?int {
	if iter.idx >= iter.text.len {
		return none
	}
	lead := iter.text[iter.idx]
	mut len := 1
	if (lead & (1 << 7)) != 0 {
		// non-ASCII leading byte: count the continuation bytes it announces
		for t := byte(1 << 6); (lead & t) != 0; t >>= 1 {
			len++
		}
	}
	// never report more bytes than the text still holds
	remaining := iter.text.len - iter.idx
	if len > remaining {
		len = remaining
	}
	iter.idx += len
	return len
}
// encode_string returns the JSON spec-compliant version of the string.
[manualfree]
fn (e &Encoder) encode_string(s string, mut wr io.Writer) ? {
mut char_lens := CharLengthIterator{
text: s
}
mut i := 0
wr.write(json2.quote_bytes) ?
for char_len in char_lens {
if char_len == 1 {
chr := s[i]
if chr in important_escapable_chars {
for j := 0; j < important_escapable_chars.len; j++ {
if chr == important_escapable_chars[j] {
sb.write_string(json2.escaped_chars[j])
wr.write(json2.escaped_chars[j]) ?
break
}
}
} else if chr == `"` || chr == `/` || chr == `\\` {
sb.write_string('\\' + chr.ascii_str())
wr.write([byte(`\\`), chr]) ?
} else if int(chr) < 0x20 {
hex_code := chr.hex()
sb.write_string('\\u00$hex_code')
hex_code := chr.hex().bytes()
wr.write(json2.unicode_escape_chars) ? // \u
wr.write(json2.zero_in_bytes) ? // \u0
wr.write(json2.zero_in_bytes) ? // \u00
wr.write(hex_code) ? // \u00xxxx
} else {
sb.write_byte(chr)
wr.write([byte(chr)]) ?
}
} else {
slice := s[i..i + char_len]
hex_code := slice.utf32_code().hex()
if hex_code.len < 4 {
// an utf8 codepoint
sb.write_string(slice)
hex_code := slice.utf32_code().hex().bytes()
if !e.escape_unicode || hex_code.len < 4 {
// unescaped non-ASCII char
wr.write(slice.bytes()) ?
} else if hex_code.len == 4 {
sb.write_string('\\u$hex_code')
// a unicode endpoint
wr.write(json2.unicode_escape_chars) ?
wr.write(hex_code) ?
} else {
// TODO: still figuring out what
// to do with more than 4 chars
sb.write_byte(` `)
wr.write(json2.space_bytes) ?
}
unsafe {
slice.free()
@ -179,7 +239,6 @@ fn json_string(s string) string {
}
i += char_len
}
str := sb.str()
unsafe { sb.free() }
return str
wr.write(json2.quote_bytes) ?
}

View File

@ -1,20 +1,21 @@
import x.json2
import strings
fn test_json_string_characters() {
text := json2.raw_decode(r'"\n\r\b\f\t\\\"\/"') or { '' }
assert text.json_str() == '\\n\\r\\b\\f\\t\\\\\\"\\/'
assert text.json_str() == '"\\n\\r\\b\\f\\t\\\\\\"\\/"'
}
fn test_json_escape_low_chars() {
esc := '\u001b'
assert esc.len == 1
text := json2.Any(esc)
assert text.json_str() == r'\u001b'
assert text.json_str() == r'"\u001b"'
}
fn test_json_string() {
text := json2.Any('test')
assert text.json_str() == r'te\u2714st'
assert text.json_str() == r'"te\u2714st"'
boolean := json2.Any(true)
assert boolean.json_str() == 'true'
integer := json2.Any(int(-5))
@ -27,12 +28,12 @@ fn test_json_string() {
fn test_json_string_emoji() {
text := json2.Any('🐈')
assert text.json_str() == r' '
assert text.json_str() == r'" "'
}
fn test_json_string_non_ascii() {
text := json2.Any('')
assert text.json_str() == r'\u3072\u3089\u304c\u306a'
assert text.json_str() == r'"\u3072\u3089\u304c\u306a"'
}
fn test_utf8_strings_are_not_modified() ? {
@ -42,3 +43,48 @@ fn test_utf8_strings_are_not_modified() ? {
// dump(deresult)
assert deresult.str() == original
}
// test_encoder_unescaped_utf32 checks that an Encoder created with
// `escape_unicode: false` writes non-ASCII characters verbatim (only wrapped
// in quotes) instead of replacing them with `\uXXXX` escapes.
fn test_encoder_unescaped_utf32() ? {
	enc := json2.Encoder{
		escape_unicode: false
	}
	mut sb := strings.new_builder(20)

	// multi-byte characters that would be escaped as \u3072\u3089\u304c\u306a
	// when `escape_unicode` is left enabled
	jap_text := json2.Any('ひらがな')
	enc.encode_value(jap_text, mut sb) ?
	assert sb.str() == '"$jap_text"'

	// a 4-byte character, which the escaping path cannot represent
	sb.go_back_to(0)
	emoji_text := json2.Any('🐈')
	enc.encode_value(emoji_text, mut sb) ?
	assert sb.str() == '"$emoji_text"'
}
// test_encoder_prettify checks the multi-line output of an Encoder configured
// with a `\n` newline and 2 spaces per nesting level: every map entry and
// array element starts on its own line, indented by its depth, and closing
// brackets are indented one level less than their contents.
fn test_encoder_prettify() ? {
	obj := {
		'hello': json2.Any('world')
		'arr':   [json2.Any('im a string'), [json2.Any('3rd level')]]
		'obj':   {
			'map': json2.Any('map inside a map')
		}
	}
	enc := json2.Encoder{
		newline: `\n`
		newline_spaces_count: 2
	}
	mut sb := strings.new_builder(20)
	enc.encode_value(obj, mut sb) ?
	// NOTE: the expected text is a raw multi-line literal, so its lines must
	// start at column 0 plus the indentation produced by the encoder.
	assert sb.str() == '{
  "hello": "world",
  "arr": [
    "im a string",
    [
      "3rd level"
    ]
  ],
  "obj": {
    "map": "map inside a map"
  }
}'
}