x.json2: add customized JSON output capability via Encoder (#13654)

pull/13658/head
Ned 2022-03-04 19:39:23 +08:00 committed by GitHub
parent 74d5106e8f
commit 437fa02f27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 275 additions and 168 deletions

View File

@ -236,13 +236,13 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
match value { match value {
ast.Quoted { ast.Quoted {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
return '{ "type": "string", "value": "$json_text" }' return '{ "type": "string", "value": $json_text }'
} }
ast.DateTime { ast.DateTime {
// Normalization for json // Normalization for json
mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ', mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ',
'T') 'T')
typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-') typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
|| json_text.all_after('T').contains('+') { || json_text.all_after('T').contains('+') {
'datetime' 'datetime'
} else { } else {
@ -252,16 +252,16 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
// It seems it's implementation specific how time and // It seems it's implementation specific how time and
// date-time values are represented in detail. For now we follow the BurntSushi format // date-time values are represented in detail. For now we follow the BurntSushi format
// that expands to 6 digits which is also a valid RFC 3339 representation. // that expands to 6 digits which is also a valid RFC 3339 representation.
json_text = to_alexcrichton_time(json_text) json_text = to_alexcrichton_time(json_text[1..json_text.len - 1])
return '{ "type": "$typ", "value": "$json_text" }' return '{ "type": "$typ", "value": "$json_text" }'
} }
ast.Date { ast.Date {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
return '{ "type": "date", "value": "$json_text" }' return '{ "type": "date", "value": $json_text }'
} }
ast.Time { ast.Time {
mut json_text := json2.Any(value.text).json_str() mut json_text := json2.Any(value.text).json_str()
json_text = to_alexcrichton_time(json_text) json_text = to_alexcrichton_time(json_text[1..json_text.len - 1])
return '{ "type": "time", "value": "$json_text" }' return '{ "type": "time", "value": "$json_text" }'
} }
ast.Bool { ast.Bool {
@ -270,12 +270,12 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
} }
ast.Null { ast.Null {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
return '{ "type": "null", "value": "$json_text" }' return '{ "type": "null", "value": $json_text }'
} }
ast.Number { ast.Number {
text := value.text text := value.text
if text.contains('inf') || text.contains('nan') { if text.contains('inf') || text.contains('nan') {
return '{ "type": "float", "value": "$value.text" }' return '{ "type": "float", "value": $value.text }'
} }
if !text.starts_with('0x') && (text.contains('.') || text.to_lower().contains('e')) { if !text.starts_with('0x') && (text.contains('.') || text.to_lower().contains('e')) {
mut val := '' mut val := ''
@ -297,7 +297,7 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
mut str := '{ ' mut str := '{ '
for key, val in value { for key, val in value {
json_key := json2.Any(key).json_str() json_key := json2.Any(key).json_str()
str += ' "$json_key": ${to_alexcrichton(val, array_type)},' str += ' $json_key: ${to_alexcrichton(val, array_type)},'
} }
str = str.trim_right(',') str = str.trim_right(',')
str += ' }' str += ' }'

View File

@ -199,26 +199,30 @@ fn to_burntsushi(value ast.Value) string {
match value { match value {
ast.Quoted { ast.Quoted {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
return '{ "type": "string", "value": "$json_text" }' return '{ "type": "string", "value": $json_text }'
} }
ast.DateTime { ast.DateTime {
// Normalization for json // Normalization for json
json_text := json2.Any(value.text).json_str().to_upper().replace(' ', 'T') json_text := json2.Any(value.text).json_str().to_upper().replace(' ', 'T')
typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
// NB: Since encoding strings in JSON now automatically includes quotes,
// a workaround was added: the ending quote is included in the suffix check
// so that the date-time type is still recognized properly. - Ned
typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
|| json_text.all_after('T').contains('+') { || json_text.all_after('T').contains('+') {
'datetime' 'datetime'
} else { } else {
'datetime-local' 'datetime-local'
} }
return '{ "type": "$typ", "value": "$json_text" }' return '{ "type": "$typ", "value": $json_text }'
} }
ast.Date { ast.Date {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
return '{ "type": "date-local", "value": "$json_text" }' return '{ "type": "date-local", "value": $json_text }'
} }
ast.Time { ast.Time {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
return '{ "type": "time-local", "value": "$json_text" }' return '{ "type": "time-local", "value": $json_text }'
} }
ast.Bool { ast.Bool {
json_text := json2.Any(value.text.bool()).json_str() json_text := json2.Any(value.text.bool()).json_str()
@ -226,7 +230,7 @@ fn to_burntsushi(value ast.Value) string {
} }
ast.Null { ast.Null {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
return '{ "type": "null", "value": "$json_text" }' return '{ "type": "null", "value": $json_text }'
} }
ast.Number { ast.Number {
if value.text.contains('inf') || value.text.contains('nan') { if value.text.contains('inf') || value.text.contains('nan') {
@ -251,7 +255,7 @@ fn to_burntsushi(value ast.Value) string {
mut str := '{ ' mut str := '{ '
for key, val in value { for key, val in value {
json_key := json2.Any(key).json_str() json_key := json2.Any(key).json_str()
str += ' "$json_key": ${to_burntsushi(val)},' str += ' $json_key: ${to_burntsushi(val)},'
} }
str = str.trim_right(',') str = str.trim_right(',')
str += ' }' str += ' }'

View File

@ -288,15 +288,15 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
ast.Quoted { ast.Quoted {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
if skip_value_map { if skip_value_map {
return '"$json_text"' return json_text
} }
return '{ "type": "string", "value": "$json_text" }' return '{ "type": "string", "value": $json_text }'
} }
ast.DateTime { ast.DateTime {
// Normalization for json // Normalization for json
mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ', mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ',
'T') 'T')
typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-') typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
|| json_text.all_after('T').contains('+') { || json_text.all_after('T').contains('+') {
'datetime' 'datetime'
} else { } else {
@ -306,40 +306,41 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
// It seems it's implementation specific how time and // It seems it's implementation specific how time and
// date-time values are represented in detail. For now we follow the BurntSushi format // date-time values are represented in detail. For now we follow the BurntSushi format
// that expands to 6 digits which is also a valid RFC 3339 representation. // that expands to 6 digits which is also a valid RFC 3339 representation.
json_text = to_iarna_time(json_text) json_text = to_iarna_time(json_text[1..json_text.len - 1])
if skip_value_map { if skip_value_map {
return '"$json_text"' return json_text
} }
return '{ "type": "$typ", "value": "$json_text" }' return '{ "type": "$typ", "value": "$json_text" }'
} }
ast.Date { ast.Date {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
if skip_value_map { if skip_value_map {
return '"$json_text"' return json_text
} }
return '{ "type": "date", "value": "$json_text" }' return '{ "type": "date", "value": $json_text }'
} }
ast.Time { ast.Time {
mut json_text := json2.Any(value.text).json_str() mut json_text := json2.Any(value.text).json_str()
json_text = to_iarna_time(json_text) // NB: Removes the quotes of the encoded JSON string - Ned
json_text = to_iarna_time(json_text[1..json_text.len - 1])
if skip_value_map { if skip_value_map {
return '"$json_text"' return json_text
} }
return '{ "type": "time", "value": "$json_text" }' return '{ "type": "time", "value": "$json_text" }'
} }
ast.Bool { ast.Bool {
json_text := json2.Any(value.text.bool()).json_str() json_text := json2.Any(value.text.bool()).json_str()
if skip_value_map { if skip_value_map {
return '$json_text' return json_text
} }
return '{ "type": "bool", "value": "$json_text" }' return '{ "type": "bool", "value": "$json_text" }'
} }
ast.Null { ast.Null {
json_text := json2.Any(value.text).json_str() json_text := json2.Any(value.text).json_str()
if skip_value_map { if skip_value_map {
return '$json_text' return json_text
} }
return '{ "type": "null", "value": "$json_text" }' return '{ "type": "null", "value": $json_text }'
} }
ast.Number { ast.Number {
if value.text.contains('inf') { if value.text.contains('inf') {
@ -384,7 +385,7 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
mut str := '{ ' mut str := '{ '
for key, val in value { for key, val in value {
json_key := json2.Any(key).json_str() json_key := json2.Any(key).json_str()
str += ' "$json_key": ${to_iarna(val, skip_value_map)},' str += ' $json_key: ${to_iarna(val, skip_value_map)},'
} }
str = str.trim_right(',') str = str.trim_right(',')
str += ' }' str += ' }'

View File

@ -27,19 +27,16 @@ fn any_to_json(a toml.Any) string {
return 'null' return 'null'
} }
toml.DateTime { toml.DateTime {
json_text := json2.Any(a.str()) return json2.Any(a.str()).json_str()
return '"$json_text.json_str()"'
} }
toml.Date { toml.Date {
json_text := json2.Any(a.str()) return json2.Any(a.str()).json_str()
return '"$json_text.json_str()"'
} }
toml.Time { toml.Time {
json_text := json2.Any(a.str()) return json2.Any(a.str()).json_str()
return '"$json_text.json_str()"'
} }
string { string {
return '"' + json2.Any(a.str()).json_str() + '"' return json2.Any(a.str()).json_str()
} }
bool { bool {
return json2.Any(bool(a)).json_str() return json2.Any(bool(a)).json_str()
@ -63,7 +60,7 @@ fn any_to_json(a toml.Any) string {
mut str := '{' mut str := '{'
for key, val in a { for key, val in a {
json_key := json2.Any(key) json_key := json2.Any(key)
str += ' "$json_key.json_str()": ${any_to_json(val)},' str += ' $json_key.json_str(): ${any_to_json(val)},'
} }
str = str.trim_right(',') str = str.trim_right(',')
str += ' }' str += ' }'

View File

@ -3,54 +3,125 @@
// that can be found in the LICENSE file. // that can be found in the LICENSE file.
module json2 module json2
import io
import strings import strings
fn write_value(v Any, i int, len int, mut wr strings.Builder) { // Encoder encodes an `Any` type into JSON representation.
str := v.json_str() // It provides parameters in order to change the end result.
if v is string { pub struct Encoder {
wr.write_string('"$str"') newline byte
} else { newline_spaces_count int
wr.write_string(str) escape_unicode bool = true
}
if i >= len - 1 {
return
}
wr.write_byte(`,`)
} }
// str returns the string representation of the `map[string]Any`. // byte array versions of the most common tokens/chars
[manualfree] // to avoid reallocations
pub fn (flds map[string]Any) str() string { const null_in_bytes = 'null'.bytes()
mut wr := strings.new_builder(200)
wr.write_byte(`{`) const true_in_bytes = 'true'.bytes()
mut i := 0
for k, v in flds { const false_in_bytes = 'false'.bytes()
wr.write_string('"$k":')
write_value(v, i, flds.len, mut wr) const zero_in_bytes = [byte(`0`)]
i++
} const comma_bytes = [byte(`,`)]
wr.write_byte(`}`)
defer { const colon_bytes = [byte(`:`)]
unsafe { wr.free() }
} const space_bytes = [byte(` `)]
res := wr.str()
return res const unicode_escape_chars = [byte(`\\`), `u`]
const quote_bytes = [byte(`"`)]
// escaped_chars holds the two-byte escape sequences written by `encode_string`.
// Entry j is emitted for the character equal to `important_escapable_chars[j]`,
// so both arrays must be kept in the same order.
const escaped_chars = [(r'\b').bytes(), (r'\f').bytes(), (r'\n').bytes(),
(r'\r').bytes(), (r'\t').bytes()]
// encode_value encodes an `Any` value to the specific writer.
// It is the public entry point of the encoder: it delegates to
// `encode_value_with_level` with a starting nesting level of 1 and
// propagates any error returned by it.
pub fn (e &Encoder) encode_value(f Any, mut wr io.Writer) ? {
e.encode_value_with_level(f, 1, mut wr) ?
}
// str returns the string representation of the `[]Any`. fn (e &Encoder) encode_newline(level int, mut wr io.Writer) ? {
[manualfree] if e.newline != 0 {
pub fn (flds []Any) str() string { wr.write([e.newline]) ?
mut wr := strings.new_builder(200) for j := 0; j < level * e.newline_spaces_count; j++ {
wr.write_byte(`[`) wr.write(json2.space_bytes) ?
for i, v in flds { }
write_value(v, i, flds.len, mut wr)
} }
wr.write_byte(`]`) }
defer {
unsafe { wr.free() } fn (e &Encoder) encode_value_with_level(f Any, level int, mut wr io.Writer) ? {
match f {
string {
e.encode_string(f, mut wr) ?
}
bool {
if f == true {
wr.write(json2.true_in_bytes) ?
} else {
wr.write(json2.false_in_bytes) ?
}
}
int, u64, i64 {
wr.write(f.str().bytes()) ?
}
f32, f64 {
$if !nofloat ? {
str_float := f.str().bytes()
wr.write(str_float) ?
if str_float[str_float.len - 1] == `.` {
wr.write(json2.zero_in_bytes) ?
}
return
}
wr.write(json2.zero_in_bytes) ?
}
map[string]Any {
wr.write([byte(`{`)]) ?
mut i := 0
for k, v in f {
e.encode_newline(level, mut wr) ?
e.encode_string(k, mut wr) ?
wr.write(json2.colon_bytes) ?
if e.newline != 0 {
wr.write(json2.space_bytes) ?
}
e.encode_value_with_level(v, level + 1, mut wr) ?
if i < f.len - 1 {
wr.write(json2.comma_bytes) ?
}
i++
}
e.encode_newline(level - 1, mut wr) ?
wr.write([byte(`}`)]) ?
}
[]Any {
wr.write([byte(`[`)]) ?
for i, v in f {
e.encode_newline(level, mut wr) ?
e.encode_value_with_level(v, level + 1, mut wr) ?
if i < f.len - 1 {
wr.write(json2.comma_bytes) ?
}
}
e.encode_newline(level - 1, mut wr) ?
wr.write([byte(`]`)]) ?
}
Null {
wr.write(json2.null_in_bytes) ?
}
} }
res := wr.str() }
return res
// str returns the JSON string representation of the `map[string]Any` type.
// The map is wrapped in `Any` and encoded through `json_str`.
pub fn (f map[string]Any) str() string {
return Any(f).json_str()
}
// str returns the JSON string representation of the `[]Any` type.
pub fn (f []Any) str() string {
return Any(f).json_str()
} }
// str returns the string representation of the `Any` type. Use the `json_str` method // str returns the string representation of the `Any` type. Use the `json_str` method
@ -64,113 +135,102 @@ pub fn (f Any) str() string {
} }
// json_str returns the JSON string representation of the `Any` type. // json_str returns the JSON string representation of the `Any` type.
pub fn (f Any) json_str() string {
match f {
string {
return json_string(f)
}
bool, int, u64, i64 {
return f.str()
}
f32 {
$if !nofloat ? {
str_f32 := f.str()
if str_f32.ends_with('.') {
return '${str_f32}0'
}
return str_f32
}
return '0'
}
f64 {
$if !nofloat ? {
str_f64 := f.str()
if str_f64.ends_with('.') {
return '${str_f64}0'
}
return str_f64
}
return '0'
}
map[string]Any {
return f.str()
}
[]Any {
return f.str()
}
Null {
return 'null'
}
}
}
// char_len_list is a modified version of builtin.utf8_str_len
// that returns an array of character lengths. (e.g "t✔" => [1,2])
fn char_len_list(s string) []int {
mut l := 1
mut ls := []int{}
for i := 0; i < s.len; i++ {
c := s[i]
if (c & (1 << 7)) != 0 {
for t := byte(1 << 6); (c & t) != 0; t >>= 1 {
l++
i++
}
}
ls << l
l = 1
}
return ls
}
const escaped_chars = [r'\b', r'\f', r'\n', r'\r', r'\t']
// json_string returns the JSON spec-compliant version of the string.
[manualfree] [manualfree]
fn json_string(s string) string { pub fn (f Any) json_str() string {
// not the best implementation but will revisit it soon mut sb := strings.new_builder(4096)
char_lens := char_len_list(s)
mut sb := strings.new_builder(s.len)
mut i := 0
defer { defer {
unsafe { unsafe { sb.free() }
char_lens.free() }
// freeing string builder on defer after mut enc := Encoder{}
// returning .str() still isn't working :( enc.encode_value(f, mut sb) or { return '' }
// sb.free() return sb.str()
}
// prettify_json_str returns the pretty-formatted JSON string representation of the `Any` type.
// It encodes `f` with an `Encoder` configured to emit a `\n` newline and
// 4 spaces per nesting level. An empty string is returned if encoding fails.
[manualfree]
pub fn (f Any) prettify_json_str() string {
// buffer that receives the encoded output; 4096 is the size passed to the builder
mut sb := strings.new_builder(4096)
defer {
// the function is marked [manualfree], so the builder is released explicitly on exit
unsafe { sb.free() }
}
mut enc := Encoder{
newline: `\n`
newline_spaces_count: 4
}
// an encoding error is not propagated: the caller gets '' instead
enc.encode_value(f, mut sb) or { return '' }
return sb.str()
}
// CharLengthIterator is an iterator that generates a char
// length value of every iteration based on the given text.
// The length is the number of bytes the character occupies in `text`
// (e.g.: "t✔" => [t => 1, ✔ => 3])
struct CharLengthIterator {
// text is the string being scanned
text string
mut:
// idx is the byte offset into `text` of the next character to measure
idx int
}
fn (mut iter CharLengthIterator) next() ?int {
if iter.idx >= iter.text.len {
return none
}
defer {
iter.idx++
}
mut len := 1
c := iter.text[iter.idx]
if (c & (1 << 7)) != 0 {
for t := byte(1 << 6); (c & t) != 0; t >>= 1 {
len++
iter.idx++
} }
} }
return len
}
// encode_string returns the JSON spec-compliant version of the string.
[manualfree]
fn (e &Encoder) encode_string(s string, mut wr io.Writer) ? {
mut char_lens := CharLengthIterator{
text: s
}
mut i := 0
wr.write(json2.quote_bytes) ?
for char_len in char_lens { for char_len in char_lens {
if char_len == 1 { if char_len == 1 {
chr := s[i] chr := s[i]
if chr in important_escapable_chars { if chr in important_escapable_chars {
for j := 0; j < important_escapable_chars.len; j++ { for j := 0; j < important_escapable_chars.len; j++ {
if chr == important_escapable_chars[j] { if chr == important_escapable_chars[j] {
sb.write_string(json2.escaped_chars[j]) wr.write(json2.escaped_chars[j]) ?
break break
} }
} }
} else if chr == `"` || chr == `/` || chr == `\\` { } else if chr == `"` || chr == `/` || chr == `\\` {
sb.write_string('\\' + chr.ascii_str()) wr.write([byte(`\\`), chr]) ?
} else if int(chr) < 0x20 { } else if int(chr) < 0x20 {
hex_code := chr.hex() hex_code := chr.hex().bytes()
sb.write_string('\\u00$hex_code') wr.write(json2.unicode_escape_chars) ? // \u
wr.write(json2.zero_in_bytes) ? // \u0
wr.write(json2.zero_in_bytes) ? // \u00
wr.write(hex_code) ? // \u00xx
} else { } else {
sb.write_byte(chr) wr.write([byte(chr)]) ?
} }
} else { } else {
slice := s[i..i + char_len] slice := s[i..i + char_len]
hex_code := slice.utf32_code().hex() hex_code := slice.utf32_code().hex().bytes()
if hex_code.len < 4 { if !e.escape_unicode || hex_code.len < 4 {
// an utf8 codepoint // unescaped non-ASCII char
sb.write_string(slice) wr.write(slice.bytes()) ?
} else if hex_code.len == 4 { } else if hex_code.len == 4 {
sb.write_string('\\u$hex_code') // a unicode endpoint
wr.write(json2.unicode_escape_chars) ?
wr.write(hex_code) ?
} else { } else {
// TODO: still figuring out what // TODO: still figuring out what
// to do with more than 4 chars // to do with more than 4 chars
sb.write_byte(` `) wr.write(json2.space_bytes) ?
} }
unsafe { unsafe {
slice.free() slice.free()
@ -179,7 +239,6 @@ fn json_string(s string) string {
} }
i += char_len i += char_len
} }
str := sb.str()
unsafe { sb.free() } wr.write(json2.quote_bytes) ?
return str
} }

View File

@ -1,20 +1,21 @@
import x.json2 import x.json2
import strings
fn test_json_string_characters() { fn test_json_string_characters() {
text := json2.raw_decode(r'"\n\r\b\f\t\\\"\/"') or { '' } text := json2.raw_decode(r'"\n\r\b\f\t\\\"\/"') or { '' }
assert text.json_str() == '\\n\\r\\b\\f\\t\\\\\\"\\/' assert text.json_str() == '"\\n\\r\\b\\f\\t\\\\\\"\\/"'
} }
fn test_json_escape_low_chars() { fn test_json_escape_low_chars() {
esc := '\u001b' esc := '\u001b'
assert esc.len == 1 assert esc.len == 1
text := json2.Any(esc) text := json2.Any(esc)
assert text.json_str() == r'\u001b' assert text.json_str() == r'"\u001b"'
} }
fn test_json_string() { fn test_json_string() {
text := json2.Any('te✔st') text := json2.Any('te✔st')
assert text.json_str() == r'te\u2714st' assert text.json_str() == r'"te\u2714st"'
boolean := json2.Any(true) boolean := json2.Any(true)
assert boolean.json_str() == 'true' assert boolean.json_str() == 'true'
integer := json2.Any(int(-5)) integer := json2.Any(int(-5))
@ -27,12 +28,12 @@ fn test_json_string() {
fn test_json_string_emoji() { fn test_json_string_emoji() {
text := json2.Any('🐈') text := json2.Any('🐈')
assert text.json_str() == r' ' assert text.json_str() == r'" "'
} }
fn test_json_string_non_ascii() { fn test_json_string_non_ascii() {
text := json2.Any('ひらがな') text := json2.Any('ひらがな')
assert text.json_str() == r'\u3072\u3089\u304c\u306a' assert text.json_str() == r'"\u3072\u3089\u304c\u306a"'
} }
fn test_utf8_strings_are_not_modified() ? { fn test_utf8_strings_are_not_modified() ? {
@ -42,3 +43,48 @@ fn test_utf8_strings_are_not_modified() ? {
// dump(deresult) // dump(deresult)
assert deresult.str() == original assert deresult.str() == original
} }
// test_encoder_unescaped_utf32 checks that an `Encoder` created with
// `escape_unicode: false` writes multi-byte characters verbatim, only
// wrapped in quotes, instead of turning them into `\u` escapes.
fn test_encoder_unescaped_utf32() ? {
// NOTE(review): this literal is empty in this view; presumably non-ASCII
// (Japanese) text was lost when the page was extracted - confirm against the repository
jap_text := json2.Any('')
enc := json2.Encoder{
escape_unicode: false
}
mut sb := strings.new_builder(20)
enc.encode_value(jap_text, mut sb) ?
assert sb.str() == '"$jap_text"'
// move the builder back to position 0 before encoding the second case
sb.go_back_to(0)
// the emoji's code point needs more than 4 hex digits; with escaping disabled it is written as-is
emoji_text := json2.Any('🐈')
enc.encode_value(emoji_text, mut sb) ?
assert sb.str() == '"$emoji_text"'
}
// test_encoder_prettify checks the output of an `Encoder` configured with a
// `\n` newline and `newline_spaces_count: 2` on a map that nests a string,
// an array containing another array, and a map.
// NOTE(review): `encode_newline` writes `level * newline_spaces_count` spaces
// after each newline, but the expected literal below shows no indentation in
// this view - presumably leading whitespace was lost in rendering; confirm
// against the repository before relying on the literal.
fn test_encoder_prettify() ? {
obj := {
'hello': json2.Any('world')
'arr': [json2.Any('im a string'), [json2.Any('3rd level')]]
'obj': {
'map': json2.Any('map inside a map')
}
}
enc := json2.Encoder{
newline: `\n`
newline_spaces_count: 2
}
mut sb := strings.new_builder(20)
enc.encode_value(obj, mut sb) ?
assert sb.str() == '{
"hello": "world",
"arr": [
"im a string",
[
"3rd level"
]
],
"obj": {
"map": "map inside a map"
}
}'
}