x.json2: add customized JSON output capability via Encoder (#13654)

2022-03-04 19:39:23 +08:00 · 2022-03-04 19:39:23 +08:00 · 437fa02f27
parent 74d5106e8f
commit 437fa02f27
6 changed files with 275 additions and 168 deletions
--- a/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v
+++ b/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v
@ -236,13 +236,13 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
 	match value {
 		ast.Quoted {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "string", "value": "$json_text" }'
+			return '{ "type": "string", "value": $json_text }'
 		}
 		ast.DateTime {
 			// Normalization for json
 			mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ',
 				'T')
-			typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
+			typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
 				|| json_text.all_after('T').contains('+') {
 				'datetime'
 			} else {
@ -252,16 +252,16 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
 			// It seems it's implementation specific how time and
 			// date-time values are represented in detail. For now we follow the BurntSushi format
 			// that expands to 6 digits which is also a valid RFC 3339 representation.
-			json_text = to_alexcrichton_time(json_text)
+			json_text = to_alexcrichton_time(json_text[1..json_text.len - 1])
 			return '{ "type": "$typ", "value": "$json_text" }'
 		}
 		ast.Date {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "date", "value": "$json_text" }'
+			return '{ "type": "date", "value": $json_text }'
 		}
 		ast.Time {
 			mut json_text := json2.Any(value.text).json_str()
-			json_text = to_alexcrichton_time(json_text)
+			json_text = to_alexcrichton_time(json_text[1..json_text.len - 1])
 			return '{ "type": "time", "value": "$json_text" }'
 		}
 		ast.Bool {
@ -270,12 +270,12 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
 		}
 		ast.Null {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "null", "value": "$json_text" }'
+			return '{ "type": "null", "value": $json_text }'
 		}
 		ast.Number {
 			text := value.text
 			if text.contains('inf') || text.contains('nan') {
-				return '{ "type": "float", "value": "$value.text" }'
+				return '{ "type": "float", "value": $value.text }'
 			}
 			if !text.starts_with('0x') && (text.contains('.') || text.to_lower().contains('e')) {
 				mut val := ''
@ -297,7 +297,7 @@ fn to_alexcrichton(value ast.Value, array_type int) string {
 			mut str := '{ '
 			for key, val in value {
 				json_key := json2.Any(key).json_str()
-				str += ' "$json_key": ${to_alexcrichton(val, array_type)},'
+				str += ' $json_key: ${to_alexcrichton(val, array_type)},'
 			}
 			str = str.trim_right(',')
 			str += ' }'
--- a/vlib/toml/tests/burntsushi.toml-test_test.v
+++ b/vlib/toml/tests/burntsushi.toml-test_test.v
@ -199,26 +199,30 @@ fn to_burntsushi(value ast.Value) string {
 	match value {
 		ast.Quoted {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "string", "value": "$json_text" }'
+			return '{ "type": "string", "value": $json_text }'
 		}
 		ast.DateTime {
 			// Normalization for json
 			json_text := json2.Any(value.text).json_str().to_upper().replace(' ', 'T')
-			typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
+
 			// NB: Since encoding strings in JSON now automatically includes quotes,
 			// I added somewhat of a workaround by adding an ending quote in order to
 			// properly recognize the date time type. - Ned
 			typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
 				|| json_text.all_after('T').contains('+') {
 				'datetime'
 			} else {
 				'datetime-local'
 			}
-			return '{ "type": "$typ", "value": "$json_text" }'
+			return '{ "type": "$typ", "value": $json_text }'
 		}
 		ast.Date {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "date-local", "value": "$json_text" }'
+			return '{ "type": "date-local", "value": $json_text }'
 		}
 		ast.Time {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "time-local", "value": "$json_text" }'
+			return '{ "type": "time-local", "value": $json_text }'
 		}
 		ast.Bool {
 			json_text := json2.Any(value.text.bool()).json_str()
@ -226,7 +230,7 @@ fn to_burntsushi(value ast.Value) string {
 		}
 		ast.Null {
 			json_text := json2.Any(value.text).json_str()
-			return '{ "type": "null", "value": "$json_text" }'
+			return '{ "type": "null", "value": $json_text }'
 		}
 		ast.Number {
 			if value.text.contains('inf') || value.text.contains('nan') {
@ -251,7 +255,7 @@ fn to_burntsushi(value ast.Value) string {
 			mut str := '{ '
 			for key, val in value {
 				json_key := json2.Any(key).json_str()
-				str += ' "$json_key": ${to_burntsushi(val)},'
+				str += ' $json_key: ${to_burntsushi(val)},'
 			}
 			str = str.trim_right(',')
 			str += ' }'
--- a/vlib/toml/tests/iarna.toml-spec-tests_test.v
+++ b/vlib/toml/tests/iarna.toml-spec-tests_test.v
@ -288,15 +288,15 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
 		ast.Quoted {
 			json_text := json2.Any(value.text).json_str()
 			if skip_value_map {
-				return '"$json_text"'
+				return json_text
 			}
-			return '{ "type": "string", "value": "$json_text" }'
+			return '{ "type": "string", "value": $json_text }'
 		}
 		ast.DateTime {
 			// Normalization for json
 			mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ',
 				'T')
-			typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-')
+			typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-')
 				|| json_text.all_after('T').contains('+') {
 				'datetime'
 			} else {
@ -306,40 +306,41 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
 			// It seems it's implementation specific how time and
 			// date-time values are represented in detail. For now we follow the BurntSushi format
 			// that expands to 6 digits which is also a valid RFC 3339 representation.
-			json_text = to_iarna_time(json_text)
+			json_text = to_iarna_time(json_text[1..json_text.len - 1])
 			if skip_value_map {
-				return '"$json_text"'
+				return json_text
 			}
 			return '{ "type": "$typ", "value": "$json_text" }'
 		}
 		ast.Date {
 			json_text := json2.Any(value.text).json_str()
 			if skip_value_map {
-				return '"$json_text"'
+				return json_text
 			}
-			return '{ "type": "date", "value": "$json_text" }'
+			return '{ "type": "date", "value": $json_text }'
 		}
 		ast.Time {
 			mut json_text := json2.Any(value.text).json_str()
-			json_text = to_iarna_time(json_text)
+			// NB: Removes the quotes of the encoded JSON string - Ned
 			json_text = to_iarna_time(json_text[1..json_text.len - 1])
 			if skip_value_map {
-				return '"$json_text"'
+				return json_text
 			}
 			return '{ "type": "time", "value": "$json_text" }'
 		}
 		ast.Bool {
 			json_text := json2.Any(value.text.bool()).json_str()
 			if skip_value_map {
-				return '$json_text'
+				return json_text
 			}
 			return '{ "type": "bool", "value": "$json_text" }'
 		}
 		ast.Null {
 			json_text := json2.Any(value.text).json_str()
 			if skip_value_map {
-				return '$json_text'
+				return json_text
 			}
-			return '{ "type": "null", "value": "$json_text" }'
+			return '{ "type": "null", "value": $json_text }'
 		}
 		ast.Number {
 			if value.text.contains('inf') {
@ -384,7 +385,7 @@ fn to_iarna(value ast.Value, skip_value_map bool) string {
 			mut str := '{ '
 			for key, val in value {
 				json_key := json2.Any(key).json_str()
-				str += ' "$json_key": ${to_iarna(val, skip_value_map)},'
+				str += ' $json_key: ${to_iarna(val, skip_value_map)},'
 			}
 			str = str.trim_right(',')
 			str += ' }'
--- a/vlib/toml/to/to.v
+++ b/vlib/toml/to/to.v
@ -27,19 +27,16 @@ fn any_to_json(a toml.Any) string {
 			return 'null'
 		}
 		toml.DateTime {
-			json_text := json2.Any(a.str())
+			return json2.Any(a.str()).json_str()
 			return '"$json_text.json_str()"'
 		}
 		toml.Date {
-			json_text := json2.Any(a.str())
+			return json2.Any(a.str()).json_str()
 			return '"$json_text.json_str()"'
 		}
 		toml.Time {
-			json_text := json2.Any(a.str())
+			return json2.Any(a.str()).json_str()
 			return '"$json_text.json_str()"'
 		}
 		string {
-			return '"' + json2.Any(a.str()).json_str() + '"'
+			return json2.Any(a.str()).json_str()
 		}
 		bool {
 			return json2.Any(bool(a)).json_str()
@ -63,7 +60,7 @@ fn any_to_json(a toml.Any) string {
 			mut str := '{'
 			for key, val in a {
 				json_key := json2.Any(key)
-				str += ' "$json_key.json_str()": ${any_to_json(val)},'
+				str += ' $json_key.json_str(): ${any_to_json(val)},'
 			}
 			str = str.trim_right(',')
 			str += ' }'
--- a/vlib/x/json2/encoder.v
+++ b/vlib/x/json2/encoder.v
@ -3,54 +3,125 @@
 // that can be found in the LICENSE file.
 module json2
 import io
 import strings
-fn write_value(v Any, i int, len int, mut wr strings.Builder) {
+// Encoder encodes an `Any` type into its JSON representation.
-	str := v.json_str()
+// It provides parameters in order to change the end result.
-	if v is string {
+pub struct Encoder {
-		wr.write_string('"$str"')
+	newline              byte
-	} else {
+	newline_spaces_count int
-		wr.write_string(str)
+	escape_unicode       bool = true
 	}
 	if i >= len - 1 {
 		return
 	}
 	wr.write_byte(`,`)
 }
-// str returns the string representation of the `map[string]Any`.
+// byte array versions of the most common tokens/chars
-[manualfree]
+// to avoid reallocations
-pub fn (flds map[string]Any) str() string {
+const null_in_bytes = 'null'.bytes()
-	mut wr := strings.new_builder(200)
+
-	wr.write_byte(`{`)
+const true_in_bytes = 'true'.bytes()
-	mut i := 0
+
-	for k, v in flds {
+const false_in_bytes = 'false'.bytes()
-		wr.write_string('"$k":')
+
-		write_value(v, i, flds.len, mut wr)
+const zero_in_bytes = [byte(`0`)]
-		i++
+
-	}
+const comma_bytes = [byte(`,`)]
-	wr.write_byte(`}`)
+
-	defer {
+const colon_bytes = [byte(`:`)]
-		unsafe { wr.free() }
+
-	}
+const space_bytes = [byte(` `)]
-	res := wr.str()
+
-	return res
+const unicode_escape_chars = [byte(`\\`), `u`]
 const quote_bytes = [byte(`"`)]
 const escaped_chars = [(r'\b').bytes(), (r'\f').bytes(), (r'\n').bytes(),
 	(r'\r').bytes(), (r'\t').bytes()]
 // encode_value writes the JSON encoding of the `Any` value `f` to the writer `wr`.
 // It delegates to `encode_value_with_level`, and any error returned by that
 // call is propagated to the caller.
 pub fn (e &Encoder) encode_value(f Any, mut wr io.Writer) ? {
 	// the top-level value is encoded with an initial level of 1
 	e.encode_value_with_level(f, 1, mut wr) ?
 }
-// str returns the string representation of the `[]Any`.
+fn (e &Encoder) encode_newline(level int, mut wr io.Writer) ? {
-[manualfree]
+	if e.newline != 0 {
-pub fn (flds []Any) str() string {
+		wr.write([e.newline]) ?
-	mut wr := strings.new_builder(200)
+		for j := 0; j < level * e.newline_spaces_count; j++ {
-	wr.write_byte(`[`)
+			wr.write(json2.space_bytes) ?
-	for i, v in flds {
+		}
 		write_value(v, i, flds.len, mut wr)
 	}
-	wr.write_byte(`]`)
+}
-	defer {
+
-		unsafe { wr.free() }
+fn (e &Encoder) encode_value_with_level(f Any, level int, mut wr io.Writer) ? {
 	match f {
 		string {
 			e.encode_string(f, mut wr) ?
 		}
 		bool {
 			if f == true {
 				wr.write(json2.true_in_bytes) ?
 			} else {
 				wr.write(json2.false_in_bytes) ?
 			}
 		}
 		int, u64, i64 {
 			wr.write(f.str().bytes()) ?
 		}
 		f32, f64 {
 			$if !nofloat ? {
 				str_float := f.str().bytes()
 				wr.write(str_float) ?
 				if str_float[str_float.len - 1] == `.` {
 					wr.write(json2.zero_in_bytes) ?
 				}
 				return
 			}
 			wr.write(json2.zero_in_bytes) ?
 		}
 		map[string]Any {
 			wr.write([byte(`{`)]) ?
 			mut i := 0
 			for k, v in f {
 				e.encode_newline(level, mut wr) ?
 				e.encode_string(k, mut wr) ?
 				wr.write(json2.colon_bytes) ?
 				if e.newline != 0 {
 					wr.write(json2.space_bytes) ?
 				}
 				e.encode_value_with_level(v, level + 1, mut wr) ?
 				if i < f.len - 1 {
 					wr.write(json2.comma_bytes) ?
 				}
 				i++
 			}
 			e.encode_newline(level - 1, mut wr) ?
 			wr.write([byte(`}`)]) ?
 		}
 		[]Any {
 			wr.write([byte(`[`)]) ?
 			for i, v in f {
 				e.encode_newline(level, mut wr) ?
 				e.encode_value_with_level(v, level + 1, mut wr) ?
 				if i < f.len - 1 {
 					wr.write(json2.comma_bytes) ?
 				}
 			}
 			e.encode_newline(level - 1, mut wr) ?
 			wr.write([byte(`]`)]) ?
 		}
 		Null {
 			wr.write(json2.null_in_bytes) ?
 		}
 	}
-	res := wr.str()
+}
-	return res
+
 // str returns the JSON string representation of the `map[string]Any` type.
 // The map is wrapped in an `Any` and encoded through `json_str`.
 pub fn (f map[string]Any) str() string {
 	return Any(f).json_str()
 }
 // str returns the JSON string representation of the `[]Any` type.
 // The array is wrapped in an `Any` and encoded through `json_str`.
 pub fn (f []Any) str() string {
 	return Any(f).json_str()
 }
 // str returns the string representation of the `Any` type. Use the `json_str` method
@ -64,113 +135,102 @@ pub fn (f Any) str() string {
 }
 // json_str returns the JSON string representation of the `Any` type.
 // Each variant of `Any` is handled by its own match branch:
 // - `string` is passed through `json_string`
 // - `bool`, `int`, `u64`, `i64`, `map[string]Any` and `[]Any` use their `str()` result
 // - `f32` and `f64` use their `str()` result, with a `0` appended when it ends with `.`
 // - `Null` yields the literal text `null`
 pub fn (f Any) json_str() string {
 	match f {
 		string {
 			return json_string(f)
 		}
 		bool, int, u64, i64 {
 			return f.str()
 		}
 		f32 {
 			// the float formatting is only compiled in when `nofloat` is not defined
 			$if !nofloat ? {
 				str_f32 := f.str()
 				// a trailing `.` gets a `0` appended so a digit follows the decimal point
 				if str_f32.ends_with('.') {
 					return '${str_f32}0'
 				}
 				return str_f32
 			}
 			// fallback when the float formatting above is not compiled in
 			return '0'
 		}
 		f64 {
 			// same handling as the `f32` branch
 			$if !nofloat ? {
 				str_f64 := f.str()
 				if str_f64.ends_with('.') {
 					return '${str_f64}0'
 				}
 				return str_f64
 			}
 			return '0'
 		}
 		map[string]Any {
 			return f.str()
 		}
 		[]Any {
 			return f.str()
 		}
 		Null {
 			return 'null'
 		}
 	}
 }
 // char_len_list is a modified version of builtin.utf8_str_len
 // that returns an array with the byte length of every character
 // in `s`, in order. (e.g "t✔" => [1,3], since `✔` takes 3 bytes)
 fn char_len_list(s string) []int {
 	mut lengths := []int{}
 	mut pos := 0
 	for pos < s.len {
 		lead := s[pos]
 		mut width := 1
 		// a set high bit marks a multi-byte sequence; every further
 		// consecutive set bit below it adds one byte to the character
 		if (lead & 0x80) != 0 {
 			mut mask := byte(0x40)
 			for (lead & mask) != 0 {
 				width++
 				mask >>= 1
 			}
 		}
 		lengths << width
 		// skip over all the bytes that belong to this character
 		pos += width
 	}
 	return lengths
 }
 const escaped_chars = [r'\b', r'\f', r'\n', r'\r', r'\t']
 // json_string returns the JSON spec-compliant version of the string.
 [manualfree]
-fn json_string(s string) string {
+pub fn (f Any) json_str() string {
-	// not the best implementation but will revisit it soon
+	mut sb := strings.new_builder(4096)
 	char_lens := char_len_list(s)
 	mut sb := strings.new_builder(s.len)
 	mut i := 0
 	defer {
-		unsafe {
+		unsafe { sb.free() }
-			char_lens.free()
+	}
-			// freeing string builder on defer after
+	mut enc := Encoder{}
-			// returning .str() still isn't working :(
+	enc.encode_value(f, mut sb) or { return '' }
-			// sb.free()
+	return sb.str()
 }
 // prettify_json_str returns the pretty-formatted JSON string representation of the `Any` type.
 // The value is encoded with an `Encoder` configured with `\n` as the newline
 // and a `newline_spaces_count` of 4. An empty string is returned when encoding fails.
 [manualfree]
 pub fn (f Any) prettify_json_str() string {
 	mut sb := strings.new_builder(4096)
 	// the builder is freed manually once the function returns
 	defer {
 		unsafe { sb.free() }
 	}
 	mut enc := Encoder{
 		newline: `\n`
 		newline_spaces_count: 4
 	}
 	// an encoding error is not propagated; the result is an empty string instead
 	enc.encode_value(f, mut sb) or { return '' }
 	return sb.str()
 }
 // CharLengthIterator is an iterator that yields, on every iteration,
 // the byte length of the next character of the given text.
 // (e.g.: "t✔" => [t => 1, ✔ => 3], since `✔` takes 3 bytes)
 struct CharLengthIterator {
 	text string // the text whose characters are measured
 mut:
 	idx int // byte index in `text` at which the next character starts
 }
 // next returns the byte length of the character starting at the current
 // index and moves the index past that character. It returns `none` once
 // the index has reached the end of the text.
 fn (mut iter CharLengthIterator) next() ?int {
 	if iter.idx >= iter.text.len {
 		return none
 	}
 	lead := iter.text[iter.idx]
 	mut width := 1
 	// a set high bit marks a multi-byte sequence; every further
 	// consecutive set bit below it adds one byte to the character
 	if (lead & 0x80) != 0 {
 		mut mask := byte(0x40)
 		for (lead & mask) != 0 {
 			width++
 			mask >>= 1
 		}
 	}
 	// advance past every byte of the character that was just measured
 	iter.idx += width
 	return width
 }
 // encode_string returns the JSON spec-compliant version of the string.
 [manualfree]
 fn (e &Encoder) encode_string(s string, mut wr io.Writer) ? {
 	mut char_lens := CharLengthIterator{
 		text: s
 	}
 	mut i := 0
 	wr.write(json2.quote_bytes) ?
 	for char_len in char_lens {
 		if char_len == 1 {
 			chr := s[i]
 			if chr in important_escapable_chars {
 				for j := 0; j < important_escapable_chars.len; j++ {
 					if chr == important_escapable_chars[j] {
-						sb.write_string(json2.escaped_chars[j])
+						wr.write(json2.escaped_chars[j]) ?
 						break
 					}
 				}
 			} else if chr == `"` || chr == `/` || chr == `\\` {
-				sb.write_string('\\' + chr.ascii_str())
+				wr.write([byte(`\\`), chr]) ?
 			} else if int(chr) < 0x20 {
-				hex_code := chr.hex()
+				hex_code := chr.hex().bytes()
-				sb.write_string('\\u00$hex_code')
+				wr.write(json2.unicode_escape_chars) ? // \u
 				wr.write(json2.zero_in_bytes) ? // \u0
 				wr.write(json2.zero_in_bytes) ? // \u00
 				wr.write(hex_code) ? // \u00xxxx
 			} else {
-				sb.write_byte(chr)
+				wr.write([byte(chr)]) ?
 			}
 		} else {
 			slice := s[i..i + char_len]
-			hex_code := slice.utf32_code().hex()
+			hex_code := slice.utf32_code().hex().bytes()
-			if hex_code.len < 4 {
+			if !e.escape_unicode || hex_code.len < 4 {
-				// an utf8 codepoint
+				// unescaped non-ASCII char
-				sb.write_string(slice)
+				wr.write(slice.bytes()) ?
 			} else if hex_code.len == 4 {
-				sb.write_string('\\u$hex_code')
+				// a unicode code point whose hex form is exactly 4 digits
 				wr.write(json2.unicode_escape_chars) ?
 				wr.write(hex_code) ?
 			} else {
 				// TODO: still figuring out what
 				// to do with more than 4 chars
-				sb.write_byte(` `)
+				wr.write(json2.space_bytes) ?
 			}
 			unsafe {
 				slice.free()
@ -179,7 +239,6 @@ fn json_string(s string) string {
 		}
 		i += char_len
 	}
-	str := sb.str()
+
-	unsafe { sb.free() }
+	wr.write(json2.quote_bytes) ?
 	return str
 }
--- a/vlib/x/json2/encoder_test.v
+++ b/vlib/x/json2/encoder_test.v
@ -1,20 +1,21 @@
 import x.json2
 import strings
 fn test_json_string_characters() {
 	text := json2.raw_decode(r'"\n\r\b\f\t\\\"\/"') or { '' }
-	assert text.json_str() == '\\n\\r\\b\\f\\t\\\\\\"\\/'
+	assert text.json_str() == '"\\n\\r\\b\\f\\t\\\\\\"\\/"'
 }
 fn test_json_escape_low_chars() {
 	esc := '\u001b'
 	assert esc.len == 1
 	text := json2.Any(esc)
-	assert text.json_str() == r'\u001b'
+	assert text.json_str() == r'"\u001b"'
 }
 fn test_json_string() {
 	text := json2.Any('te✔st')
-	assert text.json_str() == r'te\u2714st'
+	assert text.json_str() == r'"te\u2714st"'
 	boolean := json2.Any(true)
 	assert boolean.json_str() == 'true'
 	integer := json2.Any(int(-5))
@ -27,12 +28,12 @@ fn test_json_string() {
 fn test_json_string_emoji() {
 	text := json2.Any('🐈')
-	assert text.json_str() == r' '
+	assert text.json_str() == r'" "'
 }
 fn test_json_string_non_ascii() {
 	text := json2.Any('ひらがな')
-	assert text.json_str() == r'\u3072\u3089\u304c\u306a'
+	assert text.json_str() == r'"\u3072\u3089\u304c\u306a"'
 }
 fn test_utf8_strings_are_not_modified() ? {
@ -42,3 +43,48 @@ fn test_utf8_strings_are_not_modified() ? {
 	// dump(deresult)
 	assert deresult.str() == original
 }
 // test_encoder_unescaped_utf32 checks that an `Encoder` created with
 // `escape_unicode: false` writes non-ASCII text as-is between double quotes,
 // both for Japanese text and for an emoji.
 fn test_encoder_unescaped_utf32() ? {
 	jap_text := json2.Any('ひらがな')
 	enc := json2.Encoder{
 		escape_unicode: false
 	}
 	mut sb := strings.new_builder(20)
 	enc.encode_value(jap_text, mut sb) ?
 	assert sb.str() == '"$jap_text"'
 	// go back to position 0 of the builder before encoding the second value
 	sb.go_back_to(0)
 	emoji_text := json2.Any('🐈')
 	enc.encode_value(emoji_text, mut sb) ?
 	assert sb.str() == '"$emoji_text"'
 }
 fn test_encoder_prettify() ? {
 	obj := {
 		'hello': json2.Any('world')
 		'arr':   [json2.Any('im a string'), [json2.Any('3rd level')]]
 		'obj':   {
 			'map': json2.Any('map inside a map')
 		}
 	}
 	enc := json2.Encoder{
 		newline: `\n`
 		newline_spaces_count: 2
 	}
 	mut sb := strings.new_builder(20)
 	enc.encode_value(obj, mut sb) ?
 	assert sb.str() == '{
  "hello": "world",
  "arr": [
    "im a string",
    [
      "3rd level"
    ]
  ],
  "obj": {
    "map": "map inside a map"
  }
 }'
 }