x.json2: proper string encoding + minor fixes (#9026)
parent
506041a15b
commit
ee879f3e41
|
@ -6,7 +6,7 @@ module json2
|
||||||
import strings
|
import strings
|
||||||
|
|
||||||
fn write_value(v Any, i int, len int, mut wr strings.Builder) {
|
fn write_value(v Any, i int, len int, mut wr strings.Builder) {
|
||||||
str := v.str()
|
str := v.json_str()
|
||||||
if v is string {
|
if v is string {
|
||||||
wr.write_string('"$str"')
|
wr.write_string('"$str"')
|
||||||
} else {
|
} else {
|
||||||
|
@ -51,11 +51,21 @@ pub fn (flds []Any) str() string {
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
// str returns the string representation of the `Any` type.
|
// str returns the string representation of the `Any` type. Use the `json_str` method
|
||||||
|
// if you want to use the escaped str() version of the `Any` type.
|
||||||
pub fn (f Any) str() string {
|
pub fn (f Any) str() string {
	// A plain string is handed back untouched (no JSON escaping applied).
	if f is string {
		return f
	}
	// Every other variant shares its textual form with the JSON encoder.
	return f.json_str()
}
|
||||||
|
|
||||||
|
// json_str returns the JSON string representation of the `Any` type.
|
||||||
|
pub fn (f Any) json_str() string {
|
||||||
match f {
|
match f {
|
||||||
string {
|
string {
|
||||||
return f
|
return json_string(f)
|
||||||
}
|
}
|
||||||
int {
|
int {
|
||||||
return f.str()
|
return f.str()
|
||||||
|
@ -85,3 +95,76 @@ pub fn (f Any) str() string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// char_len_list is a modified version of builtin.utf8_str_len
// that returns the byte length of every character in `s`, in order
// (e.g. "t✔" => [1, 3], since `✔` occupies three bytes in UTF-8).
// The length of a character is derived from its first byte only: one,
// plus one for each consecutive set bit found from bit 6 downwards when
// bit 7 is set. A byte with bit 7 set but bit 6 clear is reported as 1.
fn char_len_list(s string) []int {
	mut lens := []int{}
	mut pos := 0
	for pos < s.len {
		lead := s[pos]
		mut width := 1
		if (lead & 0x80) != 0 {
			// walk the mask down from bit 6 until the first cleared bit
			for mask := byte(0x40); (lead & mask) != 0; mask >>= 1 {
				width++
			}
		}
		lens << width
		// skip over the bytes that belong to this character
		pos += width
	}
	return lens
}
|
||||||
|
|
||||||
|
const escaped_chars = [r'\b', r'\f', r'\n', r'\r', r'\t']
|
||||||
|
|
||||||
|
// json_string returns the JSON spec-compliant version of the string.
// - `\b`, `\f`, `\n`, `\r` and `\t` are written as their two-character escapes.
// - `"`, `/` and `\` are prefixed with a backslash.
// - any other ASCII control character (below 0x20) is written as `\u00XX`,
//   since JSON does not allow raw control characters inside a string.
// - multi-byte characters whose code point fits in 4 hex digits are written
//   as `\uXXXX`, zero-padded on the left (e.g. `é` => `\u00e9`).
// - characters whose code point needs more than 4 hex digits are still
//   replaced by a single space (see the TODO below).
[manualfree]
fn json_string(s string) string {
	// not the best implementation but will revisit it soon
	char_lens := char_len_list(s)
	mut sb := strings.new_builder(s.len)
	mut i := 0
	defer {
		unsafe {
			char_lens.free()
			// freeing string builder on defer after
			// returning .str() still isn't working :(
			// sb.free()
		}
	}
	for char_len in char_lens {
		if char_len == 1 {
			chr := s[i]
			if chr in json2.important_escapable_chars {
				// escaped_chars is index-aligned with important_escapable_chars
				for j := 0; j < json2.important_escapable_chars.len; j++ {
					if chr == json2.important_escapable_chars[j] {
						sb.write_string(escaped_chars[j])
						break
					}
				}
			} else if chr == `"` || chr == `/` || chr == `\\` {
				sb.write_string('\\' + chr.ascii_str())
			} else if chr < 0x20 {
				// remaining control characters have no short escape and
				// must not appear raw inside a JSON string
				hex := chr.hex()
				write_unicode_escape(hex, mut sb)
				unsafe {
					hex.free()
				}
			} else {
				sb.write_b(chr)
			}
		} else {
			slice := s[i..i + char_len]
			hex_code := slice.utf32_code().hex()
			if hex_code.len <= 4 {
				// hex() does not pad, so code points below U+1000
				// come back with fewer than 4 digits
				write_unicode_escape(hex_code, mut sb)
			} else {
				// TODO: still figuring out what
				// to do with more than 4 chars
				sb.write_b(` `)
			}
			unsafe {
				slice.free()
				hex_code.free()
			}
		}
		i += char_len
	}
	str := sb.str()
	unsafe { sb.free() }
	return str
}

// write_unicode_escape writes `\u` followed by `hex` left-padded with
// zeros to 4 digits. `hex` is expected to hold at most 4 hex digits.
fn write_unicode_escape(hex string, mut wr strings.Builder) {
	wr.write_string('\\u')
	for _ in hex.len .. 4 {
		wr.write_b(`0`)
	}
	wr.write_string(hex)
}
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
import x.json2
|
||||||
|
|
||||||
|
// Decodes a string literal built from the short escapes and checks that
// json_str() writes each of them back in escaped form.
fn test_json_string_characters() {
	decoded := json2.raw_decode(r'"\n\r\b\f\t\\\"\/"') or { '' }
	expected := '\\n\\r\\b\\f\\t\\\\\\"\\/'
	assert decoded.json_str() == expected
}
|
||||||
|
|
||||||
|
// A 3-byte character inside ASCII text must come out as a \uXXXX escape.
fn test_json_string() {
	value := json2.Any('te✔st')
	encoded := value.json_str()
	assert encoded == r'te\u2714st'
}
|
||||||
|
|
||||||
|
// Pins the current fallback: a character whose code point needs more
// than 4 hex digits is emitted as a single space.
fn test_json_string_emoji() {
	value := json2.Any('🐈')
	encoded := value.json_str()
	assert encoded == r' '
}
|
||||||
|
|
||||||
|
// Every character of a non-ASCII string gets its own \uXXXX escape.
fn test_json_string_non_ascii() {
	value := json2.Any('ひらがな')
	encoded := value.json_str()
	assert encoded == r'\u3072\u3089\u304c\u306a'
}
|
|
@ -41,12 +41,12 @@ const (
|
||||||
// list of characters commonly used in JSON.
|
// list of characters commonly used in JSON.
|
||||||
char_list = [`{`, `}`, `[`, `]`, `,`, `:`]
|
char_list = [`{`, `}`, `[`, `]`, `,`, `:`]
|
||||||
// list of newlines to check when moving to a new position.
|
// list of newlines to check when moving to a new position.
|
||||||
newlines = [`\r`, `\n`, byte(9), `\t`]
|
newlines = [`\r`, `\n`, `\t`]
|
||||||
// list of escapable that needs to be escaped inside a JSON string.
|
// list of characters that need to be escaped inside a JSON string.
|
||||||
// double quotes and forward slashes are excluded intentionally since
|
// double quotes and forward slashes are excluded intentionally since
|
||||||
// they have their own separate checks for it in order to pass the
|
// they have their own separate checks for it in order to pass the
|
||||||
// JSON test suite (https://github.com/nst/JSONTestSuite/).
|
// JSON test suite (https://github.com/nst/JSONTestSuite/).
|
||||||
important_escapable_chars = [byte(9), 10, 0, `\b`, `\f`, `\n`, `\r`, `\t`]
|
important_escapable_chars = [`\b`, `\f`, `\n`, `\r`, `\t`]
|
||||||
// list of valid unicode escapes aside from \u{4-hex digits}
|
// list of valid unicode escapes aside from \u{4-hex digits}
|
||||||
valid_unicode_escapes = [`b`, `f`, `n`, `r`, `t`, `\\`, `"`, `/`]
|
valid_unicode_escapes = [`b`, `f`, `n`, `r`, `t`, `\\`, `"`, `/`]
|
||||||
// used for transforming escapes into valid unicode (eg. n => \n)
|
// used for transforming escapes into valid unicode (eg. n => \n)
|
||||||
|
@ -129,7 +129,7 @@ fn (mut s Scanner) text_scan() Token {
|
||||||
} else if (s.pos - 1 >= 0 && s.text[s.pos - 1] != `\\`)
|
} else if (s.pos - 1 >= 0 && s.text[s.pos - 1] != `\\`)
|
||||||
&& ch in json2.important_escapable_chars {
|
&& ch in json2.important_escapable_chars {
|
||||||
return s.error('character must be escaped with a backslash')
|
return s.error('character must be escaped with a backslash')
|
||||||
} else if s.pos == s.text.len - 1 && ch == `\\` {
|
} else if (s.pos == s.text.len - 1 && ch == `\\`) || ch == byte(0) {
|
||||||
return s.error('invalid backslash escape')
|
return s.error('invalid backslash escape')
|
||||||
} else if s.pos + 1 < s.text.len && ch == `\\` {
|
} else if s.pos + 1 < s.text.len && ch == `\\` {
|
||||||
peek := s.text[s.pos + 1]
|
peek := s.text[s.pos + 1]
|
||||||
|
@ -154,8 +154,15 @@ fn (mut s Scanner) text_scan() Token {
|
||||||
if codepoint.len != 4 {
|
if codepoint.len != 4 {
|
||||||
return s.error('unicode escape must have 4 hex digits')
|
return s.error('unicode escape must have 4 hex digits')
|
||||||
}
|
}
|
||||||
chrs << byte(strconv.parse_uint(codepoint.bytestr(), 16, 32))
|
val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32))
|
||||||
unsafe { codepoint.free() }
|
converted := utf32_to_str(val)
|
||||||
|
converted_bytes := converted.bytes()
|
||||||
|
chrs << converted_bytes
|
||||||
|
unsafe {
|
||||||
|
converted.free()
|
||||||
|
converted_bytes.free()
|
||||||
|
codepoint.free()
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
return s.error('incomplete unicode escape')
|
return s.error('incomplete unicode escape')
|
||||||
|
|
|
@ -20,6 +20,16 @@ fn test_str_valid_unicode_escape() {
|
||||||
assert tok.lit.bytestr() == 'H'
|
assert tok.lit.bytestr() == 'H'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A \u escape for a code point outside ASCII must be expanded by the
// scanner into the character's full 3-byte UTF-8 sequence.
fn test_str_valid_unicode_escape_2() {
	mut scanner := Scanner{
		text: r'"\u2714"'.bytes()
	}
	token := scanner.scan()
	assert token.kind == .str_
	assert token.lit.len == 3
	assert token.lit.bytestr() == '✔'
}
|
||||||
|
|
||||||
fn test_str_invalid_escape() {
|
fn test_str_invalid_escape() {
|
||||||
mut sc := Scanner{
|
mut sc := Scanner{
|
||||||
text: r'"\z"'.bytes()
|
text: r'"\z"'.bytes()
|
||||||
|
|
Loading…
Reference in New Issue