x.json2: create custom scanner for scanning JSON (#8716)

2021-02-26 14:36:02 +08:00 · 2021-02-26 14:36:02 +08:00 · 8dff168e01
parent 7bee3dc489
commit 8dff168e01
8 changed files with 730 additions and 372 deletions
--- a/vlib/x/json2/any_test.v
+++ b/vlib/x/json2/any_test.v
@ -1,7 +1,7 @@
 import x.json2
 const (
-	sample_data = {
+	sample_data = map{
 		'int':  json2.Any(int(1))
 		'i64':  json2.Any(i64(128))
 		'f32':  json2.Any(f32(2.0))
@ -10,7 +10,7 @@ const (
 		'str':  json2.Any('test')
 		'null': json2.Any(json2.null)
 		'arr':  json2.Any([json2.Any('lol')])
-		'obj': json2.Any({
+		'obj':  json2.Any(map{
 			'foo': json2.Any(10)
 		})
 	}
@ -126,6 +126,5 @@ fn test_str() {
 	assert sample_data['str'].str() == 'test'
 	assert sample_data['null'].str() == 'null'
 	assert sample_data['arr'].str() == '["lol"]'
-	assert sample_data.str() ==
+	assert sample_data.str() == '{"int":1,"i64":128,"f32":2.0,"f64":1.283,"bool":false,"str":"test","null":null,"arr":["lol"],"obj":{"foo":10}}'
 		'{"int":1,"i64":128,"f32":2.0,"f64":1.283,"bool":false,"str":"test","null":null,"arr":["lol"],"obj":{"foo":10}}'
 }
--- a/vlib/x/json2/decoder.v
+++ b/vlib/x/json2/decoder.v
@ -3,42 +3,20 @@
 // that can be found in the LICENSE file.
 module json2
 import strings
 import strconv
 import v.scanner
 import v.token
 import v.util
 import v.pref
 // `Any` is a sum type that lists the possible types to be decoded and used.
-pub type Any = string | int | i64 | f32 | f64 | bool | Null | []Any | map[string]Any
+pub type Any = Null | []Any | bool | f32 | f64 | i64 | int | map[string]Any | string
 // `Null` is the value used to represent JSON `null` inside `Any`.
 pub struct Null {
 	// always initialised to true; declared without `mut`/`pub`, so it is
 	// neither writable nor visible outside this module.
 	is_null bool = true
 }
 // ParseMode names the kind of top-level JSON value the parser believes it is
 // reading. `Parser.mode` starts as `.invalid` and is assigned by
 // `detect_parse_mode` from the first token(s) of the input.
 enum ParseMode {
 	array
 	bool
 	invalid
 	null
 	number
 	object
 	string
 }
 // Shared error messages used by the decoder.
 const (
 	// returned by `decode_value` when `is_formfeed` reports a form feed byte.
 	formfeed_err = 'formfeed not allowed.'
 	// returned by `decode_array`/`decode_object` when `.eof` is hit before
 	// the closing bracket.
 	eof_err      = 'reached eof. data not closed properly.'
 )
 struct Parser {
 mut:
-	scanner      &scanner.Scanner
+	scanner      &Scanner
-	p_tok        token.Token
+	p_tok        Token
-	tok          token.Token
+	tok          Token
-	n_tok        token.Token
+	n_tok        Token
 	mode         ParseMode = .invalid
 	n_level      int
 	convert_type bool = true
 }
@ -49,131 +27,63 @@ fn (mut p Parser) next() {
 	p.n_tok = p.scanner.scan()
 }
-fn (p Parser) emit_error(msg string) string {
+fn (mut p Parser) next_with_err() ? {
-	source := p.scanner.text
+	p.next()
-	cur := p.tok
+	if p.tok.kind == .error {
-	mut pp := util.imax(0, util.imin(source.len - 1, cur.pos))
+		return error(p.emit_error(p.tok.lit.bytestr()))
 	if source.len > 0 {
 		for pp >= 0 {
 			if source[pp] !in [`\r`, `\n`] {
 				pp--
 				continue
 	}
 			break
 		}
 	}
 	column := util.imax(0, cur.pos - pp + cur.len - 1)
 	line := cur.line_nr
 	return '[json] $msg ($line:$column)'
 }
-fn new_parser(srce string, convert_type bool) Parser {
+fn (p Parser) emit_error(msg string) string {
-	mut src := srce
+	line := p.tok.line
-	// from v/util/util.v
+	column := p.tok.col + p.tok.lit.len
-	if src.len >= 3 {
+	return '[x.json2] $msg ($line:$column)'
-		c_text := src.str
+}
 // TODO: copied from v.util to avoid the entire module and its functions
 // from being imported. remove later once -skip-unused is enabled by default.
 fn skip_bom(file_content string) string {
 	mut raw_text := file_content
 	// BOM check
 	if raw_text.len >= 3 {
 		unsafe {
 			c_text := raw_text.str
 			if c_text[0] == 0xEF && c_text[1] == 0xBB && c_text[2] == 0xBF {
 				// skip three BOM bytes
 				offset_from_begin := 3
-				src = tos(c_text[offset_from_begin], vstrlen(c_text) - offset_from_begin)
+				raw_text = tos(c_text[offset_from_begin], vstrlen(c_text) - offset_from_begin)
 			}
 		}
 	}
 	return raw_text
 }
 fn new_parser(srce string, convert_type bool) Parser {
 	src := skip_bom(srce)
 	return Parser{
-		scanner: scanner.new_scanner(src, .parse_comments, &pref.Preferences{output_mode: .silent})
+		scanner: &Scanner{
 			text: src.bytes()
 		}
 		convert_type: convert_type
 	}
 }
 // check_valid_hex returns an error unless `str` is exactly four characters
 // long and every character is a hexadecimal digit.
 fn check_valid_hex(str string) ? {
 	if str.len != 4 {
 		return error('hex string must be 4 characters.')
 	}
 	for ch in str {
 		if !ch.is_hex_digit() {
 			return error('provided string is not a hex digit.')
 		}
 	}
 }
 fn (mut p Parser) decode() ?Any {
-	p.detect_parse_mode()
+	p.next()
-	if p.mode == .invalid {
+	p.next_with_err() ?
-		return error(p.emit_error('invalid JSON.'))
+	fi := p.decode_value() ?
 	}
 	fi := p.decode_value() or {
 		return error(p.emit_error(err))
 	}
 	if p.tok.kind != .eof {
-		return error(p.emit_error('unknown token `$p.tok.kind`.'))
+		return error(p.emit_error('invalid token `$p.tok.kind`'))
 	}
 	return fi
 }
 // is_formfeed reports whether the byte two positions before the end of the
 // previous token is a form feed (0x0c). An index past the end of the
 // scanner text yields false.
 fn (p Parser) is_formfeed() bool {
 	idx := p.p_tok.pos + p.p_tok.len - 2
 	return idx < p.scanner.text.len && p.scanner.text[idx] == 0x0c
 }
 // is_singlequote reports whether the byte immediately after the previous
 // token in the scanner text is a single quote.
 fn (p Parser) is_singlequote() bool {
 	after_prev := p.p_tok.pos + p.p_tok.len
 	return p.scanner.text[after_prev] == `\'`
 }
 // detect_parse_mode primes `p.tok`/`p.n_tok` with the first two tokens and
 // sets `p.mode` from the kind of the first one. `p.mode` is left untouched
 // (its default is `.invalid`) when no branch below matches.
 fn (mut p Parser) detect_parse_mode() {
 	src := p.scanner.text
 	// A leading digit that is directly followed by a non-digit is rejected
 	// before any token is scanned.
 	// NOTE(review): as written this also rejects inputs such as `1.5` — confirm intended.
 	if src.len > 1 && src[0].is_digit() && !src[1].is_digit() {
 		p.mode = .invalid
 		return
 	}
 	p.tok = p.scanner.scan()
 	p.n_tok = p.scanner.scan()
 	// A one-byte input that scanned as a string token is rejected.
 	if src.len == 1 && p.tok.kind == .string && p.n_tok.kind == .eof {
 		p.mode = .invalid
 		return
 	}
 	match p.tok.kind {
 		.lcbr {
 			p.mode = .object
 		}
 		.lsbr {
 			p.mode = .array
 		}
 		.number {
 			p.mode = .number
 		}
 		.key_true, .key_false {
 			p.mode = .bool
 		}
 		.string {
 			p.mode = .string
 		}
 		.name {
 			// the only identifier accepted as a value is `null`
 			if p.tok.lit == 'null' {
 				p.mode = .null
 			}
 		}
 		.minus {
 			// a minus sign counts as a number only when a number token follows
 			if p.n_tok.kind == .number {
 				p.mode = .number
 			}
 		}
 		else {}
 	}
 }
 fn (mut p Parser) decode_value() ?Any {
 	if p.n_level == 500 {
-		return error('reached maximum nesting level of 500.')
+		return error(p.emit_error('reached maximum nesting level of 500'))
 	}
-	if (p.tok.kind == .lsbr && p.n_tok.kind == .lcbr) ||
+	if (p.tok.kind == .lsbr && p.n_tok.kind == .lcbr)
-		(p.p_tok.kind == p.tok.kind && p.tok.kind == .lsbr) {
+		|| (p.p_tok.kind == p.tok.kind && p.tok.kind == .lsbr) {
 		p.n_level++
 	}
 	match p.tok.kind {
@ -183,235 +93,76 @@ fn (mut p Parser) decode_value() ?Any {
 		.lcbr {
 			return p.decode_object()
 		}
-		.number {
+		.int_, .float {
-			return p.decode_number()
+			tl := p.tok.lit.bytestr()
 			kind := p.tok.kind
 			p.next_with_err() ?
 			if p.convert_type {
 				return if kind == .float { Any(tl.f64()) } else { Any(tl.i64()) }
 			}
-		.key_true {
+			return Any(tl)
 			p.next()
 			return if p.convert_type {
 				Any(true)
 			} else {
 				Any('true')
 		}
 		.bool_ {
 			lit := p.tok.lit.bytestr()
 			p.next_with_err() ?
 			return if p.convert_type { Any(lit.bool()) } else { Any(lit) }
 		}
-		.key_false {
+		.null {
-			p.next()
+			p.next_with_err() ?
-			return if p.convert_type {
+			return if p.convert_type { Any(null) } else { Any('null') }
 				Any(false)
 			} else {
 				Any('false')
 		}
-		}
+		.str_ {
-		.name {
+			str := p.tok.lit.bytestr()
-			if p.tok.lit != 'null' {
+			p.next_with_err() ?
-				return error('unknown identifier `$p.tok.lit`')
+			return Any(str)
 			}
 			p.next()
 			return if p.convert_type {
 				Any(Null{})
 			} else {
 				Any('null')
 			}
 		}
 		.string {
 			if p.is_singlequote() {
 				return error('strings must be in double-quotes.')
 			}
 			return p.decode_string()
 		}
 		else {
-			if p.tok.kind == .minus && p.n_tok.kind == .number && p.n_tok.pos == p.tok.pos + 1 {
+			return error(p.emit_error('invalid token `$p.tok.kind`'))
 				p.next()
 				d_num := p.decode_number() ?
 				return d_num
 		}
 			return error("unknown token '$p.tok.lit' when decoding value")
 		}
 	}
 	if p.is_formfeed() {
 		return error(formfeed_err)
 	}
 	return Any{}
 }
 // decode_string unescapes the literal of the current token into a new string,
 // advances the parser with `p.next()` and returns the result wrapped in `Any`.
 // It returns an error for unescaped control bytes, a trailing backslash,
 // an unknown escape letter, or a malformed `\u` escape.
 fn (mut p Parser) decode_string() ?Any {
 	mut strwr := strings.new_builder(200)
 	for i := 0; i < p.tok.lit.len; i++ {
 		// bytes 9 (tab), 10 (newline) and 0 are rejected unless the preceding
 		// byte is a forward slash.
 		// NOTE(review): the check uses `/`, not a backslash — confirm intended.
 		if ((i - 1 >= 0 && p.tok.lit[i - 1] != `/`) || i == 0) && int(p.tok.lit[i]) in [9, 10, 0] {
 			return error('character must be escaped with a backslash.')
 		}
 		// 92 is the backslash; one as the very last byte has nothing to escape
 		if i == p.tok.lit.len - 1 && p.tok.lit[i] == 92 {
 			return error('invalid backslash escape.')
 		}
 		if i + 1 < p.tok.lit.len && p.tok.lit[i] == 92 {
 			peek := p.tok.lit[i + 1]
 			// each recognised escape writes its replacement byte and skips
 			// the escape letter
 			match peek {
 				`b` {
 					i++
 					strwr.write_b(`\b`)
 					continue
 				}
 				`f` {
 					i++
 					strwr.write_b(`\f`)
 					continue
 				}
 				`n` {
 					i++
 					strwr.write_b(`\n`)
 					continue
 				}
 				`r` {
 					i++
 					strwr.write_b(`\r`)
 					continue
 				}
 				`t` {
 					i++
 					strwr.write_b(`\t`)
 					continue
 				}
 				`u` {
 					if i + 5 < p.tok.lit.len {
 						// the four bytes after `\u` must be hex digits
 						codepoint := p.tok.lit[i + 2..i + 6]
 						check_valid_hex(codepoint) ?
 						hex_val := strconv.parse_int(codepoint, 16, 0)
 						// only the low byte of the parsed value is written
 						strwr.write_b(byte(hex_val))
 						i += 5
 						continue
 					} else {
 						return error('incomplete unicode escape.')
 					}
 				}
 				`\\` {
 					i++
 					strwr.write_b(`\\`)
 					continue
 				}
 				`"` {
 					i++
 					strwr.write_b(`\"`)
 					continue
 				}
 				`/` {
 					i++
 					strwr.write_b(`/`)
 					continue
 				}
 				else { return error('invalid backslash escape.') }
 			}
 			// NOTE(review): every arm of the match above either continues or
 			// returns, so the two checks below appear to be unreachable.
 			if int(peek) == 85 {
 				return error('unicode endpoints must be in lowercase `u`.')
 			}
 			if int(peek) in [9, 229] {
 				return error('unicode endpoint not allowed.')
 			}
 		}
 		strwr.write_b(p.tok.lit[i])
 	}
 	p.next()
 	// the builder is freed when the function exits
 	defer {
 		unsafe { strwr.free() }
 	}
 	str := strwr.str()
 	return Any(str)
 }
 // decode_number validates the current number token, advances the parser and
 // returns its value. With `p.convert_type` set the result is an `f64` (when
 // the literal contains a dot) or an `i64`; otherwise the literal text itself
 // is returned as a string.
 fn (mut p Parser) decode_number() ?Any {
 	src := p.scanner.text
 	mut tl := p.tok.lit
 	mut is_fl := false
 	sep_by_dot := tl.to_lower().split('.')
 	if tl.starts_with('0x') && tl.all_after('0x').len <= 2 {
 		return error('hex numbers should not be less than or equal to two digits.')
 	}
 	// looks at the raw source right after the previous token: `0` followed by
 	// another digit is a leading zero
 	if src[p.p_tok.pos + p.p_tok.len] == `0` && src[p.p_tok.pos + p.p_tok.len + 1].is_digit() {
 		return error('leading zeroes in integers are not allowed.')
 	}
 	if tl.starts_with('.') {
 		return error('decimals must start with a digit followed by a dot.')
 	}
 	if tl.ends_with('+') || tl.ends_with('-') {
 		return error('exponents must have a digit before the sign.')
 	}
 	if sep_by_dot.len > 1 {
 		// analyze json number structure
 		// -[digit][dot][digit][E/e][-/+][digit]
 		// float number
 		is_fl = true
 		last := sep_by_dot.last()
 		if last.starts_with('e') {
 			return error('exponents must have a digit before the exponent notation.')
 		}
 	}
 	// re-attach a minus sign that was scanned as its own token directly
 	// before the number
 	if p.p_tok.kind == .minus && p.tok.pos == p.p_tok.pos + 1 {
 		tl = '-$tl'
 	}
 	p.next()
 	if p.convert_type {
 		return if is_fl {
 			Any(tl.f64())
 		} else {
 			Any(tl.i64())
 		}
 	}
 	return Any(tl)
 }
 fn (mut p Parser) decode_array() ?Any {
 	mut items := []Any{}
-	p.next()
+	p.next_with_err() ?
 	for p.tok.kind != .rsbr {
 		if p.tok.kind == .eof {
 			return error(eof_err)
 		}
 		item := p.decode_value() ?
 		items << item
-		if p.tok.kind == .comma && p.n_tok.kind !in [.rsbr, .comma] {
+		if p.tok.kind == .comma {
-			p.next()
+			p.next_with_err() ?
-			continue
+			if p.tok.kind == .rsbr || p.tok.kind == .rcbr {
 				return error(p.emit_error('invalid token `$p.tok.lit'))
 			}
-		if p.tok.kind == .rsbr {
+		} else if p.tok.kind == .rsbr {
 			break
 		} else {
 			return error(p.emit_error("unknown token '$p.tok.lit' when decoding array."))
 		}
 		return error("unknown token '$p.tok.lit' when decoding arrays.")
 	}
-	p.next()
+	p.next_with_err() ?
 	return Any(items)
 }
 fn (mut p Parser) decode_object() ?Any {
 	mut fields := map[string]Any{}
-	mut cur_key := ''
+	p.next_with_err() ?
 	p.next()
 	for p.tok.kind != .rcbr {
-		is_key := p.tok.kind == .string && p.n_tok.kind == .colon
+		is_key := p.tok.kind == .str_ && p.n_tok.kind == .colon
 		// todo
 		// if p.is_formfeed() {
 		// return error(formfeed_err)
 		// }
 		if p.tok.kind == .eof {
 			return error(eof_err)
 		}
 		if p.is_singlequote() {
 			return error('object keys must be in single quotes.')
 		}
 		if !is_key {
-			return error("invalid token `$p.tok.lit`, expected \'string\'")
+			return error(p.emit_error('invalid token `$p.tok.kind`, expecting `str_`'))
 		}
-		cur_key = p.tok.lit
+		cur_key := p.tok.lit.bytestr()
-		p.next()
+		p.next_with_err() ?
-		p.next()
+		p.next_with_err() ?
 		fields[cur_key] = p.decode_value() ?
-		if p.tok.kind == .comma && p.n_tok.kind !in [.rcbr, .comma] {
+		if p.tok.kind == .comma {
-			p.next()
+			p.next_with_err() ?
-			continue
+			if p.tok.kind != .str_ {
-		} else if p.tok.kind == .rcbr {
+				return error(p.emit_error("unknown token '$p.tok.lit' when decoding object."))
 			break
 			}
 		return error("unknown token '$p.tok.lit' when decoding object.")
 		}
-	p.next()
+	}
 	p.next_with_err() ?
 	return Any(fields)
 }
--- a/vlib/x/json2/decoder_test.v
+++ b/vlib/x/json2/decoder_test.v
@ -54,8 +54,16 @@ fn test_raw_decode_null() {
 fn test_raw_decode_invalid() {
 	json2.raw_decode('1z') or {
-		assert err == '[json] invalid JSON. (0:0)'
+		assert err == '[x.json2] invalid token `z` (0:17)'
 		return
 	}
 	assert false
 }
 // A `$` inside a JSON string must come through unchanged.
 fn test_raw_decode_string_with_dollarsign() {
 	decoded := json2.raw_decode(r'"Hello $world"') or {
 		assert false
 		json2.Any{}
 	}
 	assert decoded.str() == r'Hello $world'
 }
--- a/vlib/x/json2/encoder.v
+++ b/vlib/x/json2/encoder.v
@ -65,19 +65,11 @@ pub fn (f Any) str() string {
 		}
 		f32 {
 			str_f32 := f.str()
-			return if str_f32.ends_with('.') {
+			return if str_f32.ends_with('.') { '${str_f32}0' } else { str_f32 }
 				str_f32 + '0'
 			} else {
 				str_f32
 			}
 		}
 		f64 {
 			str_f64 := f.str()
-			return if str_f64.ends_with('.') {
+			return if str_f64.ends_with('.') { '${str_f64}0' } else { str_f64 }
 				str_f64 + '0'
 			} else {
 				str_f64
 			}
 		}
 		bool {
 			return f.str()
@ -85,14 +77,11 @@ pub fn (f Any) str() string {
 		map[string]Any {
 			return f.str()
 		}
 		[]Any {
 			return f.str()
 		}
 		Null {
 			return 'null'
 		}
 		else {
 			if f is []Any {
 				return f.str()
 			}
 			return ''
 		}
 	}
 }
--- a/vlib/x/json2/json2.v
+++ b/vlib/x/json2/json2.v
@ -48,7 +48,7 @@ pub fn (f Any) as_map() map[string]Any {
 		}
 		return mp
 	}
-	return {
+	return map{
 		'0': f
 	}
 }
--- a/vlib/x/json2/json2_test.v
+++ b/vlib/x/json2/json2_test.v
@ -48,8 +48,9 @@ fn test_simple() {
 	eprintln('Employee x: $s')
 	assert s == '{"name":"Peter","age":28,"salary":95000.5,"title":2}'
 	y := json2.decode<Employee>(s) or {
 		println(err)
 		assert false
-		Employee{}
+		return
 	}
 	eprintln('Employee y: $y')
 	assert y.name == 'Peter'
@ -69,17 +70,17 @@ fn test_fast_raw_decode() {
 }
 fn test_character_unescape() {
-	// Need to test `\r`, `\b`, `\f` ??
+	message := r'{
-	message := '{
+	"newline": "new\nline",
-		"newline":"new\\nline",
+	"tab": "\ttab",
-		"tab":"\\ttab",
+	"backslash": "back\\slash",
-		"backslash": "back\\\\slash",
+	"quotes": "\"quotes\"",
 		"quotes": "\\"quotes\\"",
 	"slash":"\/dev\/null"
 }'
 	mut obj := json2.raw_decode(message) or {
 		println(err)
 		assert false
-		json2.Any{}
+		return
 	}
 	lines := obj.as_map()
 	eprintln('$lines')
@ -152,7 +153,7 @@ fn (mut u User) from_json(an json2.Any) {
 fn (u User) to_json() string {
 	// TODO: derive from field
-	mut mp := {
+	mut mp := map{
 		'age': json2.Any(u.age)
 	}
 	mp['nums'] = u.nums.map(json2.Any(it))
@ -166,13 +167,15 @@ fn (u User) to_json() string {
 fn test_parse_user() {
 	s := '{"age": 10, "nums": [1,2,3], "type": 1, "lastName": "Johnson", "IsRegistered": true, "pet_animals": {"name": "Bob", "animal": "Dog"}}'
 	u2 := json2.decode<User2>(s) or {
 		println(err)
 		assert false
-		User2{}
+		return
 	}
 	println(u2)
 	u := json2.decode<User>(s) or {
 		println(err)
 		assert false
-		User{}
+		return
 	}
 	assert u.age == 10
 	assert u.last_name == 'Johnson'
@ -249,7 +252,7 @@ fn test_struct_in_struct() {
 */
 fn test_encode_map() {
 	expected := '{"one":1,"two":2,"three":3,"four":4}'
-	numbers := {
+	numbers := map{
 		'one':   json2.Any(1)
 		'two':   json2.Any(2)
 		'three': json2.Any(3)
--- a/vlib/x/json2/scanner.v
+++ b/vlib/x/json2/scanner.v
@ -0,0 +1,288 @@
 // Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
 // Use of this source code is governed by an MIT license
 // that can be found in the LICENSE file.
 module json2
 import strconv
 // Scanner holds the raw JSON input and the cursor state used by `scan`.
 struct Scanner {
 mut:
 	// the input being scanned, as bytes
 	text []byte
 	// index into `text` of the byte currently being looked at
 	pos  int
 	// line counter, incremented by `move` when it meets a byte from `newlines`
 	line int
 	// column counter, reset to 0 by `move` on a new line
 	col  int
 }
 // TokenKind classifies the tokens produced by `Scanner.scan`.
 // The punctuation kinds carry explicit values equal to the ASCII code of
 // their character, which lets `scan` build them with `TokenKind(int(chr))`.
 enum TokenKind {
 	none_
 	error
 	str_
 	float
 	int_
 	null
 	bool_
 	eof
 	comma = 44 // `,`
 	colon = 58 // `:`
 	lsbr = 91 // `[`
 	rsbr = 93 // `]`
 	lcbr = 123 // `{`
 	rcbr = 125 // `}`
 }
 // Token is a single lexical unit returned by the scanner.
 struct Token {
 	// literal bytes: the unescaped text of a string, the digits of a number,
 	// the keyword bytes, or the message of an `.error` token
 	lit  []byte
 	kind TokenKind
 	// scanner line/column at the moment the token was created (see `tokenize`)
 	line int
 	col  int
 }
 const (
 	// structural characters; each one becomes its own token in `scan`.
 	char_list                 = [`{`, `}`, `[`, `]`, `,`, `:`]
 	// bytes that `move` treats as a line break when newline checking is on.
 	// NOTE(review): byte(9) and `\t` are the same byte, so tab is listed twice
 	// and is counted as a new line — confirm intended.
 	newlines                  = [`\r`, `\n`, byte(9), `\t`]
 	// list of escapable that needs to be escaped inside a JSON string.
 	// double quotes and forward slashes are excluded intentionally since
 	// they have their own separate checks for it in order to pass the
 	// JSON test suite (https://github.com/nst/JSONTestSuite/).
 	// (byte(9)/`\t` and 10/`\n` appear twice; the duplicates are harmless.)
 	important_escapable_chars = [byte(9), 10, 0, `\b`, `\f`, `\n`, `\r`, `\t`]
 	// characters that may follow a backslash, aside from `u` + 4 hex digits
 	valid_unicode_escapes     = [`b`, `f`, `n`, `r`, `t`, `\\`, `"`, `/`]
 	// maps the ASCII code of an escape letter to the byte it stands for
 	// (eg. 110 `n` => `\n`); looked up in `text_scan` with `int(peek)`.
 	unicode_transform_escapes = map{
 		98:  `\b`
 		102: `\f`
 		110: `\n`
 		114: `\r`
 		116: `\t`
 		92:  `\\`
 		34:  `"`
 		47:  `/`
 	}
 	// signs allowed directly after the `e`/`E` of an exponent
 	exp_signs                 = [byte(`-`), `+`]
 )
 // move_pos advances the scanner by one byte and then skips any spaces or
 // newline bytes found there (it calls `move` with both checks enabled).
 fn (mut s Scanner) move_pos() {
 	s.move(true, true)
 }
 // move_pos_with_newlines advances the scanner by one byte with only the
 // newline check enabled; a space at the new position is left in place.
 fn (mut s Scanner) move_pos_with_newlines() {
 	s.move(false, true)
 }
 // move advances `s.pos` by one byte and optionally skips whitespace found at
 // the new position.
 // `include_newlines`: when the new byte is in `newlines`, bump `s.line`,
 // reset `s.col` and skip the rest of the newline run.
 // `include_space`: when the new byte is a space, skip the run of spaces.
 // If the new position is past the end of the input only `s.col` is bumped.
 fn (mut s Scanner) move(include_space bool, include_newlines bool) {
 	s.pos++
 	if s.pos < s.text.len {
 		if include_newlines && s.text[s.pos] in json2.newlines {
 			s.line++
 			s.col = 0
 			// treat `\r\n` as a single line break
 			if s.text[s.pos] == `\r` && s.pos + 1 < s.text.len && s.text[s.pos + 1] == `\n` {
 				s.pos++
 			}
 			// move_pos re-enters move with both checks on, so spaces that
 			// follow the newline run are skipped as well
 			for s.pos < s.text.len && s.text[s.pos] in json2.newlines {
 				s.move_pos()
 			}
 		} else if include_space && s.text[s.pos] == ` ` {
 			s.pos++
 			s.col++
 			// NOTE(review): after the `s.pos++` above, a newline byte that
 			// directly follows a single space is not examined here — confirm.
 			for s.pos < s.text.len && s.text[s.pos] == ` ` {
 				s.move_pos()
 			}
 		}
 	} else {
 		s.col++
 	}
 }
 // error returns an `.error` token whose literal holds the bytes of
 // `description`, stamped with the scanner's current line and column.
 fn (s Scanner) error(description string) Token {
 	return s.tokenize(description.bytes(), .error)
 }
 // tokenize builds a Token of the given kind around `lit`, recording the
 // scanner's current line and column on it.
 fn (s Scanner) tokenize(lit []byte, kind TokenKind) Token {
 	return Token{
 		kind: kind
 		lit: lit
 		line: s.line
 		col: s.col
 	}
 }
 // text_scan scans a double-quoted JSON string. It must be called with
 // `s.pos` on the opening quote. On success it returns a `.str_` token holding
 // the unescaped bytes and leaves the scanner after the closing quote;
 // otherwise it returns an `.error` token describing the problem.
 //
 // `\uXXXX` escapes are written to the result as UTF-8.
 // NOTE: UTF-16 surrogate pairs (`\uD83D\uDE00`) are not combined; each half
 // is encoded on its own.
 [manualfree]
 fn (mut s Scanner) text_scan() Token {
 	mut has_closed := false
 	mut chrs := []byte{}
 	for {
 		s.move(false, false)
 		if s.pos >= s.text.len {
 			break
 		}
 		ch := s.text[s.pos]
 		// Escape sequences are consumed as a whole by the backslash branch
 		// below, so every byte that reaches this point is unescaped. Looking
 		// at the previous byte instead would misread the second half of `\\`
 		// as an escape introducer (and break inputs such as `"\\"`).
 		if ch == `"` {
 			has_closed = true
 			break
 		} else if ch in json2.important_escapable_chars {
 			return s.error('character must be escaped with a backslash')
 		} else if s.pos == s.text.len - 1 && ch == `\\` {
 			return s.error('invalid backslash escape')
 		} else if s.pos + 1 < s.text.len && ch == `\\` {
 			peek := s.text[s.pos + 1]
 			if peek in json2.valid_unicode_escapes {
 				chrs << json2.unicode_transform_escapes[int(peek)]
 				// step onto the escape letter so the next iteration skips it
 				s.move(false, false)
 				continue
 			} else if peek == `u` {
 				if s.pos + 5 < s.text.len {
 					// step onto the `u`
 					s.move(false, false)
 					mut codepoint := []byte{}
 					codepoint_start := s.pos
 					// collect up to four hex digits following the `u`
 					for s.pos < s.text.len && s.pos < codepoint_start + 4 {
 						s.move(false, false)
 						if s.text[s.pos] == `"` {
 							break
 						} else if !s.text[s.pos].is_hex_digit() {
 							return s.error('`${s.text[s.pos].ascii_str()}` is not a hex digit')
 						}
 						codepoint << s.text[s.pos]
 					}
 					if codepoint.len != 4 {
 						return s.error('unicode escape must have 4 hex digits')
 					}
 					// encode the code point as UTF-8 instead of truncating it
 					// to its low byte
 					code := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32))
 					encoded := utf32_to_str(code)
 					chrs << encoded.bytes()
 					unsafe { codepoint.free() }
 					continue
 				} else {
 					return s.error('incomplete unicode escape')
 				}
 			} else if peek == `U` {
 				return s.error('unicode endpoints must be in lowercase `u`')
 			} else if peek == byte(229) {
 				return s.error('unicode endpoint not allowed')
 			} else {
 				return s.error('invalid backslash escape')
 			}
 		}
 		chrs << ch
 	}
 	// the token is created before moving on so that it carries the position
 	// of the string itself
 	tok := s.tokenize(chrs, .str_)
 	s.move_pos()
 	if !has_closed {
 		return s.error('missing double quotes in string closing')
 	}
 	return tok
 }
 // num_scan scans a JSON number starting at `s.pos` (a digit or `-`) and
 // returns a `.float` token when the literal contains a dot, an `.int_` token
 // otherwise. Malformed numbers (a lone `-`, leading zeroes, a dot without
 // following digits, an exponent without digits) yield an `.error` token.
 fn (mut s Scanner) num_scan() Token {
 	// analyze json number structure
 	// -[digit][?[dot][digit]][?[E/e][?-/+][digit]]
 	mut is_fl := false
 	mut dot_index := -1
 	mut digits := []byte{}
 	if s.text[s.pos] == `-` {
 		digits << `-`
 		// a minus sign must be followed by a digit; also guards against
 		// reading past the end when `-` is the last byte of the input
 		if s.pos + 1 >= s.text.len || !s.text[s.pos + 1].is_digit() {
 			return s.invalid_token()
 		}
 		s.move_pos_with_newlines()
 	}
 	if s.text[s.pos] == `0` && (s.pos + 1 < s.text.len && s.text[s.pos + 1].is_digit()) {
 		return s.error('leading zeroes in a number are not allowed')
 	}
 	// integer part and, at most once, a dot followed by the fraction digits
 	for s.pos < s.text.len && (s.text[s.pos].is_digit() || (!is_fl && s.text[s.pos] == `.`)) {
 		digits << s.text[s.pos]
 		if s.text[s.pos] == `.` {
 			is_fl = true
 			dot_index = digits.len - 1
 		}
 		s.move_pos_with_newlines()
 	}
 	// a dot must be followed by at least one digit; `dot_index` is an index
 	// into `digits`, so it is compared against `digits.len`
 	if is_fl && dot_index == digits.len - 1 {
 		return s.error('invalid float')
 	}
 	if s.pos < s.text.len && (s.text[s.pos] == `e` || s.text[s.pos] == `E`) {
 		digits << s.text[s.pos]
 		s.move_pos_with_newlines()
 		if s.pos < s.text.len && s.text[s.pos] in json2.exp_signs {
 			digits << s.text[s.pos]
 			s.move_pos_with_newlines()
 		}
 		mut exp_digits_count := 0
 		for s.pos < s.text.len && s.text[s.pos].is_digit() {
 			digits << s.text[s.pos]
 			exp_digits_count++
 			s.move_pos_with_newlines()
 		}
 		if exp_digits_count == 0 {
 			return s.error('invalid exponent')
 		}
 	}
 	kind := if is_fl { TokenKind.float } else { TokenKind.int_ }
 	return s.tokenize(digits, kind)
 }
 // invalid_token returns an `.error` token whose message names the byte at
 // the scanner's current position. It indexes `s.text[s.pos]`, so the caller
 // must make sure `s.pos` is inside the input.
 fn (s Scanner) invalid_token() Token {
 	return s.error('invalid token `${s.text[s.pos].ascii_str()}`')
 }
 // scan returns the next token starting at the scanner's current position and
 // advances past it. It returns an `.eof` token once the input is exhausted
 // and an `.error` token for anything that is not valid JSON.
 [manualfree]
 fn (mut s Scanner) scan() Token {
 	// Skip all whitespace in front of the token, not only spaces, so that
 	// input starting with a tab or line break (or a single space followed by
 	// a line break between tokens) is not reported as an invalid token.
 	for s.pos < s.text.len && (s.text[s.pos] == ` ` || s.text[s.pos] in json2.newlines) {
 		if s.text[s.pos] == `\n` {
 			s.line++
 			s.col = 0
 		}
 		s.pos++
 	}
 	if s.pos >= s.text.len {
 		return s.tokenize([]byte{}, .eof)
 	} else if s.pos + 3 < s.text.len && (s.text[s.pos] == `t` || s.text[s.pos] == `n`) {
 		// four-letter keywords: `true` and `null`
 		ident := s.text[s.pos..s.pos + 4].bytestr()
 		if ident == 'true' || ident == 'null' {
 			mut kind := TokenKind.null
 			if ident == 'true' {
 				kind = .bool_
 			}
 			unsafe { ident.free() }
 			val := s.text[s.pos..s.pos + 4]
 			tok := s.tokenize(val, kind)
 			// one move per keyword byte
 			s.move_pos()
 			s.move_pos()
 			s.move_pos()
 			s.move_pos()
 			return tok
 		}
 		unsafe { ident.free() }
 		return s.invalid_token()
 	} else if s.pos + 4 < s.text.len && s.text[s.pos] == `f` {
 		// five-letter keyword: `false`
 		ident := s.text[s.pos..s.pos + 5].bytestr()
 		if ident == 'false' {
 			unsafe { ident.free() }
 			val := s.text[s.pos..s.pos + 5]
 			tok := s.tokenize(val, .bool_)
 			s.move_pos()
 			s.move_pos()
 			s.move_pos()
 			s.move_pos()
 			s.move_pos()
 			return tok
 		}
 		unsafe { ident.free() }
 		return s.invalid_token()
 	} else if s.text[s.pos] in json2.char_list {
 		// punctuation kinds share their value with the character's ASCII code
 		chr := s.text[s.pos]
 		tok := s.tokenize([]byte{}, TokenKind(int(chr)))
 		s.move_pos()
 		return tok
 	} else if s.text[s.pos] == `"` {
 		return s.text_scan()
 	} else if s.text[s.pos].is_digit() || s.text[s.pos] == `-` {
 		return s.num_scan()
 	} else {
 		return s.invalid_token()
 	}
 }
--- a/vlib/x/json2/scanner_test.v
+++ b/vlib/x/json2/scanner_test.v
@ -0,0 +1,320 @@
 module json2
 // A plain quoted string yields a `.str_` token holding the unquoted text.
 fn test_str() {
 	input := '"test"'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .str_
 	assert result.lit.len == 4
 	assert result.lit.bytestr() == 'test'
 }
 // A `\u` escape with four hex digits is decoded into the character it names.
 fn test_str_valid_unicode_escape() {
 	input := r'"\u0048"'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .str_
 	assert result.lit.len == 1
 	assert result.lit.bytestr() == 'H'
 }
 // An unknown escape letter produces an error token.
 fn test_str_invalid_escape() {
 	input := r'"\z"'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid backslash escape'
 }
 // Every byte listed in `important_escapable_chars` is rejected when it
 // appears unescaped inside a string.
 fn test_str_invalid_must_be_escape() {
 	for raw in important_escapable_chars {
 		input := [byte(`"`), `t`, raw, `"`]
 		mut lexer := Scanner{
 			text: input
 		}
 		result := lexer.scan()
 		assert result.kind == .error
 		assert result.lit.bytestr() == 'character must be escaped with a backslash'
 	}
 }
 // A non-hex character inside a `\u` escape is reported by name.
 fn test_str_invalid_unicode_escape() {
 	input := r'"\u010G"'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == '`G` is not a hex digit'
 }
 // A `\u` escape with fewer than four hex digits is an error.
 fn test_str_invalid_unicode_escape_len() {
 	input := r'"\u001"'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'unicode escape must have 4 hex digits'
 }
 // An uppercase `\U` escape is rejected with a dedicated message.
 fn test_str_invalid_uppercase_u() {
 	input := r'"\U0000"'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'unicode endpoints must be in lowercase `u`'
 }
 // A string without its closing double quote is an error.
 fn test_str_missing_closing_bracket() {
 	input := '"incomplete'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'missing double quotes in string closing'
 }
 // A run of digits yields an `.int_` token with the same text.
 fn test_int() {
 	input := '10'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .int_
 	assert result.lit.len == 2
 	assert result.lit.bytestr() == '10'
 }
 // The minus sign is kept as part of an integer literal.
 fn test_int_negative() {
 	input := '-10'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .int_
 	assert result.lit.len == 3
 	assert result.lit.bytestr() == '-10'
 }
 // A literal containing a dot yields a `.float` token; trailing zeroes stay.
 fn test_float() {
 	input := '123.400'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .float
 	assert result.lit.len == 7
 	assert result.lit.bytestr() == '123.400'
 }
 // The minus sign is kept as part of a float literal.
 fn test_float_negative() {
 	input := '-123.400'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .float
 	assert result.lit.len == 8
 	assert result.lit.bytestr() == '-123.400'
 }
 // An exponent on a literal without a dot still yields an `.int_` token.
 fn test_int_exp() {
 	input := '1E22'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .int_
 	assert result.lit.len == 4
 	assert result.lit.bytestr() == '1E22'
 }
 // A negative exponent sign is accepted and kept in the literal.
 fn test_int_exp_negative() {
 	input := '1E-2'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .int_
 	assert result.lit.len == 4
 	assert result.lit.bytestr() == '1E-2'
 }
 // An explicit positive exponent sign is accepted and kept in the literal.
 fn test_int_exp_positive() {
 	input := '1E+2'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .int_
 	assert result.lit.len == 4
 	assert result.lit.bytestr() == '1E+2'
 }
 // A float followed by a lowercase exponent yields a `.float` token.
 fn test_float_exp() {
 	input := '123.456e78'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .float
 	assert result.lit.len == 10
 	assert result.lit.bytestr() == '123.456e78'
 }
 // A float with a negative exponent keeps its full text.
 fn test_float_exp_negative() {
 	input := '20.56e-5'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .float
 	assert result.lit.len == 8
 	assert result.lit.bytestr() == '20.56e-5'
 }
 // A float with an explicit positive exponent keeps its full text.
 fn test_float_exp_positive() {
 	input := '20.56e+5'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .float
 	assert result.lit.len == 8
 	assert result.lit.bytestr() == '20.56e+5'
 }
 // A leading space is skipped and not included in the number literal.
 fn test_number_with_space() {
 	input := ' 4'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .int_
 	assert result.lit.len == 1
 	assert result.lit.bytestr() == '4'
 }
 // A number starting with `0` followed by another digit is rejected.
 fn test_number_invalid_leading_zero() {
 	input := '0010'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'leading zeroes in a number are not allowed'
 }
 // The leading-zero rule also applies after a minus sign.
 fn test_number_invalid_leading_zero_negative() {
 	input := '-0010'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'leading zeroes in a number are not allowed'
 }
 // A leading plus sign is not a valid start of a number.
 fn test_number_invalid_start_char() {
 	input := '+1'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid token `+`'
 }
 // The number is scanned first; the stray letter after it is the error.
 fn test_number_invalid_char() {
 	input := '122x'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	lexer.scan()
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid token `x`'
 }
 // A letter in the middle of a would-be float is reported on the second scan.
 fn test_number_invalid_char_float() {
 	input := '122x.1'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	lexer.scan()
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid token `x`'
 }
 // Only one dot belongs to a number; the second dot is an invalid token.
 fn test_number_invalid_multiple_dot() {
 	input := '122.108.10'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	lexer.scan()
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid token `.`'
 }
 // An exponent marker without digits is an error.
 fn test_number_invalid_exp() {
 	input := '0.3e'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid exponent'
 }
 // An exponent sign without digits after it is an error.
 fn test_number_invalid_exp_with_sign() {
 	input := '0.3e+'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid exponent'
 }
 // `0e` has an exponent marker but no exponent digits.
 fn test_number_invalid_zero_exp() {
 	input := '0e'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid exponent'
 }
 // A dot with no fraction digits is reported before the exponent is looked at.
 fn test_number_invalid_dot_exp() {
 	input := '0.e'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid float'
 }
 // After the first scan of `2eE`, the second scan reports the stray `E`.
 fn test_number_invalid_double_exp() {
 	input := '2eE'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	lexer.scan()
 	result := lexer.scan()
 	assert result.kind == .error
 	assert result.lit.bytestr() == 'invalid token `E`'
 }
 // The keyword `null` yields a `.null` token carrying its own text.
 fn test_null() {
 	input := 'null'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .null
 	assert result.lit.len == 4
 	assert result.lit.bytestr() == 'null'
 }
 // The keyword `true` yields a `.bool_` token carrying its own text.
 fn test_bool_true() {
 	input := 'true'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .bool_
 	assert result.lit.len == 4
 	assert result.lit.bytestr() == 'true'
 }
 // The keyword `false` yields a `.bool_` token carrying its own text.
 fn test_bool_false() {
 	input := 'false'.bytes()
 	mut lexer := Scanner{
 		text: input
 	}
 	result := lexer.scan()
 	assert result.kind == .bool_
 	assert result.lit.len == 5
 	assert result.lit.bytestr() == 'false'
 }