x.json2: skip whitespace before scanning (#9508)

pull/9531/head
Ned Palacios 2021-03-30 15:40:20 +08:00 committed by GitHub
parent c4e389be41
commit c5302bfcf5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 67 additions and 26 deletions

View File

@ -4,7 +4,8 @@
module json2 module json2
// `Any` is a sum type that lists the possible types to be decoded and used. // `Any` is a sum type that lists the possible types to be decoded and used.
pub type Any = Null | []Any | bool | f32 | f64 | i64 | u64 | int | map[string]Any | string pub type Any = Null | []Any | bool | f32 | f64 | i64 | int | map[string]Any | string |
u64
// `Null` struct is a simple representation of the `null` value in JSON. // `Null` struct is a simple representation of the `null` value in JSON.
pub struct Null { pub struct Null {

View File

@ -67,3 +67,14 @@ fn test_raw_decode_string_with_dollarsign() {
} }
assert str.str() == r'Hello $world' assert str.str() == r'Hello $world'
} }
// test_raw_decode_map_with_whitespaces checks that `json2.raw_decode` accepts a
// JSON object that is preceded by a space, a newline and a tab, and followed by
// a newline and a tab, and that the decoded map still holds the expected values.
fn test_raw_decode_map_with_whitespaces() {
raw_mp := json2.raw_decode(' \n\t{"name":"Bob","age":20}\n\t') or {
// Decoding is expected to succeed: report the error and fail the test.
eprintln(err.msg)
assert false
// Value produced by the `or` block when decoding fails.
json2.Any{}
}
mp := raw_mp.as_map()
// Both fields must survive decoding despite the surrounding whitespace.
assert mp['name'].str() == 'Bob'
assert mp['age'].int() == 20
}

View File

@ -124,7 +124,7 @@ pub mut:
last_name string [json: lastName] last_name string [json: lastName]
is_registered bool [json: IsRegistered] is_registered bool [json: IsRegistered]
typ int [json: 'type'] typ int [json: 'type']
pets string [raw; json: 'pet_animals'] pets string [json: 'pet_animals'; raw]
} }
fn (mut u User) from_json(an json2.Any) { fn (mut u User) from_json(an json2.Any) {

View File

@ -64,16 +64,16 @@ const (
) )
// move_pos proceeds to the next position. // move_pos proceeds to the next position.
fn (mut s Scanner) move_pos() { fn (mut s Scanner) move() {
s.move(true, true) s.move_pos(true, true)
} }
// move_pos_with_newlines is the same as move_pos but only enables newline checking. // move_pos_with_newlines is the same as move_pos but only enables newline checking.
fn (mut s Scanner) move_pos_with_newlines() { fn (mut s Scanner) move_pos_with_newlines() {
s.move(false, true) s.move_pos(false, true)
} }
fn (mut s Scanner) move(include_space bool, include_newlines bool) { fn (mut s Scanner) move_pos(include_space bool, include_newlines bool) {
s.pos++ s.pos++
if s.pos < s.text.len { if s.pos < s.text.len {
if include_newlines && s.text[s.pos] in json2.newlines { if include_newlines && s.text[s.pos] in json2.newlines {
@ -83,13 +83,13 @@ fn (mut s Scanner) move(include_space bool, include_newlines bool) {
s.pos++ s.pos++
} }
for s.pos < s.text.len && s.text[s.pos] in json2.newlines { for s.pos < s.text.len && s.text[s.pos] in json2.newlines {
s.move_pos() s.move()
} }
} else if include_space && s.text[s.pos] == ` ` { } else if include_space && s.text[s.pos] == ` ` {
s.pos++ s.pos++
s.col++ s.col++
for s.pos < s.text.len && s.text[s.pos] == ` ` { for s.pos < s.text.len && s.text[s.pos] == ` ` {
s.move_pos() s.move()
} }
} }
} else { } else {
@ -118,7 +118,8 @@ fn (mut s Scanner) text_scan() Token {
mut has_closed := false mut has_closed := false
mut chrs := []byte{} mut chrs := []byte{}
for { for {
s.move(false, false) s.pos++
s.col++
if s.pos >= s.text.len { if s.pos >= s.text.len {
break break
} }
@ -135,15 +136,18 @@ fn (mut s Scanner) text_scan() Token {
peek := s.text[s.pos + 1] peek := s.text[s.pos + 1]
if peek in json2.valid_unicode_escapes { if peek in json2.valid_unicode_escapes {
chrs << json2.unicode_transform_escapes[int(peek)] chrs << json2.unicode_transform_escapes[int(peek)]
s.move(false, false) s.pos++
s.col++
continue continue
} else if peek == `u` { } else if peek == `u` {
if s.pos + 5 < s.text.len { if s.pos + 5 < s.text.len {
s.move(false, false) s.pos++
s.col++
mut codepoint := []byte{} mut codepoint := []byte{}
codepoint_start := s.pos codepoint_start := s.pos
for s.pos < s.text.len && s.pos < codepoint_start + 4 { for s.pos < s.text.len && s.pos < codepoint_start + 4 {
s.move(false, false) s.pos++
s.col++
if s.text[s.pos] == `"` { if s.text[s.pos] == `"` {
break break
} else if !s.text[s.pos].is_hex_digit() { } else if !s.text[s.pos].is_hex_digit() {
@ -178,7 +182,7 @@ fn (mut s Scanner) text_scan() Token {
chrs << ch chrs << ch
} }
tok := s.tokenize(chrs, .str_) tok := s.tokenize(chrs, .str_)
s.move_pos() s.move()
if !has_closed { if !has_closed {
return s.error('missing double quotes in string closing') return s.error('missing double quotes in string closing')
} }
@ -236,14 +240,18 @@ fn (mut s Scanner) num_scan() Token {
// invalid_token returns an error token with the invalid token message. // invalid_token returns an error token with the invalid token message.
fn (s Scanner) invalid_token() Token { fn (s Scanner) invalid_token() Token {
return s.error('invalid token `${s.text[s.pos].ascii_str()}`') if s.text[s.pos] >= 32 && s.text[s.pos] <= 126 {
return s.error('invalid token `${s.text[s.pos].ascii_str()}`')
} else {
return s.error('invalid token ${s.text[s.pos].str_escaped()}')
}
} }
// scan returns a token based on the scanner's current position. // scan returns a token based on the scanner's current position.
[manualfree] [manualfree]
fn (mut s Scanner) scan() Token { fn (mut s Scanner) scan() Token {
for s.pos < s.text.len && s.text[s.pos] == ` ` { if s.pos < s.text.len && (s.text[s.pos] == ` ` || s.text[s.pos] in json2.newlines) {
s.pos++ s.move()
} }
if s.pos >= s.text.len { if s.pos >= s.text.len {
return s.tokenize([]byte{}, .eof) return s.tokenize([]byte{}, .eof)
@ -257,10 +265,10 @@ fn (mut s Scanner) scan() Token {
unsafe { ident.free() } unsafe { ident.free() }
val := s.text[s.pos..s.pos + 4] val := s.text[s.pos..s.pos + 4]
tok := s.tokenize(val, kind) tok := s.tokenize(val, kind)
s.move_pos() s.move() // n / t
s.move_pos() s.move() // u / r
s.move_pos() s.move() // l / u
s.move_pos() s.move() // l / e
return tok return tok
} }
unsafe { ident.free() } unsafe { ident.free() }
@ -271,11 +279,11 @@ fn (mut s Scanner) scan() Token {
unsafe { ident.free() } unsafe { ident.free() }
val := s.text[s.pos..s.pos + 5] val := s.text[s.pos..s.pos + 5]
tok := s.tokenize(val, .bool_) tok := s.tokenize(val, .bool_)
s.move_pos() s.move() // f
s.move_pos() s.move() // a
s.move_pos() s.move() // l
s.move_pos() s.move() // s
s.move_pos() s.move() // e
return tok return tok
} }
unsafe { ident.free() } unsafe { ident.free() }
@ -283,7 +291,7 @@ fn (mut s Scanner) scan() Token {
} else if s.text[s.pos] in json2.char_list { } else if s.text[s.pos] in json2.char_list {
chr := s.text[s.pos] chr := s.text[s.pos]
tok := s.tokenize([]byte{}, TokenKind(int(chr))) tok := s.tokenize([]byte{}, TokenKind(int(chr)))
s.move_pos() s.move()
return tok return tok
} else if s.text[s.pos] == `"` { } else if s.text[s.pos] == `"` {
return s.text_scan() return s.text_scan()

View File

@ -328,3 +328,24 @@ fn test_bool_false() {
assert tok.lit.len == 5 assert tok.lit.len == 5
assert tok.lit.bytestr() == 'false' assert tok.lit.bytestr() == 'false'
} }
// test_json_with_whitespace_start checks that the first `scan()` call on input
// that begins with spaces, newlines and a tab returns the `{` token itself
// (kind `.lcbr`, empty literal).
fn test_json_with_whitespace_start() {
mut sc := Scanner{
text: ' \n \n\t {'.bytes()
}
tok := sc.scan()
// Print the scanned token to stderr to help diagnose a failure.
eprintln(tok)
assert tok.kind == .lcbr
assert tok.lit.len == 0
}
// test_json_with_whitespace_end checks that, for input made of `}` followed by
// a space, a newline and a tab, the first `scan()` returns the `}` token
// (kind `.rcbr`) and the second `scan()` returns `.eof`.
fn test_json_with_whitespace_end() {
mut sc := Scanner{
text: '} \n\t'.bytes()
}
tok := sc.scan()
assert tok.kind == .rcbr
// The next scan sees only the trailing whitespace and must report end of input.
tok2 := sc.scan()
// Print the second token to stderr to help diagnose a failure.
eprintln(tok2)
assert tok2.kind == .eof
}