x.json2: skip whitespace before scanning (#9508)

pull/9531/head
Ned Palacios 2021-03-30 15:40:20 +08:00 committed by GitHub
parent c4e389be41
commit c5302bfcf5
5 changed files with 67 additions and 26 deletions
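
With this change, the scanner consumes leading and trailing spaces, tabs and newlines before emitting a token, so whitespace-padded input no longer fails with an `invalid token` error. A minimal usage sketch (not part of the commit; it mirrors the new test added below and uses `err.msg` as the tests in this commit do — newer V spells it `err.msg()`):

import x.json2

fn main() {
	// The payload is padded with spaces, newlines and tabs on both sides.
	raw := json2.raw_decode(' \n\t{"name":"Bob","age":20}\n\t') or {
		eprintln(err.msg)
		return
	}
	mp := raw.as_map()
	println(mp['name'].str()) // Bob
	println(mp['age'].int()) // 20
}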

View File

@@ -4,7 +4,8 @@
 module json2
 
 // `Any` is a sum type that lists the possible types to be decoded and used.
-pub type Any = Null | []Any | bool | f32 | f64 | i64 | u64 | int | map[string]Any | string
+pub type Any = Null | []Any | bool | f32 | f64 | i64 | int | map[string]Any | string |
+	u64
 
 // `Null` struct is a simple representation of the `null` value in JSON.
 pub struct Null {

View File

@@ -67,3 +67,14 @@ fn test_raw_decode_string_with_dollarsign() {
 	}
 	assert str.str() == r'Hello $world'
 }
+
+fn test_raw_decode_map_with_whitespaces() {
+	raw_mp := json2.raw_decode(' \n\t{"name":"Bob","age":20}\n\t') or {
+		eprintln(err.msg)
+		assert false
+		json2.Any{}
+	}
+	mp := raw_mp.as_map()
+	assert mp['name'].str() == 'Bob'
+	assert mp['age'].int() == 20
+}

View File

@@ -124,7 +124,7 @@ pub mut:
 	last_name string [json: lastName]
 	is_registered bool [json: IsRegistered]
 	typ int [json: 'type']
-	pets string [raw; json: 'pet_animals']
+	pets string [json: 'pet_animals'; raw]
 }
 
 fn (mut u User) from_json(an json2.Any) {
View File

@@ -64,16 +64,16 @@ const (
 )
 
 // move_pos proceeds to the next position.
-fn (mut s Scanner) move_pos() {
-	s.move(true, true)
+fn (mut s Scanner) move() {
+	s.move_pos(true, true)
 }
 
 // move_pos_with_newlines is the same as move_pos but only enables newline checking.
 fn (mut s Scanner) move_pos_with_newlines() {
-	s.move(false, true)
+	s.move_pos(false, true)
 }
 
-fn (mut s Scanner) move(include_space bool, include_newlines bool) {
+fn (mut s Scanner) move_pos(include_space bool, include_newlines bool) {
 	s.pos++
 	if s.pos < s.text.len {
 		if include_newlines && s.text[s.pos] in json2.newlines {
@@ -83,13 +83,13 @@ fn (mut s Scanner) move(include_space bool, include_newlines bool) {
 				s.pos++
 			}
 			for s.pos < s.text.len && s.text[s.pos] in json2.newlines {
-				s.move_pos()
+				s.move()
 			}
 		} else if include_space && s.text[s.pos] == ` ` {
 			s.pos++
 			s.col++
 			for s.pos < s.text.len && s.text[s.pos] == ` ` {
-				s.move_pos()
+				s.move()
 			}
 		}
 	} else {
@@ -118,7 +118,8 @@ fn (mut s Scanner) text_scan() Token {
 	mut has_closed := false
 	mut chrs := []byte{}
 	for {
-		s.move(false, false)
+		s.pos++
+		s.col++
 		if s.pos >= s.text.len {
 			break
 		}
@@ -135,15 +136,18 @@ fn (mut s Scanner) text_scan() Token {
 			peek := s.text[s.pos + 1]
 			if peek in json2.valid_unicode_escapes {
 				chrs << json2.unicode_transform_escapes[int(peek)]
-				s.move(false, false)
+				s.pos++
+				s.col++
 				continue
 			} else if peek == `u` {
 				if s.pos + 5 < s.text.len {
-					s.move(false, false)
+					s.pos++
+					s.col++
 					mut codepoint := []byte{}
 					codepoint_start := s.pos
 					for s.pos < s.text.len && s.pos < codepoint_start + 4 {
-						s.move(false, false)
+						s.pos++
+						s.col++
 						if s.text[s.pos] == `"` {
 							break
 						} else if !s.text[s.pos].is_hex_digit() {
@@ -178,7 +182,7 @@ fn (mut s Scanner) text_scan() Token {
 		chrs << ch
 	}
 	tok := s.tokenize(chrs, .str_)
-	s.move_pos()
+	s.move()
 	if !has_closed {
 		return s.error('missing double quotes in string closing')
 	}
@@ -236,14 +240,18 @@ fn (mut s Scanner) num_scan() Token {
 
 // invalid_token returns an error token with the invalid token message.
 fn (s Scanner) invalid_token() Token {
-	return s.error('invalid token `${s.text[s.pos].ascii_str()}`')
+	if s.text[s.pos] >= 32 && s.text[s.pos] <= 126 {
+		return s.error('invalid token `${s.text[s.pos].ascii_str()}`')
+	} else {
+		return s.error('invalid token ${s.text[s.pos].str_escaped()}')
+	}
 }
 
 // scan returns a token based on the scanner's current position.
 [manualfree]
 fn (mut s Scanner) scan() Token {
-	for s.pos < s.text.len && s.text[s.pos] == ` ` {
-		s.pos++
+	if s.pos < s.text.len && (s.text[s.pos] == ` ` || s.text[s.pos] in json2.newlines) {
+		s.move()
 	}
 	if s.pos >= s.text.len {
 		return s.tokenize([]byte{}, .eof)
@@ -257,10 +265,10 @@ fn (mut s Scanner) scan() Token {
 			unsafe { ident.free() }
 			val := s.text[s.pos..s.pos + 4]
 			tok := s.tokenize(val, kind)
-			s.move_pos()
-			s.move_pos()
-			s.move_pos()
-			s.move_pos()
+			s.move() // n / t
+			s.move() // u / r
+			s.move() // l / u
+			s.move() // l / e
 			return tok
 		}
 		unsafe { ident.free() }
@@ -271,11 +279,11 @@ fn (mut s Scanner) scan() Token {
 			unsafe { ident.free() }
 			val := s.text[s.pos..s.pos + 5]
 			tok := s.tokenize(val, .bool_)
-			s.move_pos()
-			s.move_pos()
-			s.move_pos()
-			s.move_pos()
-			s.move_pos()
+			s.move() // f
+			s.move() // a
+			s.move() // l
+			s.move() // s
+			s.move() // e
 			return tok
 		}
 		unsafe { ident.free() }
@@ -283,7 +291,7 @@ fn (mut s Scanner) scan() Token {
 	} else if s.text[s.pos] in json2.char_list {
 		chr := s.text[s.pos]
 		tok := s.tokenize([]byte{}, TokenKind(int(chr)))
-		s.move_pos()
+		s.move()
 		return tok
 	} else if s.text[s.pos] == `"` {
 		return s.text_scan()
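
In summary for the scanner: move and move_pos swap roles (move() is now the no-argument wrapper around move_pos(include_space, include_newlines)), string scanning advances with plain s.pos++ / s.col++ instead of calling the mover, invalid_token() escapes non-printable bytes in its error message, and scan() now skips leading spaces and newlines via move() before matching a token. A standalone sketch of that skipping idea (illustrative only; skip_whitespace is not a function of the module):

// skip_whitespace returns the first index at or after `start` whose byte is
// not a space, tab, carriage return or newline.
fn skip_whitespace(text []byte, start int) int {
	mut pos := start
	for pos < text.len {
		c := text[pos]
		if c == ` ` || c == `\t` || c == `\r` || c == `\n` {
			pos++
		} else {
			break
		}
	}
	return pos
}

fn main() {
	src := ' \n \n\t {"ok":true}'.bytes()
	i := skip_whitespace(src, 0)
	println(src[i..].bytestr()) // {"ok":true}
}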

View File

@@ -328,3 +328,24 @@ fn test_bool_false() {
 	assert tok.lit.len == 5
 	assert tok.lit.bytestr() == 'false'
 }
+
+fn test_json_with_whitespace_start() {
+	mut sc := Scanner{
+		text: ' \n \n\t {'.bytes()
+	}
+	tok := sc.scan()
+	eprintln(tok)
+	assert tok.kind == .lcbr
+	assert tok.lit.len == 0
+}
+
+fn test_json_with_whitespace_end() {
+	mut sc := Scanner{
+		text: '} \n\t'.bytes()
+	}
+	tok := sc.scan()
+	assert tok.kind == .rcbr
+	tok2 := sc.scan()
+	eprintln(tok2)
+	assert tok2.kind == .eof
+}