x.json2: skip whitespace before scanning (#9508)
parent
c4e389be41
commit
c5302bfcf5
|
@ -4,7 +4,8 @@
|
|||
module json2
|
||||
|
||||
// `Any` is a sum type that lists the possible types to be decoded and used.
|
||||
pub type Any = Null | []Any | bool | f32 | f64 | i64 | u64 | int | map[string]Any | string
|
||||
pub type Any = Null | []Any | bool | f32 | f64 | i64 | int | map[string]Any | string |
|
||||
u64
|
||||
|
||||
// `Null` struct is a simple representation of the `null` value in JSON.
|
||||
pub struct Null {
|
||||
|
|
|
@ -67,3 +67,14 @@ fn test_raw_decode_string_with_dollarsign() {
|
|||
}
|
||||
assert str.str() == r'Hello $world'
|
||||
}
|
||||
|
||||
// test_raw_decode_map_with_whitespaces checks that `json2.raw_decode` accepts a
// JSON object surrounded by spaces, newlines and tabs, and that the decoded map
// still yields the expected `name` and `age` values.
fn test_raw_decode_map_with_whitespaces() {
|
||||
raw_mp := json2.raw_decode(' \n\t{"name":"Bob","age":20}\n\t') or {
|
||||
// Decoding failed: print the error message, then fail the test explicitly.
eprintln(err.msg)
|
||||
assert false
|
||||
// Placeholder value so that the `or` block still evaluates to an `Any`.
json2.Any{}
|
||||
}
|
||||
mp := raw_mp.as_map()
|
||||
assert mp['name'].str() == 'Bob'
|
||||
assert mp['age'].int() == 20
|
||||
}
|
||||
|
|
|
@ -124,7 +124,7 @@ pub mut:
|
|||
last_name string [json: lastName]
|
||||
is_registered bool [json: IsRegistered]
|
||||
typ int [json: 'type']
|
||||
pets string [raw; json: 'pet_animals']
|
||||
pets string [json: 'pet_animals'; raw]
|
||||
}
|
||||
|
||||
fn (mut u User) from_json(an json2.Any) {
|
||||
|
|
|
@ -64,16 +64,16 @@ const (
|
|||
)
|
||||
|
||||
// move_pos proceeds to the next position.
|
||||
fn (mut s Scanner) move_pos() {
|
||||
s.move(true, true)
|
||||
fn (mut s Scanner) move() {
|
||||
s.move_pos(true, true)
|
||||
}
|
||||
|
||||
// move_pos_with_newlines is the same as move_pos but only enables newline checking.
|
||||
fn (mut s Scanner) move_pos_with_newlines() {
|
||||
s.move(false, true)
|
||||
s.move_pos(false, true)
|
||||
}
|
||||
|
||||
fn (mut s Scanner) move(include_space bool, include_newlines bool) {
|
||||
fn (mut s Scanner) move_pos(include_space bool, include_newlines bool) {
|
||||
s.pos++
|
||||
if s.pos < s.text.len {
|
||||
if include_newlines && s.text[s.pos] in json2.newlines {
|
||||
|
@ -83,13 +83,13 @@ fn (mut s Scanner) move(include_space bool, include_newlines bool) {
|
|||
s.pos++
|
||||
}
|
||||
for s.pos < s.text.len && s.text[s.pos] in json2.newlines {
|
||||
s.move_pos()
|
||||
s.move()
|
||||
}
|
||||
} else if include_space && s.text[s.pos] == ` ` {
|
||||
s.pos++
|
||||
s.col++
|
||||
for s.pos < s.text.len && s.text[s.pos] == ` ` {
|
||||
s.move_pos()
|
||||
s.move()
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -118,7 +118,8 @@ fn (mut s Scanner) text_scan() Token {
|
|||
mut has_closed := false
|
||||
mut chrs := []byte{}
|
||||
for {
|
||||
s.move(false, false)
|
||||
s.pos++
|
||||
s.col++
|
||||
if s.pos >= s.text.len {
|
||||
break
|
||||
}
|
||||
|
@ -135,15 +136,18 @@ fn (mut s Scanner) text_scan() Token {
|
|||
peek := s.text[s.pos + 1]
|
||||
if peek in json2.valid_unicode_escapes {
|
||||
chrs << json2.unicode_transform_escapes[int(peek)]
|
||||
s.move(false, false)
|
||||
s.pos++
|
||||
s.col++
|
||||
continue
|
||||
} else if peek == `u` {
|
||||
if s.pos + 5 < s.text.len {
|
||||
s.move(false, false)
|
||||
s.pos++
|
||||
s.col++
|
||||
mut codepoint := []byte{}
|
||||
codepoint_start := s.pos
|
||||
for s.pos < s.text.len && s.pos < codepoint_start + 4 {
|
||||
s.move(false, false)
|
||||
s.pos++
|
||||
s.col++
|
||||
if s.text[s.pos] == `"` {
|
||||
break
|
||||
} else if !s.text[s.pos].is_hex_digit() {
|
||||
|
@ -178,7 +182,7 @@ fn (mut s Scanner) text_scan() Token {
|
|||
chrs << ch
|
||||
}
|
||||
tok := s.tokenize(chrs, .str_)
|
||||
s.move_pos()
|
||||
s.move()
|
||||
if !has_closed {
|
||||
return s.error('missing double quotes in string closing')
|
||||
}
|
||||
|
@ -236,14 +240,18 @@ fn (mut s Scanner) num_scan() Token {
|
|||
|
||||
// invalid_token returns an error token with the invalid token message.
|
||||
fn (s Scanner) invalid_token() Token {
|
||||
return s.error('invalid token `${s.text[s.pos].ascii_str()}`')
|
||||
if s.text[s.pos] >= 32 && s.text[s.pos] <= 126 {
|
||||
return s.error('invalid token `${s.text[s.pos].ascii_str()}`')
|
||||
} else {
|
||||
return s.error('invalid token ${s.text[s.pos].str_escaped()}')
|
||||
}
|
||||
}
|
||||
|
||||
// scan returns a token based on the scanner's current position.
|
||||
[manualfree]
|
||||
fn (mut s Scanner) scan() Token {
|
||||
for s.pos < s.text.len && s.text[s.pos] == ` ` {
|
||||
s.pos++
|
||||
if s.pos < s.text.len && (s.text[s.pos] == ` ` || s.text[s.pos] in json2.newlines) {
|
||||
s.move()
|
||||
}
|
||||
if s.pos >= s.text.len {
|
||||
return s.tokenize([]byte{}, .eof)
|
||||
|
@ -257,10 +265,10 @@ fn (mut s Scanner) scan() Token {
|
|||
unsafe { ident.free() }
|
||||
val := s.text[s.pos..s.pos + 4]
|
||||
tok := s.tokenize(val, kind)
|
||||
s.move_pos()
|
||||
s.move_pos()
|
||||
s.move_pos()
|
||||
s.move_pos()
|
||||
s.move() // n / t
|
||||
s.move() // u / r
|
||||
s.move() // l / u
|
||||
s.move() // l / e
|
||||
return tok
|
||||
}
|
||||
unsafe { ident.free() }
|
||||
|
@ -271,11 +279,11 @@ fn (mut s Scanner) scan() Token {
|
|||
unsafe { ident.free() }
|
||||
val := s.text[s.pos..s.pos + 5]
|
||||
tok := s.tokenize(val, .bool_)
|
||||
s.move_pos()
|
||||
s.move_pos()
|
||||
s.move_pos()
|
||||
s.move_pos()
|
||||
s.move_pos()
|
||||
s.move() // f
|
||||
s.move() // a
|
||||
s.move() // l
|
||||
s.move() // s
|
||||
s.move() // e
|
||||
return tok
|
||||
}
|
||||
unsafe { ident.free() }
|
||||
|
@ -283,7 +291,7 @@ fn (mut s Scanner) scan() Token {
|
|||
} else if s.text[s.pos] in json2.char_list {
|
||||
chr := s.text[s.pos]
|
||||
tok := s.tokenize([]byte{}, TokenKind(int(chr)))
|
||||
s.move_pos()
|
||||
s.move()
|
||||
return tok
|
||||
} else if s.text[s.pos] == `"` {
|
||||
return s.text_scan()
|
||||
|
|
|
@ -328,3 +328,24 @@ fn test_bool_false() {
|
|||
assert tok.lit.len == 5
|
||||
assert tok.lit.bytestr() == 'false'
|
||||
}
|
||||
|
||||
// test_json_with_whitespace_start checks that the first `scan()` call on input
// that begins with spaces, newlines and a tab returns the `{` token (`.lcbr`)
// with an empty literal.
fn test_json_with_whitespace_start() {
|
||||
mut sc := Scanner{
|
||||
text: ' \n \n\t {'.bytes()
|
||||
}
|
||||
tok := sc.scan()
|
||||
// Print the token so it is visible in the test output.
eprintln(tok)
|
||||
assert tok.kind == .lcbr
|
||||
assert tok.lit.len == 0
|
||||
}
|
||||
|
||||
// test_json_with_whitespace_end checks that, for input consisting of `}`
// followed only by a space, a newline and a tab, the first `scan()` returns
// `.rcbr` and the second returns `.eof`.
fn test_json_with_whitespace_end() {
|
||||
mut sc := Scanner{
|
||||
text: '} \n\t'.bytes()
|
||||
}
|
||||
tok := sc.scan()
|
||||
assert tok.kind == .rcbr
|
||||
// The second scan only has trailing whitespace left to consume.
tok2 := sc.scan()
|
||||
// Print the token so it is visible in the test output.
eprintln(tok2)
|
||||
assert tok2.kind == .eof
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue