toml: fix bug in unicode decoding (#12643)

pull/12664/head
Larpon 2021-12-02 10:19:12 +01:00 committed by GitHub
parent 799d7b843c
commit ebfacca252
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 12 deletions

View File

@ -159,16 +159,20 @@ pub fn decode_quoted_escapes(mut q ast.Quoted) ? {
&& byte(s.peek(3)).is_hex_digit() && byte(s.peek(4)).is_hex_digit() && byte(s.peek(3)).is_hex_digit() && byte(s.peek(4)).is_hex_digit()
if is_valid_short { if is_valid_short {
// is_valid_long := byte(s.peek(5)).is_hex_digit() && byte(s.peek(6)).is_hex_digit() && byte(s.peek(7)).is_hex_digit() && byte(s.peek(8)).is_hex_digit() is_valid_long := byte(s.peek(5)).is_hex_digit()
// Long type Unicode (\UXXXXXXXX) is a maximum of 10 chars: '\' + 'U' + 8 hex characters && byte(s.peek(6)).is_hex_digit() && byte(s.peek(7)).is_hex_digit()
&& byte(s.peek(8)).is_hex_digit()
// If it's a long type Unicode (\UXXXXXXXX) with a maximum of 10 chars: '\' + 'U' + 8 hex characters
// we pass in 10 characters from the `u`/`U` which is the longest possible sequence // we pass in 10 characters from the `u`/`U` which is the longest possible sequence
// of 9 chars plus one extra. // of 9 chars plus one extra.
// Else it's a short sequence (\uXXXX) with a maximum of 6 chars: '\' + 'u' + 4 hex characters.
mut decoded := '' mut decoded := ''
mut sequence_length := 0 mut sequence_length := 0
mut unicode_val := 0 mut unicode_val := 0
if s.remaining() >= 10 { mut slen := if is_valid_long { 10 } else { 6 }
if slen <= s.remaining() {
pos := s.state().pos pos := s.state().pos
sequence := s.text[pos..pos + 11] sequence := s.text[pos..pos + slen + 1]
decoded, unicode_val, sequence_length = decode_unicode_escape(sequence) or { decoded, unicode_val, sequence_length = decode_unicode_escape(sequence) or {
decoded_s += escape decoded_s += escape
@ -184,11 +188,9 @@ pub fn decode_quoted_escapes(mut q ast.Quoted) ? {
decoded_s += escape decoded_s += escape
continue continue
} }
if unicode_val in [0x7F, 0x1F, 0x5C, 0x75] {
sequence_length -= 2
}
decoded_s += decoded decoded_s += decoded
s.skip_n(s.text[pos..pos + 2 + sequence_length + 1].len) replacement := s.text[pos..pos + sequence_length + 1]
s.skip_n(replacement.len)
continue continue
} else { } else {
pos := s.state().pos pos := s.state().pos

View File

@ -20,7 +20,6 @@ const (
] ]
valid_value_exceptions = [ valid_value_exceptions = [
'valid/unicode-escape.toml',
// These have correct values, and should've passed, but the format of arrays is *mixed* in the JSON ?? // These have correct values, and should've passed, but the format of arrays is *mixed* in the JSON ??
'valid/example2.toml', 'valid/example2.toml',
] ]

View File

@ -19,9 +19,7 @@ const (
valid_exceptions = []string{} valid_exceptions = []string{}
invalid_exceptions = []string{} invalid_exceptions = []string{}
valid_value_exceptions = [ valid_value_exceptions = []string{}
'values/spec-string-basic.toml',
]
yaml_value_exceptions = [ yaml_value_exceptions = [
'values/spec-float-5.toml', // YAML: "1e6", V: 1000000 'values/spec-float-5.toml', // YAML: "1e6", V: 1000000