toml: fix bug in unicode decoding (#12643)

pull/12664/head
Larpon 2021-12-02 10:19:12 +01:00 committed by GitHub
parent 799d7b843c
commit ebfacca252
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 12 deletions

View File

@ -159,16 +159,20 @@ pub fn decode_quoted_escapes(mut q ast.Quoted) ? {
&& byte(s.peek(3)).is_hex_digit() && byte(s.peek(4)).is_hex_digit()
if is_valid_short {
// is_valid_long := byte(s.peek(5)).is_hex_digit() && byte(s.peek(6)).is_hex_digit() && byte(s.peek(7)).is_hex_digit() && byte(s.peek(8)).is_hex_digit()
// Long type Unicode (\UXXXXXXXX) is a maximum of 10 chars: '\' + 'U' + 8 hex characters
is_valid_long := byte(s.peek(5)).is_hex_digit()
&& byte(s.peek(6)).is_hex_digit() && byte(s.peek(7)).is_hex_digit()
&& byte(s.peek(8)).is_hex_digit()
// If it's a long type Unicode (\UXXXXXXXX) with a maximum of 10 chars: '\' + 'U' + 8 hex characters,
// we pass in 10 characters from the `u`/`U`, which is the longest possible sequence
// of 9 chars plus one extra.
// Else it's a short sequence (\uXXXX) with a maximum of 6 chars: '\' + 'u' + 4 hex characters.
mut decoded := ''
mut sequence_length := 0
mut unicode_val := 0
if s.remaining() >= 10 {
mut slen := if is_valid_long { 10 } else { 6 }
if slen <= s.remaining() {
pos := s.state().pos
sequence := s.text[pos..pos + 11]
sequence := s.text[pos..pos + slen + 1]
decoded, unicode_val, sequence_length = decode_unicode_escape(sequence) or {
decoded_s += escape
@ -184,11 +188,9 @@ pub fn decode_quoted_escapes(mut q ast.Quoted) ? {
decoded_s += escape
continue
}
if unicode_val in [0x7F, 0x1F, 0x5C, 0x75] {
sequence_length -= 2
}
decoded_s += decoded
s.skip_n(s.text[pos..pos + 2 + sequence_length + 1].len)
replacement := s.text[pos..pos + sequence_length + 1]
s.skip_n(replacement.len)
continue
} else {
pos := s.state().pos

View File

@ -20,7 +20,6 @@ const (
]
valid_value_exceptions = [
'valid/unicode-escape.toml',
// These have correct values, and should've passed, but the format of arrays is *mixed* in the JSON ??
'valid/example2.toml',
]

View File

@ -19,9 +19,7 @@ const (
valid_exceptions = []string{}
invalid_exceptions = []string{}
valid_value_exceptions = [
'values/spec-string-basic.toml',
]
valid_value_exceptions = []string{}
yaml_value_exceptions = [
'values/spec-float-5.toml', // YAML: "1e6", V: 1000000