From ebfacca252b620951266032863b27dd73f889061 Mon Sep 17 00:00:00 2001 From: Larpon Date: Thu, 2 Dec 2021 10:19:12 +0100 Subject: [PATCH] toml: fix bug in unicode decoding (#12643) --- vlib/toml/decoder/decoder.v | 18 ++++++++++-------- .../tests/alexcrichton.toml-rs-tests_test.v | 1 - vlib/toml/tests/iarna.toml-spec-tests_test.v | 4 +--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/vlib/toml/decoder/decoder.v b/vlib/toml/decoder/decoder.v index cd5fe942da..ede7920e5d 100644 --- a/vlib/toml/decoder/decoder.v +++ b/vlib/toml/decoder/decoder.v @@ -159,16 +159,20 @@ pub fn decode_quoted_escapes(mut q ast.Quoted) ? { && byte(s.peek(3)).is_hex_digit() && byte(s.peek(4)).is_hex_digit() if is_valid_short { - // is_valid_long := byte(s.peek(5)).is_hex_digit() && byte(s.peek(6)).is_hex_digit() && byte(s.peek(7)).is_hex_digit() && byte(s.peek(8)).is_hex_digit() - // Long type Unicode (\UXXXXXXXX) is a maximum of 10 chars: '\' + 'U' + 8 hex characters + is_valid_long := byte(s.peek(5)).is_hex_digit() + && byte(s.peek(6)).is_hex_digit() && byte(s.peek(7)).is_hex_digit() + && byte(s.peek(8)).is_hex_digit() + // If it's a long type Unicode (\UXXXXXXXX) with a maximum of 10 chars: '\' + 'U' + 8 hex characters // we pass in 10 characters from the `u`/`U` which is the longest possible sequence // of 9 chars plus one extra. + // Else it's a short sequence (\uXXXX) with a maximum of 6 chars: '\' + 'U' + 4 hex characters. mut decoded := '' mut sequence_length := 0 mut unicode_val := 0 - if s.remaining() >= 10 { + mut slen := if is_valid_long { 10 } else { 6 } + if slen <= s.remaining() { pos := s.state().pos - sequence := s.text[pos..pos + 11] + sequence := s.text[pos..pos + slen + 1] decoded, unicode_val, sequence_length = decode_unicode_escape(sequence) or { decoded_s += escape @@ -184,11 +188,9 @@ pub fn decode_quoted_escapes(mut q ast.Quoted) ? { decoded_s += escape continue } - if unicode_val in [0x7F, 0x1F, 0x5C, 0x75] { - sequence_length -= 2 - } decoded_s += decoded - s.skip_n(s.text[pos..pos + 2 + sequence_length + 1].len) + replacement := s.text[pos..pos + sequence_length + 1] + s.skip_n(replacement.len) continue } else { pos := s.state().pos diff --git a/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v b/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v index 6299be500c..2a9f075394 100644 --- a/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v +++ b/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v @@ -20,7 +20,6 @@ const ( ] valid_value_exceptions = [ - 'valid/unicode-escape.toml', // These have correct values, and should've passed, but the format of arrays is *mixed* in the JSON ?? 'valid/example2.toml', ] diff --git a/vlib/toml/tests/iarna.toml-spec-tests_test.v b/vlib/toml/tests/iarna.toml-spec-tests_test.v index a399c8da19..7c6c3a2a38 100644 --- a/vlib/toml/tests/iarna.toml-spec-tests_test.v +++ b/vlib/toml/tests/iarna.toml-spec-tests_test.v @@ -19,9 +19,7 @@ const ( valid_exceptions = []string{} invalid_exceptions = []string{} - valid_value_exceptions = [ - 'values/spec-string-basic.toml', - ] + valid_value_exceptions = []string{} yaml_value_exceptions = [ 'values/spec-float-5.toml', // YAML: "1e6", V: 1000000