From 724942c4e6c4eaff2fa0b29839eb98456b8b1d02 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Sun, 5 Sep 2021 03:48:59 +0200 Subject: [PATCH] regex: bug fixes (#11394) --- vlib/regex/regex.v | 29 +++++++++++++++++++++++++++++ vlib/regex/regex_test.v | 28 ++++++++++++++++++++++++++++ vlib/regex/regex_util.v | 3 ++- 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index 9e630e1d5d..68ea823f5d 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -890,6 +890,10 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) { return -2, true, name, i } +const ( + quntifier_chars = [rune(`+`), `*`, `?`, `{`] +) + // // main compiler // @@ -1036,20 +1040,37 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) { // Quantifiers if char_len == 1 && pc > 0 { + mut char_next := rune(0) + mut char_next_len := 0 + if (char_len + i) < in_txt.len { + char_next, char_next_len = re.get_char(in_txt, i + char_len) + } mut quant_flag := true match byte(char_tmp) { `?` { // println("q: ${char_tmp:c}") + // check illegal quantifier sequences + if char_next_len == 1 && char_next in regex.quntifier_chars { + return regex.err_syntax_error, i + } re.prog[pc - 1].rep_min = 0 re.prog[pc - 1].rep_max = 1 } `+` { // println("q: ${char_tmp:c}") + // check illegal quantifier sequences + if char_next_len == 1 && char_next in regex.quntifier_chars { + return regex.err_syntax_error, i + } re.prog[pc - 1].rep_min = 1 re.prog[pc - 1].rep_max = regex.max_quantifier } `*` { // println("q: ${char_tmp:c}") + // check illegal quantifier sequences + if char_next_len == 1 && char_next in regex.quntifier_chars { + return regex.err_syntax_error, i + } re.prog[pc - 1].rep_min = 0 re.prog[pc - 1].rep_max = regex.max_quantifier } @@ -1062,10 +1083,18 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) { re.prog[pc - 1].rep_min = min re.prog[pc - 1].rep_max = max re.prog[pc - 1].greedy = greedy + // check illegal quantifier sequences + if i <= in_txt.len { + char_next, char_next_len = re.get_char(in_txt, i) + if char_next_len == 1 && char_next in regex.quntifier_chars { + return regex.err_syntax_error, i + } + } continue } else { return min, i } + // TODO: decide if the open bracket can be conform without the close bracket /* // no conform, parse as normal char diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index aa6bf79f74..1239b250e4 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -340,6 +340,12 @@ find_all_test_suite = [ r"@for.+@endfor", [0, 22, 23, 50, 63, 80, 89, 117], ['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire piĆ¹ @endfor'] + }, + Test_find_all{ + "+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++", + r"\+{3}.*\+{3}", + [0, 11, 18, 32, 33, 44], + ['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++'] } ] @@ -605,4 +611,26 @@ fn test_regex_func_replace(){ eprintln(txt2) } assert result == txt2 +} + +// test quantifier wrong sequences +const( + test_quantifier_sequences_list = [ + r'+{3}.*+{3}', + r'+{3}.*?{3}', + r'+{3}.**{3}', + r'+{3}.*\+{3}*', + r'+{3}.*\+{3}+', + r'+{3}.*\+{3}??', + r'+{3}.*\+{3}{4}' + ] +) +fn test_quantifier_sequences(){ + for pattern in test_quantifier_sequences_list { + re, re_err, err_pos := regex.regex_base(pattern) + if re_err != regex.err_syntax_error { + eprintln("pattern: $pattern => $re_err") + } + assert re_err == regex.err_syntax_error + } } \ No newline at end of file diff --git a/vlib/regex/regex_util.v b/vlib/regex/regex_util.v index 0bf1a81269..bad5d727fc 100644 --- a/vlib/regex/regex_util.v +++ b/vlib/regex/regex_util.v @@ -283,8 +283,9 @@ pub fn (mut re RE) find_all_str(in_txt string) []string { if s >= 0 && e > s { tmp_str := tos(in_txt.str + i, in_txt.len - i) + mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e } // println("Found: $s:$e [${tmp_str[s..e]}]") - res << tmp_str[..e] + res << tmp_str[..tmp_e] i += e continue }