From 646c1e15e26030a41c97de30e551e422f60fa75e Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Wed, 14 Jul 2021 21:20:05 +0200 Subject: [PATCH] regex: fix a bug with find_all, fixes #10799 (#10801) --- vlib/regex/regex.v | 8 ++++ vlib/regex/regex_test.v | 13 +++++++ vlib/regex/regex_util.v | 85 +++++++++++++++++++---------------------- 3 files changed, 60 insertions(+), 46 deletions(-) diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index b84e45eeeb..12e86dab96 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -334,6 +334,10 @@ fn (mut re RE) reset() { if re.group_csave_flag == true { re.group_csave.clear() // = []int{} } + + // reset state list + re.state_list.clear() + re.group_stack.clear() } // reset for search mode fail @@ -1787,6 +1791,10 @@ pub fn (mut re RE) match_base(in_txt &byte, in_txt_len int) (int, int) { re.reset_src() state.match_index = -1 state.first_match = -1 + + // reset state list + re.reset() + continue } diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index 3e1912ab71..242dc84fbe 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -326,7 +326,20 @@ find_all_test_suite = [ r".*#[.#]{4}##[.#]{4}##[.#]{4}###", [0, 49], ['#.#......##.#..#..##........##....###...##...####'] + }, + Test_find_all{ + "1234 Aa dddd Aaf 12334 Aa opopo Aaf", + r"Aa.+Aaf", + [5, 16, 23, 35], + ['Aa dddd Aaf', 'Aa opopo Aaf'] + }, + Test_find_all{ + "@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo", + r"@for.+@endfor", + [0, 22, 23, 50, 63, 80, 89, 117], + ['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor'] } + ] ) diff --git a/vlib/regex/regex_util.v b/vlib/regex/regex_util.v index 6f29dfcdd1..0bf1a81269 100644 --- a/vlib/regex/regex_util.v +++ b/vlib/regex/regex_util.v @@ -178,21 +178,18 @@ pub fn (mut re RE) find(in_txt string) (int, int) { mut i := 0 for i < in_txt.len { - //--- speed references --- mut s := -1 mut e := -1 unsafe { - tmp_str := tos(in_txt.str + i, in_txt.len - i) - s, e = re.match_string(tmp_str) - } - //------------------------ - // s,e := re.find_imp(in_txt[i..]) - //------------------------ - if s >= 0 && e > s { - // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]") - // re.flag = old_flag - return i + s, i + e - } else { + // tmp_str := tos(in_txt.str + i, in_txt.len - i) + // println("Check: [$tmp_str]") + s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1) + + if s >= 0 && e > s { + // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]") + // re.flag = old_flag + return i + s, i + e + } i++ } } @@ -239,33 +236,28 @@ pub fn (mut re RE) find_from(in_txt string, start int) (int, int) { [direct_array_access] pub fn (mut re RE) find_all(in_txt string) []int { // old_flag := re.flag - // re.flag |= f_src // enable search mode + // re.flag |= f_src // enable search mode mut i := 0 mut res := []int{} - mut ls := -1 for i < in_txt.len { - //--- speed references --- mut s := -1 mut e := -1 unsafe { - tmp_str := tos(in_txt.str + i, in_txt.len - i) - s, e = re.match_string(tmp_str) - } - //------------------------ - // s,e := re.find_imp(in_txt[i..]) - //------------------------ - if s >= 0 && e > s && i + s > ls { - // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls") - res << i + s - res << i + e - ls = i + s - i = i + e - continue - } else { - i++ + // tmp_str := in_txt[i..] + // tmp_str := tos(in_txt.str + i, in_txt.len - i) + // println("Check: [$tmp_str]") + s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i) + + if s >= 0 && e > s { + res << i + s + res << i + e + i += e + continue + } } + i++ } // re.flag = old_flag return res @@ -274,31 +266,32 @@ pub fn (mut re RE) find_all(in_txt string) []int { // find_all_str find all the non overlapping occurrences of the match pattern, return a string list [direct_array_access] pub fn (mut re RE) find_all_str(in_txt string) []string { + // old_flag := re.flag + // re.flag |= f_src // enable search mode + mut i := 0 mut res := []string{} - mut ls := -1 for i < in_txt.len { - //--- speed references --- mut s := -1 mut e := -1 unsafe { - tmp_str := tos(in_txt.str + i, in_txt.len - i) - s, e = re.find(tmp_str) - } - //------------------------ - // s,e := re.find(in_txt[i..]) - //------------------------ - if s >= 0 && e > s && i + s > ls { - // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls") - res << in_txt[i + s..i + e] - ls = i + s - i = i + e - continue - } else { - i++ + // tmp_str := in_txt[i..] + // tmp_str := tos(in_txt.str + i, in_txt.len - i) + // println("Check: [$tmp_str]") + s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i) + + if s >= 0 && e > s { + tmp_str := tos(in_txt.str + i, in_txt.len - i) + // println("Found: $s:$e [${tmp_str[s..e]}]") + res << tmp_str[..e] + i += e + continue + } } + i++ } + // re.flag = old_flag return res }