From 443ae1d76e83db73ec5db8b89bc5aacd5b9287a1 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Sun, 3 Jan 2021 16:59:00 +0100 Subject: [PATCH] regex: fix a bug in find and find_all (#7839) --- vlib/regex/regex_test.v | 16 +++++++++++++++- vlib/regex/regex_util.v | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index 1ed54769cc..972351838f 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -271,6 +271,18 @@ find_all_test_suite = [ r"url *= *https?://.*"+'\n', [5, 45], ['url = https://github.com/dario/pig.html\n'] + }, + Test_find_all{ + "#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.", + r"#[.#]{4}##[.#]{4}##[.#]{4}###", + [29, 49], + ['#....###...##...####'] + }, + Test_find_all{ + "#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.", + r".*#[.#]{4}##[.#]{4}##[.#]{4}###", + [0, 49], + ['#.#......##.#..#..##........##....###...##...####'] } ] ) @@ -283,7 +295,9 @@ fn test_regex(){ // check capturing groups for c,to in cgroups_test_suite { // debug print - if debug { println("#$c [$to.src] q[$to.q] ($to.s, $to.e)") } + if debug { + println("$c [${to.src}] [q${to.q}] (${to.s}, ${to.e})") + } mut re := regex.regex_opt(to.q) or { eprintln('err: $err') diff --git a/vlib/regex/regex_util.v b/vlib/regex/regex_util.v index 27918d0ab3..ff7eddf983 100644 --- a/vlib/regex/regex_util.v +++ b/vlib/regex/regex_util.v @@ -116,9 +116,10 @@ pub fn (re RE) get_group_list() []Re_group { * Finders * ******************************************************************************/ -// find try to find the first match in the input string +/* +// find internal implementation [direct_array_access] -pub fn (mut re RE) find(in_txt string) (int,int) { +fn (mut re RE) find_imp(in_txt string) (int,int) { old_flag := re.flag re.flag |= f_src // enable search mode @@ -134,6 +135,33 @@ pub fn (mut re RE) find(in_txt string) (int,int) { } return no_match_found, 0 } +*/ + +// find try to find the first match in the input string +[direct_array_access] +pub fn (mut re RE) find(in_txt string) (int,int) { + mut i := 0 + for i < in_txt.len { + //--- speed references --- + mut s := -1 + mut e := -1 + unsafe { + tmp_str := tos(in_txt.str+i, in_txt.len-i) + s,e = re.match_string(tmp_str) + } + //------------------------ + //s,e := re.find_imp(in_txt[i..]) + //------------------------ + if s >= 0 && e > s { + //println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]") + return i+s, i+e + } else { + i++ + } + + } + return -1, -1 +} // find_all find all the non overlapping occurrences of the match pattern [direct_array_access] @@ -148,10 +176,10 @@ pub fn (mut re RE) find_all(in_txt string) []int { mut e := -1 unsafe { tmp_str := tos(in_txt.str+i, in_txt.len-i) - s,e = re.find(tmp_str) + s,e = re.match_string(tmp_str) } //------------------------ - //s,e := re.find(in_txt[i..]) + //s,e := re.find_imp(in_txt[i..]) //------------------------ if s >= 0 && e > s && i+s > ls { //println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")