From c9171ebe9a8c89c36e60326168a36e3c51247ca8 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Mon, 21 Dec 2020 05:36:14 +0100 Subject: [PATCH] regex: fixes and optimizations (#7431) --- vlib/regex/README.md | 1 + vlib/regex/regex.v | 9 +++++++-- vlib/regex/regex_test.v | 9 +++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/vlib/regex/README.md b/vlib/regex/README.md index eba818ffa3..3645f7589a 100644 --- a/vlib/regex/README.md +++ b/vlib/regex/README.md @@ -475,6 +475,7 @@ pub fn new() RE #### **Custom initialization** For some particular need it is possible initialize a fully customized regex: ```v ignore +pattern = r"ab(.*)(ac)" // init custom regex mut re := regex.RE{} re.prog = []Token {len: pattern.len + 1} // max program length, can not be longer then the pattern diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index 210319dde2..d0ca5742f5 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -1825,9 +1825,9 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) { } // manage here dot char - //println("Here we are, with stop: state buffer: [${state_list.len}]") if state_list.len > 0 { + //println("Here we are, with stop: state buffer: [${state_list.len}]") state = state_list.pop() state.match_flag = true @@ -1898,6 +1898,10 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) { re.groups[g_index+1] = state.i + if g_index > 0 && re.groups[g_index] <= re.groups[g_index-1] { + re.groups[g_index] = re.groups[g_index-1] + } + if re.groups[g_index+1] >= in_txt_len { //println("clamp group!") re.groups[g_index+1] = in_txt_len-1 @@ -1946,6 +1950,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) { // if we are done with max go on dot char are dedicated case!! if re.prog[state.pc].rep >= re.prog[state.pc].rep_max { + state_list.pop() m_state = .ist_next continue } @@ -1986,7 +1991,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) { } } - + // check if we must continue or pass to the next IST if next_check_flag == true { //println("save the state!!") diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index 21a967db05..b421866f96 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -138,6 +138,9 @@ match_test_suite = [ TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5}, TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0}, TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0}, + + // particular groups + TestItem{"ababababac", r"ab(.*)(ac)",0,10}, ] ) @@ -229,6 +232,12 @@ cgroups_test_suite = [ [0, 3, 4, 5, 5, 7], map[string]int{} }, + TestItemCGroup{ + "ababababac", + r"ab(.*)(ac)",0,10, + [2, 8, 8, 10], + map[string]int{} + }, ] )