regex: fixes and optimizations (#7431)
parent
82d1b6a5b7
commit
c9171ebe9a
|
@ -475,6 +475,7 @@ pub fn new() RE
|
||||||
#### **Custom initialization**
|
#### **Custom initialization**
|
||||||
For some particular needs it is possible to initialize a fully customized regex:
|
For some particular needs it is possible to initialize a fully customized regex:
|
||||||
```v ignore
|
```v ignore
|
||||||
|
pattern = r"ab(.*)(ac)"
|
||||||
// init custom regex
|
// init custom regex
|
||||||
mut re := regex.RE{}
|
mut re := regex.RE{}
|
||||||
re.prog = []Token {len: pattern.len + 1} // max program length, can not be longer than the pattern
|
re.prog = []Token {len: pattern.len + 1} // max program length, can not be longer than the pattern
|
||||||
|
|
|
@ -1825,9 +1825,9 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// manage here dot char
|
// manage here dot char
|
||||||
//println("Here we are, with stop: state buffer: [${state_list.len}]")
|
|
||||||
|
|
||||||
if state_list.len > 0 {
|
if state_list.len > 0 {
|
||||||
|
//println("Here we are, with stop: state buffer: [${state_list.len}]")
|
||||||
state = state_list.pop()
|
state = state_list.pop()
|
||||||
|
|
||||||
state.match_flag = true
|
state.match_flag = true
|
||||||
|
@ -1898,6 +1898,10 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
|
|
||||||
re.groups[g_index+1] = state.i
|
re.groups[g_index+1] = state.i
|
||||||
|
|
||||||
|
if g_index > 0 && re.groups[g_index] <= re.groups[g_index-1] {
|
||||||
|
re.groups[g_index] = re.groups[g_index-1]
|
||||||
|
}
|
||||||
|
|
||||||
if re.groups[g_index+1] >= in_txt_len {
|
if re.groups[g_index+1] >= in_txt_len {
|
||||||
//println("clamp group!")
|
//println("clamp group!")
|
||||||
re.groups[g_index+1] = in_txt_len-1
|
re.groups[g_index+1] = in_txt_len-1
|
||||||
|
@ -1946,6 +1950,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
// if we are done with max go on dot char are dedicated case!!
|
// if we are done with max go on dot char are dedicated case!!
|
||||||
if re.prog[state.pc].rep >= re.prog[state.pc].rep_max
|
if re.prog[state.pc].rep >= re.prog[state.pc].rep_max
|
||||||
{
|
{
|
||||||
|
state_list.pop()
|
||||||
m_state = .ist_next
|
m_state = .ist_next
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -1986,7 +1991,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we must continue or pass to the next IST
|
// check if we must continue or pass to the next IST
|
||||||
if next_check_flag == true {
|
if next_check_flag == true {
|
||||||
//println("save the state!!")
|
//println("save the state!!")
|
||||||
|
|
|
@ -138,6 +138,9 @@ match_test_suite = [
|
||||||
TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5},
|
TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5},
|
||||||
TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
|
TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
|
||||||
TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
|
TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
|
||||||
|
|
||||||
|
// particular groups
|
||||||
|
TestItem{"ababababac", r"ab(.*)(ac)",0,10},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -229,6 +232,12 @@ cgroups_test_suite = [
|
||||||
[0, 3, 4, 5, 5, 7],
|
[0, 3, 4, 5, 5, 7],
|
||||||
map[string]int{}
|
map[string]int{}
|
||||||
},
|
},
|
||||||
|
TestItemCGroup{
|
||||||
|
"ababababac",
|
||||||
|
r"ab(.*)(ac)",0,10,
|
||||||
|
[2, 8, 8, 10],
|
||||||
|
map[string]int{}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue