regex: fixes and optimizations (#7431)

pull/7448/head
penguindark 2020-12-21 05:36:14 +01:00 committed by GitHub
parent 82d1b6a5b7
commit c9171ebe9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 2 deletions

View File

@ -475,6 +475,7 @@ pub fn new() RE
#### **Custom initialization**
For some particular needs it is possible to initialize a fully customized regex:
```v ignore
pattern = r"ab(.*)(ac)"
// init custom regex
mut re := regex.RE{}
re.prog = []Token {len: pattern.len + 1} // max program length, cannot be longer than the pattern

View File

@ -1825,9 +1825,9 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
}
// manage here dot char
//println("Here we are, with stop: state buffer: [${state_list.len}]")
if state_list.len > 0 {
//println("Here we are, with stop: state buffer: [${state_list.len}]")
state = state_list.pop()
state.match_flag = true
@ -1898,6 +1898,10 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
re.groups[g_index+1] = state.i
if g_index > 0 && re.groups[g_index] <= re.groups[g_index-1] {
re.groups[g_index] = re.groups[g_index-1]
}
if re.groups[g_index+1] >= in_txt_len {
//println("clamp group!")
re.groups[g_index+1] = in_txt_len-1
@ -1946,6 +1950,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
// if we have reached the max number of repetitions, go on: dot chars are a dedicated case!!
if re.prog[state.pc].rep >= re.prog[state.pc].rep_max
{
state_list.pop()
m_state = .ist_next
continue
}
@ -1986,7 +1991,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
}
}
// check if we must continue or pass to the next IST
if next_check_flag == true {
//println("save the state!!")

View File

@ -138,6 +138,9 @@ match_test_suite = [
TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5},
TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
// particular groups
TestItem{"ababababac", r"ab(.*)(ac)",0,10},
]
)
@ -229,6 +232,12 @@ cgroups_test_suite = [
[0, 3, 4, 5, 5, 7],
map[string]int{}
},
TestItemCGroup{
"ababababac",
r"ab(.*)(ac)",0,10,
[2, 8, 8, 10],
map[string]int{}
},
]
)