regex: fix a bug with find_all, fixes #10799 (#10801)

pull/10804/head
penguindark 2021-07-14 21:20:05 +02:00 committed by GitHub
parent fe1cf2ea26
commit 646c1e15e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 60 additions and 46 deletions

View File

@ -334,6 +334,10 @@ fn (mut re RE) reset() {
if re.group_csave_flag == true { if re.group_csave_flag == true {
re.group_csave.clear() // = []int{} re.group_csave.clear() // = []int{}
} }
// reset state list
re.state_list.clear()
re.group_stack.clear()
} }
// reset for search mode fail // reset for search mode fail
@ -1787,6 +1791,10 @@ pub fn (mut re RE) match_base(in_txt &byte, in_txt_len int) (int, int) {
re.reset_src() re.reset_src()
state.match_index = -1 state.match_index = -1
state.first_match = -1 state.first_match = -1
// reset state list
re.reset()
continue continue
} }

View File

@ -326,7 +326,20 @@ find_all_test_suite = [
r".*#[.#]{4}##[.#]{4}##[.#]{4}###", r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
[0, 49], [0, 49],
['#.#......##.#..#..##........##....###...##...####'] ['#.#......##.#..#..##........##....###...##...####']
},
Test_find_all{
"1234 Aa dddd Aaf 12334 Aa opopo Aaf",
r"Aa.+Aaf",
[5, 16, 23, 35],
['Aa dddd Aaf', 'Aa opopo Aaf']
},
Test_find_all{
"@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo",
r"@for.+@endfor",
[0, 22, 23, 50, 63, 80, 89, 117],
['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
} }
] ]
) )

View File

@ -178,21 +178,18 @@ pub fn (mut re RE) find(in_txt string) (int, int) {
mut i := 0 mut i := 0
for i < in_txt.len { for i < in_txt.len {
//--- speed references ---
mut s := -1 mut s := -1
mut e := -1 mut e := -1
unsafe { unsafe {
tmp_str := tos(in_txt.str + i, in_txt.len - i) // tmp_str := tos(in_txt.str + i, in_txt.len - i)
s, e = re.match_string(tmp_str) // println("Check: [$tmp_str]")
} s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1)
//------------------------
// s,e := re.find_imp(in_txt[i..]) if s >= 0 && e > s {
//------------------------ // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
if s >= 0 && e > s { // re.flag = old_flag
// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]") return i + s, i + e
// re.flag = old_flag }
return i + s, i + e
} else {
i++ i++
} }
} }
@ -239,33 +236,28 @@ pub fn (mut re RE) find_from(in_txt string, start int) (int, int) {
[direct_array_access] [direct_array_access]
pub fn (mut re RE) find_all(in_txt string) []int { pub fn (mut re RE) find_all(in_txt string) []int {
// old_flag := re.flag // old_flag := re.flag
// re.flag |= f_src // enable search mode // re.flag |= f_src // enable search mode
mut i := 0 mut i := 0
mut res := []int{} mut res := []int{}
mut ls := -1
for i < in_txt.len { for i < in_txt.len {
//--- speed references ---
mut s := -1 mut s := -1
mut e := -1 mut e := -1
unsafe { unsafe {
tmp_str := tos(in_txt.str + i, in_txt.len - i) // tmp_str := in_txt[i..]
s, e = re.match_string(tmp_str) // tmp_str := tos(in_txt.str + i, in_txt.len - i)
} // println("Check: [$tmp_str]")
//------------------------ s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
// s,e := re.find_imp(in_txt[i..])
//------------------------ if s >= 0 && e > s {
if s >= 0 && e > s && i + s > ls { res << i + s
// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls") res << i + e
res << i + s i += e
res << i + e continue
ls = i + s }
i = i + e
continue
} else {
i++
} }
i++
} }
// re.flag = old_flag // re.flag = old_flag
return res return res
@ -274,31 +266,32 @@ pub fn (mut re RE) find_all(in_txt string) []int {
// find_all_str find all the non overlapping occurrences of the match pattern, return a string list // find_all_str find all the non overlapping occurrences of the match pattern, return a string list
[direct_array_access] [direct_array_access]
pub fn (mut re RE) find_all_str(in_txt string) []string { pub fn (mut re RE) find_all_str(in_txt string) []string {
// old_flag := re.flag
// re.flag |= f_src // enable search mode
mut i := 0 mut i := 0
mut res := []string{} mut res := []string{}
mut ls := -1
for i < in_txt.len { for i < in_txt.len {
//--- speed references ---
mut s := -1 mut s := -1
mut e := -1 mut e := -1
unsafe { unsafe {
tmp_str := tos(in_txt.str + i, in_txt.len - i) // tmp_str := in_txt[i..]
s, e = re.find(tmp_str) // tmp_str := tos(in_txt.str + i, in_txt.len - i)
} // println("Check: [$tmp_str]")
//------------------------ s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
// s,e := re.find(in_txt[i..])
//------------------------ if s >= 0 && e > s {
if s >= 0 && e > s && i + s > ls { tmp_str := tos(in_txt.str + i, in_txt.len - i)
// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls") // println("Found: $s:$e [${tmp_str[s..e]}]")
res << in_txt[i + s..i + e] res << tmp_str[..e]
ls = i + s i += e
i = i + e continue
continue }
} else {
i++
} }
i++
} }
// re.flag = old_flag
return res return res
} }