regex: fix a bug with find_all, fixes #10799 (#10801)

pull/10804/head
penguindark 2021-07-14 21:20:05 +02:00 committed by GitHub
parent fe1cf2ea26
commit 646c1e15e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 60 additions and 46 deletions

View File

@ -334,6 +334,10 @@ fn (mut re RE) reset() {
if re.group_csave_flag == true {
re.group_csave.clear() // = []int{}
}
// reset state list
re.state_list.clear()
re.group_stack.clear()
}
// reset for search mode fail
@ -1787,6 +1791,10 @@ pub fn (mut re RE) match_base(in_txt &byte, in_txt_len int) (int, int) {
re.reset_src()
state.match_index = -1
state.first_match = -1
// reset state list
re.reset()
continue
}

View File

@ -326,7 +326,20 @@ find_all_test_suite = [
r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
[0, 49],
['#.#......##.#..#..##........##....###...##...####']
},
Test_find_all{
"1234 Aa dddd Aaf 12334 Aa opopo Aaf",
r"Aa.+Aaf",
[5, 16, 23, 35],
['Aa dddd Aaf', 'Aa opopo Aaf']
},
Test_find_all{
"@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo",
r"@for.+@endfor",
[0, 22, 23, 50, 63, 80, 89, 117],
['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
}
]
)

View File

@ -178,21 +178,18 @@ pub fn (mut re RE) find(in_txt string) (int, int) {
mut i := 0
for i < in_txt.len {
//--- speed references ---
mut s := -1
mut e := -1
unsafe {
tmp_str := tos(in_txt.str + i, in_txt.len - i)
s, e = re.match_string(tmp_str)
}
//------------------------
// s,e := re.find_imp(in_txt[i..])
//------------------------
if s >= 0 && e > s {
// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
// re.flag = old_flag
return i + s, i + e
} else {
// tmp_str := tos(in_txt.str + i, in_txt.len - i)
// println("Check: [$tmp_str]")
s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1)
if s >= 0 && e > s {
// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
// re.flag = old_flag
return i + s, i + e
}
i++
}
}
@ -239,33 +236,28 @@ pub fn (mut re RE) find_from(in_txt string, start int) (int, int) {
[direct_array_access]
pub fn (mut re RE) find_all(in_txt string) []int {
// old_flag := re.flag
// re.flag |= f_src // enable search mode
// re.flag |= f_src // enable search mode
mut i := 0
mut res := []int{}
mut ls := -1
for i < in_txt.len {
//--- speed references ---
mut s := -1
mut e := -1
unsafe {
tmp_str := tos(in_txt.str + i, in_txt.len - i)
s, e = re.match_string(tmp_str)
}
//------------------------
// s,e := re.find_imp(in_txt[i..])
//------------------------
if s >= 0 && e > s && i + s > ls {
// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
res << i + s
res << i + e
ls = i + s
i = i + e
continue
} else {
i++
// tmp_str := in_txt[i..]
// tmp_str := tos(in_txt.str + i, in_txt.len - i)
// println("Check: [$tmp_str]")
s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
if s >= 0 && e > s {
res << i + s
res << i + e
i += e
continue
}
}
i++
}
// re.flag = old_flag
return res
@ -274,31 +266,32 @@ pub fn (mut re RE) find_all(in_txt string) []int {
// find_all_str finds all the non-overlapping occurrences of the match pattern and returns them as a list of strings
[direct_array_access]
pub fn (mut re RE) find_all_str(in_txt string) []string {
// old_flag := re.flag
// re.flag |= f_src // enable search mode
mut i := 0
mut res := []string{}
mut ls := -1
for i < in_txt.len {
//--- speed references ---
mut s := -1
mut e := -1
unsafe {
tmp_str := tos(in_txt.str + i, in_txt.len - i)
s, e = re.find(tmp_str)
}
//------------------------
// s,e := re.find(in_txt[i..])
//------------------------
if s >= 0 && e > s && i + s > ls {
// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
res << in_txt[i + s..i + e]
ls = i + s
i = i + e
continue
} else {
i++
// tmp_str := in_txt[i..]
// tmp_str := tos(in_txt.str + i, in_txt.len - i)
// println("Check: [$tmp_str]")
s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
if s >= 0 && e > s {
tmp_str := tos(in_txt.str + i, in_txt.len - i)
// println("Found: $s:$e [${tmp_str[s..e]}]")
res << tmp_str[..e]
i += e
continue
}
}
i++
}
// re.flag = old_flag
return res
}