regex: fix a bug in find and find_all (#7839)

pull/7844/head
penguindark 2021-01-03 16:59:00 +01:00 committed by GitHub
parent 9332f7cac2
commit 443ae1d76e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 5 deletions

View File

@ -271,6 +271,18 @@ find_all_test_suite = [
r"url *= *https?://.*"+'\n', r"url *= *https?://.*"+'\n',
[5, 45], [5, 45],
['url = https://github.com/dario/pig.html\n'] ['url = https://github.com/dario/pig.html\n']
},
Test_find_all{
"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
r"#[.#]{4}##[.#]{4}##[.#]{4}###",
[29, 49],
['#....###...##...####']
},
Test_find_all{
"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
[0, 49],
['#.#......##.#..#..##........##....###...##...####']
} }
] ]
) )
@ -283,7 +295,9 @@ fn test_regex(){
// check capturing groups // check capturing groups
for c,to in cgroups_test_suite { for c,to in cgroups_test_suite {
// debug print // debug print
if debug { println("#$c [$to.src] q[$to.q] ($to.s, $to.e)") } if debug {
println("$c [${to.src}] [q${to.q}] (${to.s}, ${to.e})")
}
mut re := regex.regex_opt(to.q) or { mut re := regex.regex_opt(to.q) or {
eprintln('err: $err') eprintln('err: $err')

View File

@ -116,9 +116,10 @@ pub fn (re RE) get_group_list() []Re_group {
* Finders * Finders
* *
******************************************************************************/ ******************************************************************************/
// find try to find the first match in the input string /*
// find internal implementation
[direct_array_access] [direct_array_access]
pub fn (mut re RE) find(in_txt string) (int,int) { fn (mut re RE) find_imp(in_txt string) (int,int) {
old_flag := re.flag old_flag := re.flag
re.flag |= f_src // enable search mode re.flag |= f_src // enable search mode
@ -134,6 +135,33 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
} }
return no_match_found, 0 return no_match_found, 0
} }
*/
// find searches in_txt for the first match of the regex.
// The pattern is tried with match_string at every offset of in_txt, from
// left to right, and the first non-empty match wins.
// Returns the start and end indexes of the match relative to in_txt
// (usable as in_txt[start..end]), or -1, -1 when nothing matches.
[direct_array_access]
pub fn (mut re RE) find(in_txt string) (int,int) {
	for offset := 0; offset < in_txt.len; offset++ {
		mut start := -1
		mut end := -1
		unsafe {
			// build the tail of in_txt from pointer + length (speed reference);
			// the slice form of the same tail is in_txt[offset..]
			tail := tos(in_txt.str+offset, in_txt.len-offset)
			start,end = re.match_string(tail)
		}
		// no match, or an empty one, at this offset: try the next one
		if start < 0 || end <= start {
			continue
		}
		// match_string indexes are relative to the tail: rebase them on in_txt
		return offset+start, offset+end
	}
	return -1, -1
}
// find_all find all the non overlapping occurrences of the match pattern // find_all find all the non overlapping occurrences of the match pattern
[direct_array_access] [direct_array_access]
@ -148,10 +176,10 @@ pub fn (mut re RE) find_all(in_txt string) []int {
mut e := -1 mut e := -1
unsafe { unsafe {
tmp_str := tos(in_txt.str+i, in_txt.len-i) tmp_str := tos(in_txt.str+i, in_txt.len-i)
s,e = re.find(tmp_str) s,e = re.match_string(tmp_str)
} }
//------------------------ //------------------------
//s,e := re.find(in_txt[i..]) //s,e := re.find_imp(in_txt[i..])
//------------------------ //------------------------
if s >= 0 && e > s && i+s > ls { if s >= 0 && e > s && i+s > ls {
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls") //println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")