regex: fix a bug in find and find_all (#7839)
parent
9332f7cac2
commit
443ae1d76e
|
@ -271,6 +271,18 @@ find_all_test_suite = [
|
||||||
r"url *= *https?://.*"+'\n',
|
r"url *= *https?://.*"+'\n',
|
||||||
[5, 45],
|
[5, 45],
|
||||||
['url = https://github.com/dario/pig.html\n']
|
['url = https://github.com/dario/pig.html\n']
|
||||||
|
},
|
||||||
|
Test_find_all{
|
||||||
|
"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
|
||||||
|
r"#[.#]{4}##[.#]{4}##[.#]{4}###",
|
||||||
|
[29, 49],
|
||||||
|
['#....###...##...####']
|
||||||
|
},
|
||||||
|
Test_find_all{
|
||||||
|
"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
|
||||||
|
r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
|
||||||
|
[0, 49],
|
||||||
|
['#.#......##.#..#..##........##....###...##...####']
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
@ -283,7 +295,9 @@ fn test_regex(){
|
||||||
// check capturing groups
|
// check capturing groups
|
||||||
for c,to in cgroups_test_suite {
|
for c,to in cgroups_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
if debug { println("#$c [$to.src] q[$to.q] ($to.s, $to.e)") }
|
if debug {
|
||||||
|
println("$c [${to.src}] [q${to.q}] (${to.s}, ${to.e})")
|
||||||
|
}
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
|
|
|
@ -116,9 +116,10 @@ pub fn (re RE) get_group_list() []Re_group {
|
||||||
* Finders
|
* Finders
|
||||||
*
|
*
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
// find tries to find the first match in the input string
|
/*
|
||||||
|
// find internal implementation
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
pub fn (mut re RE) find(in_txt string) (int,int) {
|
fn (mut re RE) find_imp(in_txt string) (int,int) {
|
||||||
old_flag := re.flag
|
old_flag := re.flag
|
||||||
re.flag |= f_src // enable search mode
|
re.flag |= f_src // enable search mode
|
||||||
|
|
||||||
|
@ -134,6 +135,33 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
}
|
}
|
||||||
return no_match_found, 0
|
return no_match_found, 0
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// find tries to find the first match in the input string
|
||||||
|
[direct_array_access]
|
||||||
|
pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
|
mut i := 0
|
||||||
|
for i < in_txt.len {
|
||||||
|
//--- speed references ---
|
||||||
|
mut s := -1
|
||||||
|
mut e := -1
|
||||||
|
unsafe {
|
||||||
|
tmp_str := tos(in_txt.str+i, in_txt.len-i)
|
||||||
|
s,e = re.match_string(tmp_str)
|
||||||
|
}
|
||||||
|
//------------------------
|
||||||
|
//s,e := re.find_imp(in_txt[i..])
|
||||||
|
//------------------------
|
||||||
|
if s >= 0 && e > s {
|
||||||
|
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
|
||||||
|
return i+s, i+e
|
||||||
|
} else {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return -1, -1
|
||||||
|
}
|
||||||
|
|
||||||
// find_all finds all the non-overlapping occurrences of the match pattern
|
// find_all finds all the non-overlapping occurrences of the match pattern
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
|
@ -148,10 +176,10 @@ pub fn (mut re RE) find_all(in_txt string) []int {
|
||||||
mut e := -1
|
mut e := -1
|
||||||
unsafe {
|
unsafe {
|
||||||
tmp_str := tos(in_txt.str+i, in_txt.len-i)
|
tmp_str := tos(in_txt.str+i, in_txt.len-i)
|
||||||
s,e = re.find(tmp_str)
|
s,e = re.match_string(tmp_str)
|
||||||
}
|
}
|
||||||
//------------------------
|
//------------------------
|
||||||
//s,e := re.find(in_txt[i..])
|
//s,e := re.find_imp(in_txt[i..])
|
||||||
//------------------------
|
//------------------------
|
||||||
if s >= 0 && e > s && i+s > ls {
|
if s >= 0 && e > s && i+s > ls {
|
||||||
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
||||||
|
|
Loading…
Reference in New Issue