regex: fix a bug in find and find_all (#7839)
parent
9332f7cac2
commit
443ae1d76e
|
@ -271,6 +271,18 @@ find_all_test_suite = [
|
|||
r"url *= *https?://.*"+'\n',
|
||||
[5, 45],
|
||||
['url = https://github.com/dario/pig.html\n']
|
||||
},
|
||||
Test_find_all{
|
||||
"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
|
||||
r"#[.#]{4}##[.#]{4}##[.#]{4}###",
|
||||
[29, 49],
|
||||
['#....###...##...####']
|
||||
},
|
||||
Test_find_all{
|
||||
"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
|
||||
r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
|
||||
[0, 49],
|
||||
['#.#......##.#..#..##........##....###...##...####']
|
||||
}
|
||||
]
|
||||
)
|
||||
|
@ -283,7 +295,9 @@ fn test_regex(){
|
|||
// check capturing groups
|
||||
for c,to in cgroups_test_suite {
|
||||
// debug print
|
||||
if debug { println("#$c [$to.src] q[$to.q] ($to.s, $to.e)") }
|
||||
if debug {
|
||||
println("$c [${to.src}] [q${to.q}] (${to.s}, ${to.e})")
|
||||
}
|
||||
|
||||
mut re := regex.regex_opt(to.q) or {
|
||||
eprintln('err: $err')
|
||||
|
|
|
@ -116,9 +116,10 @@ pub fn (re RE) get_group_list() []Re_group {
|
|||
* Finders
|
||||
*
|
||||
******************************************************************************/
|
||||
// find try to find the first match in the input string
|
||||
/*
|
||||
// find internal implementation
|
||||
[direct_array_access]
|
||||
pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||
fn (mut re RE) find_imp(in_txt string) (int,int) {
|
||||
old_flag := re.flag
|
||||
re.flag |= f_src // enable search mode
|
||||
|
||||
|
@ -134,6 +135,33 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
|
|||
}
|
||||
return no_match_found, 0
|
||||
}
|
||||
*/
|
||||
|
||||
// find scans in_txt from left to right and returns the start and end
// indexes (relative to in_txt) of the first non-empty match.
// It returns -1, -1 when no starting offset produces a match.
[direct_array_access]
pub fn (mut re RE) find(in_txt string) (int, int) {
	for offset := 0; offset < in_txt.len; offset++ {
		mut m_start := -1
		mut m_end := -1
		unsafe {
			// build the tail of in_txt that starts at `offset` via tos on the raw
			// pointer (the original marks this as the "speed" path, as opposed
			// to slicing with in_txt[offset..])
			tail := tos(in_txt.str + offset, in_txt.len - offset)
			m_start, m_end = re.match_string(tail)
		}
		// no match at this offset, or a zero-length one: try the next offset
		if m_start < 0 || m_end <= m_start {
			continue
		}
		// match_string reports indexes relative to the tail, so shift them back
		return offset + m_start, offset + m_end
	}
	return -1, -1
}
|
||||
|
||||
// find_all finds all the non-overlapping occurrences of the match pattern
|
||||
[direct_array_access]
|
||||
|
@ -148,10 +176,10 @@ pub fn (mut re RE) find_all(in_txt string) []int {
|
|||
mut e := -1
|
||||
unsafe {
|
||||
tmp_str := tos(in_txt.str+i, in_txt.len-i)
|
||||
s,e = re.find(tmp_str)
|
||||
s,e = re.match_string(tmp_str)
|
||||
}
|
||||
//------------------------
|
||||
//s,e := re.find(in_txt[i..])
|
||||
//s,e := re.find_imp(in_txt[i..])
|
||||
//------------------------
|
||||
if s >= 0 && e > s && i+s > ls {
|
||||
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
||||
|
|
Loading…
Reference in New Issue