regex: fix a bug in find and find_all (#7839)
							parent
							
								
									9332f7cac2
								
							
						
					
					
						commit
						443ae1d76e
					
				| 
						 | 
				
			
			@ -271,6 +271,18 @@ find_all_test_suite = [
 | 
			
		|||
		r"url *= *https?://.*"+'\n',
 | 
			
		||||
		[5, 45],
 | 
			
		||||
		['url = https://github.com/dario/pig.html\n']
 | 
			
		||||
	},
 | 
			
		||||
	Test_find_all{
 | 
			
		||||
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
 | 
			
		||||
		r"#[.#]{4}##[.#]{4}##[.#]{4}###",
 | 
			
		||||
		[29, 49],
 | 
			
		||||
		['#....###...##...####']
 | 
			
		||||
	},
 | 
			
		||||
		Test_find_all{
 | 
			
		||||
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
 | 
			
		||||
		r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
 | 
			
		||||
		[0, 49],
 | 
			
		||||
		['#.#......##.#..#..##........##....###...##...####']
 | 
			
		||||
	}
 | 
			
		||||
]
 | 
			
		||||
)
 | 
			
		||||
| 
						 | 
				
			
			@ -283,7 +295,9 @@ fn test_regex(){
 | 
			
		|||
	// check capturing groups
 | 
			
		||||
	for c,to in cgroups_test_suite {
 | 
			
		||||
		// debug print
 | 
			
		||||
		if debug { println("#$c [$to.src] q[$to.q] ($to.s, $to.e)") }
 | 
			
		||||
		if debug {
 | 
			
		||||
			println("$c [${to.src}] [q${to.q}] (${to.s}, ${to.e})") 
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		mut re := regex.regex_opt(to.q) or {
 | 
			
		||||
			eprintln('err: $err')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -116,9 +116,10 @@ pub fn (re RE) get_group_list() []Re_group {
 | 
			
		|||
* Finders
 | 
			
		||||
*
 | 
			
		||||
******************************************************************************/
 | 
			
		||||
// find tries to find the first match in the input string
 | 
			
		||||
/*
 | 
			
		||||
// find internal implementation
 | 
			
		||||
[direct_array_access]
 | 
			
		||||
pub fn (mut re RE) find(in_txt string) (int,int) {
 | 
			
		||||
fn (mut re RE) find_imp(in_txt string) (int,int) {
 | 
			
		||||
	old_flag := re.flag
 | 
			
		||||
	re.flag |= f_src  // enable search mode
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -134,6 +135,33 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
 | 
			
		|||
	}
 | 
			
		||||
	return no_match_found, 0
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
// find tries to find the first match in the input string
 | 
			
		||||
[direct_array_access]
 | 
			
		||||
pub fn (mut re RE) find(in_txt string) (int,int) {
 | 
			
		||||
	// `i` is the offset in in_txt at which the current match attempt starts;
	// it advances by one after every failed attempt.
	mut i := 0
 | 
			
		||||
	for i < in_txt.len {
 | 
			
		||||
		//--- speed references ---
 | 
			
		||||
		// s,e receive the bounds reported by match_string for the tail
		// that starts at offset i (so they are relative to i, not to in_txt).
		mut s := -1
 | 
			
		||||
		mut e := -1
 | 
			
		||||
		unsafe {
 | 
			
		||||
			// Build a string for the tail of in_txt from the raw pointer at
			// offset i and the remaining length.
			// NOTE(review): presumably `tos` wraps the buffer without copying — confirm.
			tmp_str := tos(in_txt.str+i, in_txt.len-i)
 | 
			
		||||
			s,e = re.match_string(tmp_str)
 | 
			
		||||
		}
 | 
			
		||||
		//------------------------
 | 
			
		||||
		//s,e := re.find_imp(in_txt[i..])
 | 
			
		||||
		//------------------------
 | 
			
		||||
		// Only a non-empty result (e > s) with a valid start counts as a match;
		// anything else moves the start offset one position forward.
		if s >= 0 && e > s {
 | 
			
		||||
			//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
 | 
			
		||||
			// Translate the tail-relative bounds back into offsets of in_txt.
			return i+s, i+e
 | 
			
		||||
		} else {
 | 
			
		||||
			i++
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
	// No start offset produced a match: both values are -1.
	return -1, -1
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// find_all finds all the non-overlapping occurrences of the match pattern
 | 
			
		||||
[direct_array_access]
 | 
			
		||||
| 
						 | 
				
			
			@ -148,10 +176,10 @@ pub fn (mut re RE) find_all(in_txt string) []int {
 | 
			
		|||
		mut e := -1
 | 
			
		||||
		unsafe {
 | 
			
		||||
			tmp_str := tos(in_txt.str+i, in_txt.len-i)
 | 
			
		||||
			s,e = re.find(tmp_str)
 | 
			
		||||
			s,e = re.match_string(tmp_str)
 | 
			
		||||
		}
 | 
			
		||||
		//------------------------
 | 
			
		||||
		//s,e := re.find(in_txt[i..])
 | 
			
		||||
		//s,e := re.find_imp(in_txt[i..])
 | 
			
		||||
		//------------------------
 | 
			
		||||
		if s >= 0 && e > s && i+s > ls {
 | 
			
		||||
			//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue