regex: fix a bug in find and find_all (#7839)
							parent
							
								
									9332f7cac2
								
							
						
					
					
						commit
						443ae1d76e
					
				| 
						 | 
					@ -271,6 +271,18 @@ find_all_test_suite = [
 | 
				
			||||||
		r"url *= *https?://.*"+'\n',
 | 
							r"url *= *https?://.*"+'\n',
 | 
				
			||||||
		[5, 45],
 | 
							[5, 45],
 | 
				
			||||||
		['url = https://github.com/dario/pig.html\n']
 | 
							['url = https://github.com/dario/pig.html\n']
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
						Test_find_all{
 | 
				
			||||||
 | 
							"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
 | 
				
			||||||
 | 
							r"#[.#]{4}##[.#]{4}##[.#]{4}###",
 | 
				
			||||||
 | 
							[29, 49],
 | 
				
			||||||
 | 
							['#....###...##...####']
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
							Test_find_all{
 | 
				
			||||||
 | 
							"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
 | 
				
			||||||
 | 
							r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
 | 
				
			||||||
 | 
							[0, 49],
 | 
				
			||||||
 | 
							['#.#......##.#..#..##........##....###...##...####']
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
| 
						 | 
					@ -283,7 +295,9 @@ fn test_regex(){
 | 
				
			||||||
	// check capturing groups
 | 
						// check capturing groups
 | 
				
			||||||
	for c,to in cgroups_test_suite {
 | 
						for c,to in cgroups_test_suite {
 | 
				
			||||||
		// debug print
 | 
							// debug print
 | 
				
			||||||
		if debug { println("#$c [$to.src] q[$to.q] ($to.s, $to.e)") }
 | 
							if debug {
 | 
				
			||||||
 | 
								println("$c [${to.src}] [q${to.q}] (${to.s}, ${to.e})") 
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		mut re := regex.regex_opt(to.q) or {
 | 
							mut re := regex.regex_opt(to.q) or {
 | 
				
			||||||
			eprintln('err: $err')
 | 
								eprintln('err: $err')
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -116,9 +116,10 @@ pub fn (re RE) get_group_list() []Re_group {
 | 
				
			||||||
* Finders
 | 
					* Finders
 | 
				
			||||||
*
 | 
					*
 | 
				
			||||||
******************************************************************************/
 | 
					******************************************************************************/
 | 
				
			||||||
// find try to find the first match in the input string
 | 
					/*
 | 
				
			||||||
 | 
					// find internal implementation
 | 
				
			||||||
[direct_array_access]
 | 
					[direct_array_access]
 | 
				
			||||||
pub fn (mut re RE) find(in_txt string) (int,int) {
 | 
					fn (mut re RE) find_imp(in_txt string) (int,int) {
 | 
				
			||||||
	old_flag := re.flag
 | 
						old_flag := re.flag
 | 
				
			||||||
	re.flag |= f_src  // enable search mode
 | 
						re.flag |= f_src  // enable search mode
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -134,6 +135,33 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return no_match_found, 0
 | 
						return no_match_found, 0
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// find scans in_txt from left to right and returns the start and end
					// indexes of the first match, or -1, -1 when nothing matches
					[direct_array_access]
					pub fn (mut re RE) find(in_txt string) (int,int) {
						for offset := 0; offset < in_txt.len; offset++ {
							mut m_start := -1
							mut m_end := -1
							unsafe {
								// remainder of in_txt from offset onwards
								// (presumably a view that avoids a copy - TODO confirm)
								suffix := tos(in_txt.str + offset, in_txt.len - offset)
								m_start, m_end = re.match_string(suffix)
							}
							// no usable (non-empty) match from this offset: retry one position further
							if m_start < 0 || m_end <= m_start {
								continue
							}
							// the indexes are relative to suffix, shift them back onto in_txt
							return offset + m_start, offset + m_end
						}
						return -1, -1
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// find_all finds all the non-overlapping occurrences of the match pattern
 | 
					// find_all finds all the non-overlapping occurrences of the match pattern
 | 
				
			||||||
[direct_array_access]
 | 
					[direct_array_access]
 | 
				
			||||||
| 
						 | 
					@ -148,10 +176,10 @@ pub fn (mut re RE) find_all(in_txt string) []int {
 | 
				
			||||||
		mut e := -1
 | 
							mut e := -1
 | 
				
			||||||
		unsafe {
 | 
							unsafe {
 | 
				
			||||||
			tmp_str := tos(in_txt.str+i, in_txt.len-i)
 | 
								tmp_str := tos(in_txt.str+i, in_txt.len-i)
 | 
				
			||||||
			s,e = re.find(tmp_str)
 | 
								s,e = re.match_string(tmp_str)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		//------------------------
 | 
							//------------------------
 | 
				
			||||||
		//s,e := re.find(in_txt[i..])
 | 
							//s,e := re.find_imp(in_txt[i..])
 | 
				
			||||||
		//------------------------
 | 
							//------------------------
 | 
				
			||||||
		if s >= 0 && e > s && i+s > ls {
 | 
							if s >= 0 && e > s && i+s > ls {
 | 
				
			||||||
			//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
 | 
								//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue