regex: bug fixes (#11394)
							parent
							
								
									87934ecf39
								
							
						
					
					
						commit
						724942c4e6
					
				|  | @ -890,6 +890,10 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) { | |||
| 	return -2, true, name, i | ||||
| } | ||||
| 
 | ||||
| const ( | ||||
| 	quntifier_chars = [rune(`+`), `*`, `?`, `{`] | ||||
| ) | ||||
| 
 | ||||
| //
 | ||||
| // main compiler
 | ||||
| //
 | ||||
|  | @ -1036,20 +1040,37 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) { | |||
| 
 | ||||
| 		// Quantifiers
 | ||||
| 		if char_len == 1 && pc > 0 { | ||||
| 			mut char_next := rune(0) | ||||
| 			mut char_next_len := 0 | ||||
| 			if (char_len + i) < in_txt.len { | ||||
| 				char_next, char_next_len = re.get_char(in_txt, i + char_len) | ||||
| 			} | ||||
| 			mut quant_flag := true | ||||
| 			match byte(char_tmp) { | ||||
| 				`?` { | ||||
| 					// println("q: ${char_tmp:c}")
 | ||||
| 					// check illegal quantifier sequences
 | ||||
| 					if char_next_len == 1 && char_next in regex.quntifier_chars { | ||||
| 						return regex.err_syntax_error, i | ||||
| 					} | ||||
| 					re.prog[pc - 1].rep_min = 0 | ||||
| 					re.prog[pc - 1].rep_max = 1 | ||||
| 				} | ||||
| 				`+` { | ||||
| 					// println("q: ${char_tmp:c}")
 | ||||
| 					// check illegal quantifier sequences
 | ||||
| 					if char_next_len == 1 && char_next in regex.quntifier_chars { | ||||
| 						return regex.err_syntax_error, i | ||||
| 					} | ||||
| 					re.prog[pc - 1].rep_min = 1 | ||||
| 					re.prog[pc - 1].rep_max = regex.max_quantifier | ||||
| 				} | ||||
| 				`*` { | ||||
| 					// println("q: ${char_tmp:c}")
 | ||||
| 					// check illegal quantifier sequences
 | ||||
| 					if char_next_len == 1 && char_next in regex.quntifier_chars { | ||||
| 						return regex.err_syntax_error, i | ||||
| 					} | ||||
| 					re.prog[pc - 1].rep_min = 0 | ||||
| 					re.prog[pc - 1].rep_max = regex.max_quantifier | ||||
| 				} | ||||
|  | @ -1062,10 +1083,18 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) { | |||
| 						re.prog[pc - 1].rep_min = min | ||||
| 						re.prog[pc - 1].rep_max = max | ||||
| 						re.prog[pc - 1].greedy = greedy | ||||
| 						// check illegal quantifier sequences
 | ||||
| 						if i <= in_txt.len { | ||||
| 							char_next, char_next_len = re.get_char(in_txt, i) | ||||
| 							if char_next_len == 1 && char_next in regex.quntifier_chars { | ||||
| 								return regex.err_syntax_error, i | ||||
| 							} | ||||
| 						} | ||||
| 						continue | ||||
| 					} else { | ||||
| 						return min, i | ||||
| 					} | ||||
| 
 | ||||
| 					// TODO: decide whether an open bracket without a matching close bracket should be accepted
 | ||||
| 					/* | ||||
| 					// not conforming, parse as a normal char
 | ||||
|  |  | |||
|  | @ -340,6 +340,12 @@ find_all_test_suite = [ | |||
| 		r"@for.+@endfor", | ||||
| 		[0, 22, 23, 50, 63, 80, 89, 117], | ||||
| 		['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor'] | ||||
| 	}, | ||||
| 	Test_find_all{ | ||||
| 		"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++", | ||||
| 		r"\+{3}.*\+{3}", | ||||
| 		[0, 11, 18, 32, 33, 44], | ||||
| 		['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++'] | ||||
| 	} | ||||
| 
 | ||||
| ] | ||||
|  | @ -605,4 +611,26 @@ fn test_regex_func_replace(){ | |||
| 		eprintln(txt2) | ||||
| 	} | ||||
| 	assert result == txt2 | ||||
| } | ||||
| 
 | ||||
| // test invalid quantifier sequences
 | ||||
| const( | ||||
| 	test_quantifier_sequences_list = [ | ||||
| 		r'+{3}.*+{3}',  | ||||
| 		r'+{3}.*?{3}',  | ||||
| 		r'+{3}.**{3}', | ||||
| 		r'+{3}.*\+{3}*', | ||||
| 		r'+{3}.*\+{3}+', | ||||
| 		r'+{3}.*\+{3}??', | ||||
| 		r'+{3}.*\+{3}{4}' | ||||
| 	] | ||||
| ) | ||||
| fn test_quantifier_sequences(){ | ||||
| 	for pattern in test_quantifier_sequences_list { | ||||
| 		re, re_err, err_pos := regex.regex_base(pattern) | ||||
| 		if re_err != regex.err_syntax_error { | ||||
| 			eprintln("pattern: $pattern => $re_err") | ||||
| 		} | ||||
| 		assert re_err == regex.err_syntax_error | ||||
| 	} | ||||
| } | ||||
|  | @ -283,8 +283,9 @@ pub fn (mut re RE) find_all_str(in_txt string) []string { | |||
| 
 | ||||
| 			if s >= 0 && e > s { | ||||
| 				tmp_str := tos(in_txt.str + i, in_txt.len - i) | ||||
| 				mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e } | ||||
| 				// println("Found: $s:$e [${tmp_str[s..e]}]")
 | ||||
| 				res << tmp_str[..e] | ||||
| 				res << tmp_str[..tmp_e] | ||||
| 				i += e | ||||
| 				continue | ||||
| 			} | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue