regex: bug fixes (#11394)
							parent
							
								
									87934ecf39
								
							
						
					
					
						commit
						724942c4e6
					
				| 
						 | 
				
			
			@ -890,6 +890,10 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) {
 | 
			
		|||
	return -2, true, name, i
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// quntifier_chars holds the runes the compiler looks for right after a
// quantifier; finding one of them there is reported as a syntax error.
const (
	quntifier_chars = [
		rune(`+`),
		`*`,
		`?`,
		`{`
	]
)
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// main compiler
 | 
			
		||||
//
 | 
			
		||||
| 
						 | 
				
			
			@ -1036,20 +1040,37 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
 | 
			
		|||
 | 
			
		||||
		// Quantifiers
 | 
			
		||||
		if char_len == 1 && pc > 0 {
 | 
			
		||||
			mut char_next := rune(0)
 | 
			
		||||
			mut char_next_len := 0
 | 
			
		||||
			if (char_len + i) < in_txt.len {
 | 
			
		||||
				char_next, char_next_len = re.get_char(in_txt, i + char_len)
 | 
			
		||||
			}
 | 
			
		||||
			mut quant_flag := true
 | 
			
		||||
			match byte(char_tmp) {
 | 
			
		||||
				`?` {
 | 
			
		||||
					// println("q: ${char_tmp:c}")
 | 
			
		||||
					// check illegal quantifier sequences
 | 
			
		||||
					if char_next_len == 1 && char_next in regex.quntifier_chars {
 | 
			
		||||
						return regex.err_syntax_error, i
 | 
			
		||||
					}
 | 
			
		||||
					re.prog[pc - 1].rep_min = 0
 | 
			
		||||
					re.prog[pc - 1].rep_max = 1
 | 
			
		||||
				}
 | 
			
		||||
				`+` {
 | 
			
		||||
					// println("q: ${char_tmp:c}")
 | 
			
		||||
					// check illegal quantifier sequences
 | 
			
		||||
					if char_next_len == 1 && char_next in regex.quntifier_chars {
 | 
			
		||||
						return regex.err_syntax_error, i
 | 
			
		||||
					}
 | 
			
		||||
					re.prog[pc - 1].rep_min = 1
 | 
			
		||||
					re.prog[pc - 1].rep_max = regex.max_quantifier
 | 
			
		||||
				}
 | 
			
		||||
				`*` {
 | 
			
		||||
					// println("q: ${char_tmp:c}")
 | 
			
		||||
					// check illegal quantifier sequences
 | 
			
		||||
					if char_next_len == 1 && char_next in regex.quntifier_chars {
 | 
			
		||||
						return regex.err_syntax_error, i
 | 
			
		||||
					}
 | 
			
		||||
					re.prog[pc - 1].rep_min = 0
 | 
			
		||||
					re.prog[pc - 1].rep_max = regex.max_quantifier
 | 
			
		||||
				}
 | 
			
		||||
| 
						 | 
				
			
			@ -1062,10 +1083,18 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
 | 
			
		|||
						re.prog[pc - 1].rep_min = min
 | 
			
		||||
						re.prog[pc - 1].rep_max = max
 | 
			
		||||
						re.prog[pc - 1].greedy = greedy
 | 
			
		||||
						// check illegal quantifier sequences
 | 
			
		||||
						if i <= in_txt.len {
 | 
			
		||||
							char_next, char_next_len = re.get_char(in_txt, i)
 | 
			
		||||
							if char_next_len == 1 && char_next in regex.quntifier_chars {
 | 
			
		||||
								return regex.err_syntax_error, i
 | 
			
		||||
							}
 | 
			
		||||
						}
 | 
			
		||||
						continue
 | 
			
		||||
					} else {
 | 
			
		||||
						return min, i
 | 
			
		||||
					}
 | 
			
		||||
 | 
			
		||||
					// TODO: decide whether an open bracket without a matching close bracket can be treated as valid
 | 
			
		||||
					/*
 | 
			
		||||
					// no conform, parse as normal char
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -340,6 +340,12 @@ find_all_test_suite = [
 | 
			
		|||
		r"@for.+@endfor",
 | 
			
		||||
		[0, 22, 23, 50, 63, 80, 89, 117],
 | 
			
		||||
		['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
 | 
			
		||||
	},
 | 
			
		||||
	Test_find_all{
 | 
			
		||||
		"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++",
 | 
			
		||||
		r"\+{3}.*\+{3}",
 | 
			
		||||
		[0, 11, 18, 32, 33, 44],
 | 
			
		||||
		['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++']
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
]
 | 
			
		||||
| 
						 | 
				
			
			@ -606,3 +612,25 @@ fn test_regex_func_replace(){
 | 
			
		|||
	}
 | 
			
		||||
	assert result == txt2
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// test that invalid quantifier sequences are rejected
 | 
			
		||||
// Patterns in which one quantifier is directly followed by another one.
// Each of them is expected to fail compilation with a syntax error.
const (
	test_quantifier_sequences_list = [
		// `*` followed by `+`
		r'+{3}.*+{3}',
		// `*` followed by `?`
		r'+{3}.*?{3}',
		// `*` followed by `*`
		r'+{3}.**{3}',
		// `{3}` followed by `*`
		r'+{3}.*\+{3}*',
		// `{3}` followed by `+`
		r'+{3}.*\+{3}+',
		// `{3}` followed by `??`
		r'+{3}.*\+{3}??',
		// `{3}` followed by `{4}`
		r'+{3}.*\+{3}{4}'
	]
)
 | 
			
		||||
// test_quantifier_sequences checks that every pattern in
// test_quantifier_sequences_list is rejected by regex.regex_base with
// regex.err_syntax_error.
fn test_quantifier_sequences() {
	for pattern in test_quantifier_sequences_list {
		// Only the error code is checked; the other two values returned by
		// regex_base are discarded so no unused variables are left behind.
		_, re_err, _ := regex.regex_base(pattern)
		if re_err != regex.err_syntax_error {
			// Print the offending pattern before the assert below fails.
			eprintln("pattern: $pattern => $re_err")
		}
		assert re_err == regex.err_syntax_error
	}
}
 | 
			
		||||
| 
						 | 
				
			
			@ -283,8 +283,9 @@ pub fn (mut re RE) find_all_str(in_txt string) []string {
 | 
			
		|||
 | 
			
		||||
			if s >= 0 && e > s {
 | 
			
		||||
				tmp_str := tos(in_txt.str + i, in_txt.len - i)
 | 
			
		||||
				mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e }
 | 
			
		||||
				// println("Found: $s:$e [${tmp_str[s..e]}]")
 | 
			
		||||
				res << tmp_str[..e]
 | 
			
		||||
				res << tmp_str[..tmp_e]
 | 
			
		||||
				i += e
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue