regex: bug fixes (#11394)
parent
87934ecf39
commit
724942c4e6
|
@ -890,6 +890,10 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) {
|
|||
return -2, true, name, i
|
||||
}
|
||||
|
||||
// Characters that begin a quantifier: `+`, `*`, `?` and `{`.
// The compiler compares the single-byte character that follows a quantifier
// against this list and returns err_syntax_error when it matches.
// NOTE(review): the identifier is misspelled ("quntifier"); renaming it means
// updating every reference to it as well.
const (
|
||||
quntifier_chars = [rune(`+`), `*`, `?`, `{`]
|
||||
)
|
||||
|
||||
//
|
||||
// main compiler
|
||||
//
|
||||
|
@ -1036,20 +1040,37 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
|
|||
|
||||
// Quantifiers
|
||||
if char_len == 1 && pc > 0 {
|
||||
mut char_next := rune(0)
|
||||
mut char_next_len := 0
|
||||
if (char_len + i) < in_txt.len {
|
||||
char_next, char_next_len = re.get_char(in_txt, i + char_len)
|
||||
}
|
||||
mut quant_flag := true
|
||||
match byte(char_tmp) {
|
||||
`?` {
|
||||
// println("q: ${char_tmp:c}")
|
||||
// check illegal quantifier sequences
|
||||
if char_next_len == 1 && char_next in regex.quntifier_chars {
|
||||
return regex.err_syntax_error, i
|
||||
}
|
||||
re.prog[pc - 1].rep_min = 0
|
||||
re.prog[pc - 1].rep_max = 1
|
||||
}
|
||||
`+` {
|
||||
// println("q: ${char_tmp:c}")
|
||||
// check illegal quantifier sequences
|
||||
if char_next_len == 1 && char_next in regex.quntifier_chars {
|
||||
return regex.err_syntax_error, i
|
||||
}
|
||||
re.prog[pc - 1].rep_min = 1
|
||||
re.prog[pc - 1].rep_max = regex.max_quantifier
|
||||
}
|
||||
`*` {
|
||||
// println("q: ${char_tmp:c}")
|
||||
// check illegal quantifier sequences
|
||||
if char_next_len == 1 && char_next in regex.quntifier_chars {
|
||||
return regex.err_syntax_error, i
|
||||
}
|
||||
re.prog[pc - 1].rep_min = 0
|
||||
re.prog[pc - 1].rep_max = regex.max_quantifier
|
||||
}
|
||||
|
@ -1062,10 +1083,18 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
|
|||
re.prog[pc - 1].rep_min = min
|
||||
re.prog[pc - 1].rep_max = max
|
||||
re.prog[pc - 1].greedy = greedy
|
||||
// check illegal quantifier sequences
|
||||
if i <= in_txt.len {
|
||||
char_next, char_next_len = re.get_char(in_txt, i)
|
||||
if char_next_len == 1 && char_next in regex.quntifier_chars {
|
||||
return regex.err_syntax_error, i
|
||||
}
|
||||
}
|
||||
continue
|
||||
} else {
|
||||
return min, i
|
||||
}
|
||||
|
||||
// TODO: decide if the open bracket can be conform without the close bracket
|
||||
/*
|
||||
// no conform, parse as normal char
|
||||
|
|
|
@ -340,6 +340,12 @@ find_all_test_suite = [
|
|||
r"@for.+@endfor",
|
||||
[0, 22, 23, 50, 63, 80, 89, 117],
|
||||
['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
|
||||
},
|
||||
Test_find_all{
|
||||
"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++",
|
||||
r"\+{3}.*\+{3}",
|
||||
[0, 11, 18, 32, 33, 44],
|
||||
['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++']
|
||||
}
|
||||
|
||||
]
|
||||
|
@ -606,3 +612,25 @@ fn test_regex_func_replace(){
|
|||
}
|
||||
assert result == txt2
|
||||
}
|
||||
|
||||
// test invalid quantifier sequences
|
||||
// Patterns containing illegal quantifier sequences.
// test_quantifier_sequences compiles each entry with regex.regex_base and
// asserts that the returned error code is regex.err_syntax_error.
const(
|
||||
test_quantifier_sequences_list = [
|
||||
r'+{3}.*+{3}',
|
||||
r'+{3}.*?{3}',
|
||||
r'+{3}.**{3}',
|
||||
r'+{3}.*\+{3}*',
|
||||
r'+{3}.*\+{3}+',
|
||||
r'+{3}.*\+{3}??',
|
||||
r'+{3}.*\+{3}{4}'
|
||||
]
|
||||
)
|
||||
// test_quantifier_sequences checks that every pattern in
// test_quantifier_sequences_list is rejected by regex.regex_base with
// regex.err_syntax_error.
fn test_quantifier_sequences() {
	for pattern in test_quantifier_sequences_list {
		// Only the error code is checked; the other two return values are
		// discarded so they do not trigger unused-variable diagnostics.
		_, re_err, _ := regex.regex_base(pattern)
		if re_err != regex.err_syntax_error {
			// Report the offending pattern before the assert below fails.
			eprintln("pattern: $pattern => $re_err")
		}
		assert re_err == regex.err_syntax_error
	}
}
|
|
@ -283,8 +283,9 @@ pub fn (mut re RE) find_all_str(in_txt string) []string {
|
|||
|
||||
if s >= 0 && e > s {
|
||||
tmp_str := tos(in_txt.str + i, in_txt.len - i)
|
||||
mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e }
|
||||
// println("Found: $s:$e [${tmp_str[s..e]}]")
|
||||
res << tmp_str[..e]
|
||||
res << tmp_str[..tmp_e]
|
||||
i += e
|
||||
continue
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue