regex: bug fixes (#11394)

pull/11403/head
penguindark 2021-09-05 03:48:59 +02:00 committed by GitHub
parent 87934ecf39
commit 724942c4e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 59 additions and 1 deletions

View File

@ -890,6 +890,10 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) {
return -2, true, name, i return -2, true, name, i
} }
// Characters that open a quantifier: `+`, `*`, `?` and `{`.
// The compiler compares the character that follows a quantifier against this
// list and reports err_syntax_error on a match, so that a quantifier can not
// be applied directly to another quantifier.
// NOTE(review): the identifier is misspelled ("quntifier" instead of
// "quantifier"); renaming it requires updating every use site at the same time.
const (
quntifier_chars = [rune(`+`), `*`, `?`, `{`]
)
// //
// main compiler // main compiler
// //
@ -1036,20 +1040,37 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
// Quantifiers // Quantifiers
if char_len == 1 && pc > 0 { if char_len == 1 && pc > 0 {
mut char_next := rune(0)
mut char_next_len := 0
if (char_len + i) < in_txt.len {
char_next, char_next_len = re.get_char(in_txt, i + char_len)
}
mut quant_flag := true mut quant_flag := true
match byte(char_tmp) { match byte(char_tmp) {
`?` { `?` {
// println("q: ${char_tmp:c}") // println("q: ${char_tmp:c}")
// check illegal quantifier sequences
if char_next_len == 1 && char_next in regex.quntifier_chars {
return regex.err_syntax_error, i
}
re.prog[pc - 1].rep_min = 0 re.prog[pc - 1].rep_min = 0
re.prog[pc - 1].rep_max = 1 re.prog[pc - 1].rep_max = 1
} }
`+` { `+` {
// println("q: ${char_tmp:c}") // println("q: ${char_tmp:c}")
// check illegal quantifier sequences
if char_next_len == 1 && char_next in regex.quntifier_chars {
return regex.err_syntax_error, i
}
re.prog[pc - 1].rep_min = 1 re.prog[pc - 1].rep_min = 1
re.prog[pc - 1].rep_max = regex.max_quantifier re.prog[pc - 1].rep_max = regex.max_quantifier
} }
`*` { `*` {
// println("q: ${char_tmp:c}") // println("q: ${char_tmp:c}")
// check illegal quantifier sequences
if char_next_len == 1 && char_next in regex.quntifier_chars {
return regex.err_syntax_error, i
}
re.prog[pc - 1].rep_min = 0 re.prog[pc - 1].rep_min = 0
re.prog[pc - 1].rep_max = regex.max_quantifier re.prog[pc - 1].rep_max = regex.max_quantifier
} }
@ -1062,10 +1083,18 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
re.prog[pc - 1].rep_min = min re.prog[pc - 1].rep_min = min
re.prog[pc - 1].rep_max = max re.prog[pc - 1].rep_max = max
re.prog[pc - 1].greedy = greedy re.prog[pc - 1].greedy = greedy
// check illegal quantifier sequences
if i <= in_txt.len {
char_next, char_next_len = re.get_char(in_txt, i)
if char_next_len == 1 && char_next in regex.quntifier_chars {
return regex.err_syntax_error, i
}
}
continue continue
} else { } else {
return min, i return min, i
} }
// TODO: decide if the open bracket can be conform without the close bracket // TODO: decide if the open bracket can be conform without the close bracket
/* /*
// no conform, parse as normal char // no conform, parse as normal char

View File

@ -340,6 +340,12 @@ find_all_test_suite = [
r"@for.+@endfor", r"@for.+@endfor",
[0, 22, 23, 50, 63, 80, 89, 117], [0, 22, 23, 50, 63, 80, 89, 117],
['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor'] ['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
},
Test_find_all{
"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++",
r"\+{3}.*\+{3}",
[0, 11, 18, 32, 33, 44],
['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++']
} }
] ]
@ -605,4 +611,26 @@ fn test_regex_func_replace(){
eprintln(txt2) eprintln(txt2)
} }
assert result == txt2 assert result == txt2
}
// Patterns with an illegal quantifier sequence: each one contains a quantifier
// that is immediately followed by another quantifier character
// (`*+`, `*?`, `**`, `}*`, `}+`, `}?`, `}{`).
// test_quantifier_sequences expects every entry to fail with regex.err_syntax_error.
const(
test_quantifier_sequences_list = [
r'+{3}.*+{3}',
r'+{3}.*?{3}',
r'+{3}.**{3}',
r'+{3}.*\+{3}*',
r'+{3}.*\+{3}+',
r'+{3}.*\+{3}??',
r'+{3}.*\+{3}{4}'
]
)
// test_quantifier_sequences compiles every pattern in
// test_quantifier_sequences_list and asserts that compilation is rejected
// with regex.err_syntax_error.
fn test_quantifier_sequences(){
	for pattern in test_quantifier_sequences_list {
		// Only the error code is checked: the compiled RE and the error
		// position are discarded explicitly instead of being bound to
		// unused variables.
		_, re_err, _ := regex.regex_base(pattern)
		if re_err != regex.err_syntax_error {
			// report which pattern was wrongly accepted before the assert fails
			eprintln("pattern: $pattern => $re_err")
		}
		assert re_err == regex.err_syntax_error
	}
}

View File

@ -283,8 +283,9 @@ pub fn (mut re RE) find_all_str(in_txt string) []string {
if s >= 0 && e > s { if s >= 0 && e > s {
tmp_str := tos(in_txt.str + i, in_txt.len - i) tmp_str := tos(in_txt.str + i, in_txt.len - i)
mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e }
// println("Found: $s:$e [${tmp_str[s..e]}]") // println("Found: $s:$e [${tmp_str[s..e]}]")
res << tmp_str[..e] res << tmp_str[..tmp_e]
i += e i += e
continue continue
} }