regex: added default support for long queries (#12635)
parent
f86710dcc7
commit
519ca90cfa
|
@ -40,7 +40,7 @@ pub fn regex_opt(pattern string) ?RE {
|
||||||
re.prog = []Token{len: pattern.len + 1} // max program length, can not be longer than the pattern
|
re.prog = []Token{len: pattern.len + 1} // max program length, can not be longer than the pattern
|
||||||
re.cc = []CharClass{len: pattern.len} // there can not be more char classes than the length of the pattern
|
re.cc = []CharClass{len: pattern.len} // there can not be more char classes than the length of the pattern
|
||||||
re.group_csave_flag = false // enable continuous group saving
|
re.group_csave_flag = false // enable continuous group saving
|
||||||
re.group_max_nested = 128 // set max 128 group nested
|
re.group_max_nested = pattern.len >> 1 // max nesting depth of groups: at most half of the pattern length
|
||||||
re.group_max = pattern.len >> 1 // we can't have more groups than half of the pattern length
|
re.group_max = pattern.len >> 1 // we can't have more groups than half of the pattern length
|
||||||
|
|
||||||
re.group_stack = []int{len: re.group_max, init: -1}
|
re.group_stack = []int{len: re.group_max, init: -1}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import regex
|
import regex
|
||||||
import rand
|
import rand
|
||||||
|
import strings
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
*
|
*
|
||||||
|
@ -720,3 +721,42 @@ fn test_errors(){
|
||||||
}
|
}
|
||||||
assert count == err_query_list.len
|
assert count == err_query_list.len
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// test_long_query checks that very long patterns with many groups compile and match.
// It builds two patterns from the same random string of test_len characters:
// one with a separate group per character and one with a group nested per character,
// and asserts that each pattern matches the whole base string.
fn test_long_query() {
|
||||||
|
// number of characters in the random base string (and number of groups in each pattern)
test_len := 32768
|
||||||
|
// each character contributes 3 bytes to the pattern: `(`, the character, `)`
mut buf := strings.new_builder(test_len * 3)
|
||||||
|
// NOTE(review): presumably rand.string yields only characters that are not regex metacharacters - confirm
base_string := rand.string(test_len)
|

||||||
|
// build the sequential pattern "(c0)(c1)(c2)...": one group around every character
for c in base_string {
|
||||||
|
buf.write_b(`(`)
|
||||||
|
buf.write_b(c)
|
||||||
|
buf.write_b(`)`)
|
||||||
|
}
|
||||||
|

||||||
|
mut query := buf.str()
|
||||||
|

||||||
|
//println(base_string)
|
||||||
|
//println(buf.str())
|
||||||
|

||||||
|
// test 1: sequential groups, the match must end at the end of the base string
|
||||||
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
|
mut start, mut end := re.match_string(base_string)
|
||||||
|
//println("$start, $end")
|
||||||
|
assert start >= 0 && end == base_string.len
|
||||||
|

||||||
|
// test 2: nested groups "(c0(c1(c2...)))", nesting depth equal to base_string.len
|
||||||
|
// the builder is reused for the second pattern
buf.clear()
|
||||||
|
// open one group per character ...
for c in base_string {
|
||||||
|
buf.write_b(`(`)
|
||||||
|
buf.write_b(c)
|
||||||
|
}
|
||||||
|
// ... then close all of the opened groups at the end of the pattern
for _ in 0..base_string.len {
|
||||||
|
buf.write_b(`)`)
|
||||||
|
}
|
||||||
|
query = buf.str()
|
||||||
|
re = regex.regex_opt(query) or { panic(err) }
|
||||||
|
start, end = re.match_string(base_string)
|
||||||
|
//println("$start, $end")
|
||||||
|
assert start >= 0 && end == base_string.len
|
||||||
|
}
|
||||||
|
|
|
@ -21,7 +21,7 @@ pub fn regex_base(pattern string) (RE, int, int) {
|
||||||
re.prog = []Token{len: pattern.len + 1} // max program length, can not be longer than the pattern
|
re.prog = []Token{len: pattern.len + 1} // max program length, can not be longer than the pattern
|
||||||
re.cc = []CharClass{len: pattern.len} // there can not be more char classes than the length of the pattern
|
re.cc = []CharClass{len: pattern.len} // there can not be more char classes than the length of the pattern
|
||||||
re.group_csave_flag = false // enable continuous group saving
|
re.group_csave_flag = false // enable continuous group saving
|
||||||
re.group_max_nested = 128 // set max 128 group nested
|
re.group_max_nested = pattern.len >> 1 // max nesting depth of groups: at most half of the pattern length
|
||||||
re.group_max = pattern.len >> 1 // we can't have more groups than half of the pattern length
|
re.group_max = pattern.len >> 1 // we can't have more groups than half of the pattern length
|
||||||
|
|
||||||
re.group_stack = []int{len: re.group_max, init: -1}
|
re.group_stack = []int{len: re.group_max, init: -1}
|
||||||
|
@ -63,7 +63,7 @@ pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
|
||||||
// get_group_by_id get a group string by its id
|
// get_group_by_id get a group string by its id
|
||||||
pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
|
pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
|
||||||
if group_id < (re.groups.len >> 1) {
|
if group_id < (re.groups.len >> 1) {
|
||||||
index := group_id << 1
|
index := group_id * 2
|
||||||
start := re.groups[index]
|
start := re.groups[index]
|
||||||
end := re.groups[index + 1]
|
end := re.groups[index + 1]
|
||||||
if start >= 0 && end > start {
|
if start >= 0 && end > start {
|
||||||
|
@ -76,7 +76,7 @@ pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
|
||||||
// get_group_bounds_by_id gets the boundaries of a group by its id
|
// get_group_bounds_by_id gets the boundaries of a group by its id
|
||||||
pub fn (re RE) get_group_bounds_by_id(group_id int) (int, int) {
|
pub fn (re RE) get_group_bounds_by_id(group_id int) (int, int) {
|
||||||
if group_id < re.group_count {
|
if group_id < re.group_count {
|
||||||
index := group_id << 1
|
index := group_id * 2
|
||||||
return re.groups[index], re.groups[index + 1]
|
return re.groups[index], re.groups[index + 1]
|
||||||
}
|
}
|
||||||
return -1, -1
|
return -1, -1
|
||||||
|
@ -366,8 +366,8 @@ pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
for g_i in 0 .. re.group_count {
|
for g_i in 0 .. re.group_count {
|
||||||
re.groups[g_i << 1] += i
|
re.groups[g_i * 2] += i
|
||||||
re.groups[(g_i << 1) + 1] += i
|
re.groups[(g_i * 2) + 1] += i
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
repl := repl_fn(re, in_txt, s, e)
|
repl := repl_fn(re, in_txt, s, e)
|
||||||
|
@ -428,8 +428,8 @@ pub fn (mut re RE) replace(in_txt string, repl_str string) string {
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
for g_i in 0 .. re.group_count {
|
for g_i in 0 .. re.group_count {
|
||||||
re.groups[g_i << 1] += i
|
re.groups[g_i * 2] += i
|
||||||
re.groups[(g_i << 1) + 1] += i
|
re.groups[(g_i * 2) + 1] += i
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
// repl := repl_fn(re, in_txt, s, e)
|
// repl := repl_fn(re, in_txt, s, e)
|
||||||
|
|
Loading…
Reference in New Issue