regex: code cleaning, add more tests (#7402)
* added Crazywulf tests to the suite, removed deprecated repeat for init arrays * some cleanup in the code, fix typo in README.md pull/7405/head^2
parent
ff2cfd4f38
commit
1e4b3a7b27
|
@ -413,7 +413,7 @@ These functions are helpers to query the captured groups
|
||||||
// get_group_bounds_by_name get a group boundaries by its name
|
// get_group_bounds_by_name get a group boundaries by its name
|
||||||
pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int)
|
pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int)
|
||||||
|
|
||||||
// get_group_by_name get a group boundaries by its name
|
// get_group_by_name get a group string by its name
|
||||||
pub fn (re RE) get_group_by_name(group_name string) string
|
pub fn (re RE) get_group_by_name(group_name string) string
|
||||||
|
|
||||||
// get_group_by_id get a group boundaries by its id
|
// get_group_by_id get a group boundaries by its id
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
|
|
||||||
regex 0.9h
|
regex 1.0 alpha
|
||||||
|
|
||||||
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
||||||
Use of this source code is governed by an MIT license
|
Use of this source code is governed by an MIT license
|
||||||
|
@ -280,7 +280,6 @@ pub const (
|
||||||
f_bin = 0x00000200 // work only on bytes, ignore utf-8
|
f_bin = 0x00000200 // work only on bytes, ignore utf-8
|
||||||
|
|
||||||
// behaviour modifier flags
|
// behaviour modifier flags
|
||||||
//f_or = 0x00010000 // the OR work with concatenation like PCRE
|
|
||||||
f_src = 0x00020000 // search mode enabled
|
f_src = 0x00020000 // search mode enabled
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -334,7 +333,11 @@ fn (mut re RE) reset(){
|
||||||
re.prog[i].rep = 0 // clear repetition of the token
|
re.prog[i].rep = 0 // clear repetition of the token
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
re.groups = [-1].repeat(re.group_count*2)
|
|
||||||
|
// init groups array
|
||||||
|
if re.group_count > 0 {
|
||||||
|
re.groups = []int{len: re.group_count*2, init: -1}
|
||||||
|
}
|
||||||
|
|
||||||
// reset group_csave
|
// reset group_csave
|
||||||
re.group_csave = []int{}
|
re.group_csave = []int{}
|
||||||
|
@ -723,7 +726,6 @@ fn (re RE) parse_quantifier(in_txt string, in_i int) (int, int, int, bool) {
|
||||||
// single value {4}
|
// single value {4}
|
||||||
if status == .min_parse && ch == `}` {
|
if status == .min_parse && ch == `}` {
|
||||||
q_max = q_min
|
q_max = q_min
|
||||||
|
|
||||||
status = .greedy
|
status = .greedy
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -731,7 +733,6 @@ fn (re RE) parse_quantifier(in_txt string, in_i int) (int, int, int, bool) {
|
||||||
// end without max
|
// end without max
|
||||||
if status == .comma_checked && ch == `}` {
|
if status == .comma_checked && ch == `}` {
|
||||||
q_max = max_quantifier
|
q_max = max_quantifier
|
||||||
|
|
||||||
status = .greedy
|
status = .greedy
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -900,8 +901,8 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
|
|
||||||
// group management variables
|
// group management variables
|
||||||
mut group_count := -1
|
mut group_count := -1
|
||||||
mut group_stack := [0 ].repeat(re.group_max_nested)
|
mut group_stack := []int{len: re.group_max_nested, init: 0}
|
||||||
mut group_stack_txt_index := [-1].repeat(re.group_max_nested)
|
mut group_stack_txt_index := []int{len: re.group_max_nested, init: -1}
|
||||||
mut group_stack_index := -1
|
mut group_stack_index := -1
|
||||||
|
|
||||||
re.query = in_txt // save the query string
|
re.query = in_txt // save the query string
|
||||||
|
@ -987,7 +988,6 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
|
|
||||||
pc = pc + 1
|
pc = pc + 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ist_group_end
|
// ist_group_end
|
||||||
|
@ -1566,8 +1566,6 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
|
|
||||||
mut state_list := []StateObj{}
|
mut state_list := []StateObj{}
|
||||||
|
|
||||||
//mut group_stack := [-1].repeat(re.group_max)
|
|
||||||
//mut group_data := [-1].repeat(re.group_max)
|
|
||||||
mut group_stack := []int{len: re.group_max, init: -1}
|
mut group_stack := []int{len: re.group_max, init: -1}
|
||||||
mut group_data := []int{len: re.group_max, init: -1}
|
mut group_data := []int{len: re.group_max, init: -1}
|
||||||
|
|
||||||
|
@ -1677,7 +1675,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
//******************************************
|
//******************************************
|
||||||
|
|
||||||
if ist == ist_prog_end {
|
if ist == ist_prog_end {
|
||||||
//println("HERE")
|
//println("HERE we end!")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1719,9 +1717,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
|
|
||||||
// continuous save, save until we have space
|
// continuous save, save until we have space
|
||||||
re.group_continuous_save(g_index)
|
re.group_continuous_save(g_index)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
state.group_index--
|
state.group_index--
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1968,7 +1964,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
//println("Check [ist_simple_char] [${re.prog[chk_pc].ch}]==[${ch_t:c}] => $next_check_flag")
|
//println("Check [ist_simple_char] [${re.prog[chk_pc].ch}]==[${ch_t:c}] => $next_check_flag")
|
||||||
}
|
}
|
||||||
|
|
||||||
// char class IST
|
// char char_class
|
||||||
else if re.prog[chk_pc].ist == ist_char_class_pos || re.prog[chk_pc].ist == ist_char_class_neg {
|
else if re.prog[chk_pc].ist == ist_char_class_pos || re.prog[chk_pc].ist == ist_char_class_neg {
|
||||||
mut cc_neg := false
|
mut cc_neg := false
|
||||||
if re.prog[chk_pc].ist == ist_char_class_neg {
|
if re.prog[chk_pc].ist == ist_char_class_neg {
|
||||||
|
@ -1993,7 +1989,6 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
|
|
||||||
// check if we must continue or pass to the next IST
|
// check if we must continue or pass to the next IST
|
||||||
if next_check_flag == true {
|
if next_check_flag == true {
|
||||||
// if re.prog[state.pc].rep >= re.prog[state.pc].rep_max {
|
|
||||||
//println("save the state!!")
|
//println("save the state!!")
|
||||||
state_list << StateObj {
|
state_list << StateObj {
|
||||||
group_index: state.group_index
|
group_index: state.group_index
|
||||||
|
@ -2382,8 +2377,8 @@ Public functions
|
||||||
[deprecated]
|
[deprecated]
|
||||||
pub fn regex(in_query string) (RE,int,int){
|
pub fn regex(in_query string) (RE,int,int){
|
||||||
mut re := RE{}
|
mut re := RE{}
|
||||||
re.prog = [Token{}].repeat(in_query.len+1)
|
re.prog = []Token {len: in_query.len+1}
|
||||||
re.cc = [CharClass{}].repeat(in_query.len+1)
|
re.cc = []CharClass{len: in_query.len+1}
|
||||||
re.group_max_nested = 8
|
re.group_max_nested = 8
|
||||||
|
|
||||||
re_err,err_pos := re.compile(in_query)
|
re_err,err_pos := re.compile(in_query)
|
||||||
|
@ -2403,8 +2398,8 @@ pub fn new_regex_by_size(mult int) RE {
|
||||||
}
|
}
|
||||||
fn impl_new_regex_by_size(mult int) RE {
|
fn impl_new_regex_by_size(mult int) RE {
|
||||||
mut re := RE{}
|
mut re := RE{}
|
||||||
re.prog = [Token{}].repeat(max_code_len*mult) // max program length, default 256 istructions
|
re.prog = []Token {len: max_code_len*mult} // max program length, default 256 instructions
|
||||||
re.cc = [CharClass{}].repeat(max_code_len*mult) // char class list
|
re.cc = []CharClass{len: max_code_len*mult} // char class list
|
||||||
re.group_max_nested = 3*mult // max nested group
|
re.group_max_nested = 3*mult // max nested group
|
||||||
|
|
||||||
return re
|
return re
|
||||||
|
|
|
@ -128,6 +128,16 @@ match_test_suite = [
|
||||||
TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
|
TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
|
||||||
TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
|
TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
|
||||||
|
|
||||||
|
// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
|
||||||
|
TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},
|
||||||
|
TestItem{"+1*1", r"^(\d+)([*])(\d+)",-1,0},
|
||||||
|
TestItem{"*1*1", r"(?:^|[*])(\d+)([*])(\d+)",0,4},
|
||||||
|
TestItem{"*1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
|
||||||
|
TestItem{")1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
|
||||||
|
TestItem{"(1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
|
||||||
|
TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5},
|
||||||
|
TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
|
||||||
|
TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue