v/vlib/regex/regex_util.v

127 lines
3.4 KiB
V

/*
regex 1.0 alpha
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license
that can be found in the LICENSE file.
*/
module regex
/******************************************************************************
*
* Inits
*
******************************************************************************/
// regex builds a RE whose program and char-class buffers hold
// in_query.len+1 entries, compiles in_query into it and returns the RE
// followed by the two int values produced by compile (error code and
// error position, going by the original local names re_err/err_pos).
[deprecated]
pub fn regex(in_query string) (RE,int,int){
	// one slot more than the number of bytes in the query
	slots := in_query.len + 1
	mut re := RE{}
	re.group_max_nested = 8
	re.cc = []CharClass{len: slots}
	re.prog = []Token{len: slots}
	compile_status, compile_pos := re.compile(in_query)
	return re, compile_status, compile_pos
}
// new_regex returns a RE allocated with scale factor 1, the small size
// that is usually sufficient for ordinary use
[deprecated]
pub fn new_regex() RE {
	small_re := impl_new_regex_by_size(1)
	return small_re
}
// new_regex_by_size returns a RE whose buffers are scaled by mult;
// mult is the scale factor of the memory that will be allocated
[deprecated]
pub fn new_regex_by_size(mult int) RE {
	scaled_re := impl_new_regex_by_size(mult)
	return scaled_re
}
// impl_new_regex_by_size allocates a RE whose program and char class
// buffers both hold max_code_len*mult entries and whose maximum group
// nesting is 3*mult
fn impl_new_regex_by_size(mult int) RE {
	// shared length of the program and of the char class list
	code_len := max_code_len * mult
	mut re := RE{}
	re.group_max_nested = 3 * mult // max nested groups
	re.cc = []CharClass{len: code_len} // char class list
	re.prog = []Token{len: code_len} // max program length, in instructions
	return re
}
/******************************************************************************
*
* Utilities
*
******************************************************************************/
// get_group_bounds_by_name returns the (start, end) pair stored in
// re.groups for the group called group_name, or (-1, -1) when the name
// is not present in re.group_map
pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int) {
	if !(group_name in re.group_map) {
		return -1, -1
	}
	// group_map values are shifted by one with respect to the group index;
	// each group owns two consecutive entries of re.groups
	base := (re.group_map[group_name] - 1) * 2
	return re.groups[base], re.groups[base + 1]
}
// get_group_by_name returns the text of the group called group_name,
// sliced out of in_txt using the bounds stored in re.groups.
// It returns "" when the name is not present in re.group_map or when the
// stored bounds do not describe a non-empty match (start < 0 or
// end <= start), instead of slicing in_txt with invalid bounds.
pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
	if group_name in re.group_map {
		tmp_index := re.group_map[group_name]-1
		start := re.groups[tmp_index * 2]
		end := re.groups[tmp_index * 2 + 1]
		// negative or empty bounds cannot be sliced: slicing with them
		// would be a runtime error
		if start >= 0 && end > start {
			return in_txt[start..end]
		}
	}
	return ""
}
// get_group_by_id returns the text of the group with index group_id,
// sliced out of in_txt using the bounds stored in re.groups.
// It returns "" when group_id is negative or not below re.groups.len/2,
// or when the stored bounds do not describe a non-empty match
// (start < 0 or end <= start).
pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
	// a negative id would pass the upper-bound test and index re.groups
	// with a negative value
	if group_id >= 0 && group_id < (re.groups.len >> 1) {
		index := group_id << 1
		start := re.groups[index]
		end := re.groups[index + 1]
		// negative or empty bounds cannot be sliced
		if start >= 0 && end > start {
			return in_txt[start..end]
		}
	}
	return ""
}
// get_group_bounds_by_id returns the (start, end) pair stored in
// re.groups for the group with index group_id, or (-1, -1) when group_id
// is negative or not below re.groups.len/2
pub fn (re RE) get_group_bounds_by_id(group_id int) (int,int) {
	// a negative id would pass the upper-bound test and index re.groups
	// with a negative value
	if group_id >= 0 && group_id < (re.groups.len >> 1) {
		index := group_id << 1
		return re.groups[index], re.groups[index + 1]
	}
	return -1, -1
}
// Re_group holds the boundaries of one group as returned by
// get_group_list; both fields default to -1
pub struct Re_group {
pub:
	start int = -1 // start boundary of the group, -1 by default
	end   int = -1 // end boundary of the group, -1 by default
}
// get_group_list returns one Re_group per pair of entries in re.groups.
// A pair whose start is >= 0 and whose end is greater than its start is
// copied into the result; a pair with a non-negative start but an end
// that is not greater is stored as the default Re_group{}; a pair with a
// negative start leaves its slot as it was created.
pub fn (re RE) get_group_list() []Re_group {
	mut res := []Re_group{len: re.groups.len >> 1}
	// walk re.groups two entries at a time: [start, end] of each group
	for gi := 0; gi < re.groups.len; gi += 2 {
		txt_st := re.groups[gi]
		if txt_st < 0 {
			// negative start: nothing is written for this group
			continue
		}
		txt_en := re.groups[gi + 1]
		slot := gi >> 1
		if txt_en > txt_st {
			res[slot] = Re_group{ start: txt_st, end: txt_en }
		} else {
			res[slot] = Re_group{}
		}
	}
	return res
}