regex: refactoring, documentation, examples (#7418)
parent
8278af4ee8
commit
b29bcb3fbe
|
@ -471,10 +471,23 @@ pub fn regex_opt(in_query string) ?RE
|
||||||
// new_regex create a REgex of small size, usually sufficient for ordinary use
|
// new_regex create a REgex of small size, usually sufficient for ordinary use
|
||||||
pub fn new() RE
|
pub fn new() RE
|
||||||
|
|
||||||
// new_regex_by_size create a REgex of large size, mult specify the scale factor of the memory that will be allocated
|
|
||||||
pub fn new_by_size(mult int) RE
|
|
||||||
```
|
```
|
||||||
After a base initializer is used, the regex expression must be compiled with:
|
#### **Custom initialization**
|
||||||
|
For some particular needs, it is possible to initialize a fully customized regex:
|
||||||
|
```v ignore
|
||||||
|
// init custom regex
|
||||||
|
mut re := regex.RE{}
|
||||||
|
re.prog = []Token {len: pattern.len + 1} // max program length, cannot be longer than the pattern
|
||||||
|
re.cc = []CharClass{len: pattern.len} // there cannot be more char classes than the length of the pattern
|
||||||
|
|
||||||
|
re.group_csave_flag = false // set to true to enable continuous group saving if needed
|
||||||
|
re.group_max_nested = 128 // allow at most 128 nested groups
|
||||||
|
re.group_max = pattern.len>>1 // we can't have more groups than half of the pattern length
|
||||||
|
```
|
||||||
|
### Compiling
|
||||||
|
|
||||||
|
After an initializer is used, the regex expression must be compiled with:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// compile compiles the REgex returning an error if the compilation fails
|
// compile compiles the REgex returning an error if the compilation fails
|
||||||
pub fn (re mut RE) compile_opt(in_txt string) ?
|
pub fn (re mut RE) compile_opt(in_txt string) ?
|
||||||
|
@ -500,11 +513,38 @@ pub fn (re mut RE) replace(in_txt string, repl string) string
|
||||||
|
|
||||||
## Find and Replace
|
## Find and Replace
|
||||||
|
|
||||||
|
The following find and replace functions are available:
|
||||||
|
|
||||||
|
#### Find functions
|
||||||
|
|
||||||
|
```v ignore
|
||||||
|
// find tries to find the first match in the input string; it returns the start and end index if found, otherwise start is -1
|
||||||
|
pub fn (re mut RE) find(in_txt string) (int,int)
|
||||||
|
|
||||||
|
// find_all finds all the "non overlapping" occurrences of the matching pattern
|
||||||
|
// return a list of start end indexes like: [3,4,6,8]
|
||||||
|
// the matches are [3,4] and [6,8]
|
||||||
|
pub fn (re mut RE) find_all(in_txt string) []int
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Replace functions
|
||||||
|
|
||||||
|
```v ignore
|
||||||
|
// replace returns a string where the matches are replaced with the replace string; only non-overlapping matches are used
|
||||||
|
pub fn (re mut RE) replace(in_txt string, repl string) string
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Custom replace function
|
||||||
|
|
||||||
For complex find and replace operations it is available the function `replace_by_fn` .
|
For complex find and replace operations it is available the function `replace_by_fn` .
|
||||||
The`replace_by_fn` use a custom replace function making possible customizations.
|
The`replace_by_fn` use a custom replace function making possible customizations.
|
||||||
**The custom function is called for every non overlapped find.**
|
**The custom function is called for every non overlapped find.**
|
||||||
The custom function must be of the type:
|
The custom function must be of the type:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
|
// re RE struct
|
||||||
|
// in_txt all the text passed to the regex expression
|
||||||
|
// the match is: in_txt[start..end]
|
||||||
fn (re RE, in_txt string, start int, end int) string
|
fn (re RE, in_txt string, start int, end int) string
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -671,7 +711,7 @@ re.log_func = custom_print
|
||||||
|
|
||||||
## Example code
|
## Example code
|
||||||
|
|
||||||
Here there is a simple code to perform some basically match of strings
|
Here is an example that performs some basic string matching:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
import regex
|
import regex
|
||||||
|
@ -698,5 +738,63 @@ fn main(){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
Here is an example of a fully customized creation of the regex environment:
|
||||||
|
```v ignore
|
||||||
|
import regex
|
||||||
|
|
||||||
more example code is available in the test code for the `regex` module `vlib\regex\regex_test.v`.
|
fn main(){
|
||||||
|
txt := "today John is gone to his house with Jack and Marie."
|
||||||
|
query := r"(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+"
|
||||||
|
|
||||||
|
// init regex
|
||||||
|
mut re := regex.RE{}
|
||||||
|
re.prog = []regex.Token {len: query.len + 1} // max program length, cannot be longer than the query
|
||||||
|
re.cc = []regex.CharClass{len: query.len} // there cannot be more char classes than the length of the query
|
||||||
|
re.prog = []regex.Token {len: query.len+1}
|
||||||
|
re.group_csave_flag = true // enable continuous group saving
|
||||||
|
re.group_max_nested = 128 // allow at most 128 nested groups
|
||||||
|
re.group_max = query.len>>1 // we can't have more groups than half of the query length
|
||||||
|
|
||||||
|
// compile the query
|
||||||
|
re.compile_opt(query) or { panic(err) }
|
||||||
|
|
||||||
|
start, end := re.match_string(txt)
|
||||||
|
if start >= 0 {
|
||||||
|
println("Match ($start, $end) => [${txt[start..end]}]")
|
||||||
|
} else {
|
||||||
|
println("No Match")
|
||||||
|
}
|
||||||
|
|
||||||
|
// show results for continuous group saving
|
||||||
|
if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0{
|
||||||
|
println("cg: $re.group_csave")
|
||||||
|
mut cs_i := 1
|
||||||
|
for cs_i < re.group_csave[0]*3 {
|
||||||
|
g_id := re.group_csave[cs_i]
|
||||||
|
st := re.group_csave[cs_i+1]
|
||||||
|
en := re.group_csave[cs_i+2]
|
||||||
|
println("cg[$g_id] $st $en:[${txt[st..en]}]")
|
||||||
|
cs_i += 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// show results for captured groups
|
||||||
|
if start >= 0 {
|
||||||
|
println("Match ($start, $end) => [${txt[start..end]}]")
|
||||||
|
for g_index := 0; g_index < re.group_count ; g_index++ {
|
||||||
|
println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
|
||||||
|
bounds: ${re.get_group_bounds_by_id(g_index)}")
|
||||||
|
}
|
||||||
|
for name in re.group_map.keys() {
|
||||||
|
println("group:'$name' \t=> [${re.get_group_by_name(txt, name)}] \
|
||||||
|
bounds: ${re.get_group_bounds_by_name(name)}")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
println("No Match")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
More example code is available in the test code for the `regex` module: `vlib\regex\regex_test.v`.
|
|
@ -266,11 +266,11 @@ fn (mut tok Token) reset() {
|
||||||
tok.rep = 0
|
tok.rep = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/******************************************************************************
|
||||||
|
*
|
||||||
Regex struct
|
* Regex struct
|
||||||
|
*
|
||||||
*/
|
******************************************************************************/
|
||||||
pub const (
|
pub const (
|
||||||
f_nl = 0x00000001 // end the match when find a new line symbol
|
f_nl = 0x00000001 // end the match when find a new line symbol
|
||||||
f_ms = 0x00000002 // match true only if the match is at the start of the string
|
f_ms = 0x00000002 // match true only if the match is at the start of the string
|
||||||
|
@ -354,11 +354,11 @@ fn (mut re RE) reset_src(){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/******************************************************************************
|
||||||
|
*
|
||||||
Backslashes chars
|
* Backslashes chars
|
||||||
|
*
|
||||||
*/
|
******************************************************************************/
|
||||||
struct BslsStruct {
|
struct BslsStruct {
|
||||||
ch rune // meta char
|
ch rune // meta char
|
||||||
validator FnValidator // validator function pointer
|
validator FnValidator // validator function pointer
|
||||||
|
@ -430,11 +430,11 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
|
||||||
return err_syntax_error, i
|
return err_syntax_error, i
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/******************************************************************************
|
||||||
|
*
|
||||||
Char class
|
* Char class
|
||||||
|
*
|
||||||
*/
|
******************************************************************************/
|
||||||
const(
|
const(
|
||||||
cc_null = 0 // empty cc token
|
cc_null = 0 // empty cc token
|
||||||
cc_char = 1 // simple char: a
|
cc_char = 1 // simple char: a
|
||||||
|
@ -653,11 +653,11 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
|
||||||
return err_syntax_error,0,u32(0)
|
return err_syntax_error,0,u32(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/******************************************************************************
|
||||||
|
*
|
||||||
Re Compiler
|
* Re Compiler
|
||||||
|
*
|
||||||
*/
|
******************************************************************************/
|
||||||
//
|
//
|
||||||
// Quantifier
|
// Quantifier
|
||||||
//
|
//
|
||||||
|
@ -1462,11 +1462,11 @@ pub fn (re RE) get_query() string {
|
||||||
return res.str()
|
return res.str()
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/******************************************************************************
|
||||||
|
*
|
||||||
Groups saving utilities
|
* Groups saving utilities
|
||||||
|
*
|
||||||
*/
|
******************************************************************************/
|
||||||
[inline]
|
[inline]
|
||||||
fn (mut re RE) group_continuous_save(g_index int) {
|
fn (mut re RE) group_continuous_save(g_index int) {
|
||||||
if re.group_csave_flag == true {
|
if re.group_csave_flag == true {
|
||||||
|
@ -1500,12 +1500,12 @@ fn (mut re RE) group_continuous_save(g_index int) {
|
||||||
re.group_csave << re.groups[g_index+1] // end
|
re.group_csave << re.groups[g_index+1] // end
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
Matching
|
/******************************************************************************
|
||||||
|
*
|
||||||
*/
|
* Matching
|
||||||
|
*
|
||||||
|
******************************************************************************/
|
||||||
enum Match_state{
|
enum Match_state{
|
||||||
start = 0
|
start = 0
|
||||||
stop
|
stop
|
||||||
|
@ -2001,6 +2001,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
last_dot_pc: state.pc
|
last_dot_pc: state.pc
|
||||||
}
|
}
|
||||||
m_state = .ist_quant_n
|
m_state = .ist_quant_n
|
||||||
|
//println("dot_char stack len: $state_list.len")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2363,47 +2364,11 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
return no_match_found, 0
|
return no_match_found, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/******************************************************************************
|
||||||
|
*
|
||||||
Public functions
|
* Public functions
|
||||||
|
*
|
||||||
*/
|
******************************************************************************/
|
||||||
|
|
||||||
//
|
|
||||||
// Inits
|
|
||||||
//
|
|
||||||
|
|
||||||
// regex create a regex object from the query string
|
|
||||||
[deprecated]
|
|
||||||
pub fn regex(in_query string) (RE,int,int){
|
|
||||||
mut re := RE{}
|
|
||||||
re.prog = []Token {len: in_query.len+1}
|
|
||||||
re.cc = []CharClass{len: in_query.len+1}
|
|
||||||
re.group_max_nested = 8
|
|
||||||
|
|
||||||
re_err,err_pos := re.compile(in_query)
|
|
||||||
return re, re_err, err_pos
|
|
||||||
}
|
|
||||||
|
|
||||||
// new_regex create a RE of small size, usually sufficient for ordinary use
|
|
||||||
[deprecated]
|
|
||||||
pub fn new_regex() RE {
|
|
||||||
return impl_new_regex_by_size(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
|
||||||
[deprecated]
|
|
||||||
pub fn new_regex_by_size(mult int) RE {
|
|
||||||
return impl_new_regex_by_size(mult)
|
|
||||||
}
|
|
||||||
fn impl_new_regex_by_size(mult int) RE {
|
|
||||||
mut re := RE{}
|
|
||||||
re.prog = []Token {len: max_code_len*mult} // max program length, default 256 istructions
|
|
||||||
re.cc = []CharClass{len: max_code_len*mult} // char class list
|
|
||||||
re.group_max_nested = 3*mult // max nested group
|
|
||||||
|
|
||||||
return re
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Matchers
|
// Matchers
|
||||||
|
@ -2538,82 +2503,3 @@ pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
Utilities
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
// get_group_bounds_by_name get a group boundaries by its name
|
|
||||||
pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int) {
|
|
||||||
if group_name in re.group_map {
|
|
||||||
tmp_index := re.group_map[group_name]-1
|
|
||||||
start := re.groups[tmp_index * 2]
|
|
||||||
end := re.groups[tmp_index * 2 + 1]
|
|
||||||
return start,end
|
|
||||||
}
|
|
||||||
return -1, -1
|
|
||||||
}
|
|
||||||
|
|
||||||
// get_group_by_name get a group boundaries by its name
|
|
||||||
pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
|
|
||||||
if group_name in re.group_map {
|
|
||||||
tmp_index := re.group_map[group_name]-1
|
|
||||||
start := re.groups[tmp_index * 2]
|
|
||||||
end := re.groups[tmp_index * 2 + 1]
|
|
||||||
return in_txt[start..end]
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// get_group_by_id get a group string by its id
|
|
||||||
pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
|
|
||||||
if group_id < (re.groups.len >> 1) {
|
|
||||||
index := group_id << 1
|
|
||||||
start := re.groups[index]
|
|
||||||
end := re.groups[index + 1]
|
|
||||||
return in_txt[start..end]
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// get_group_by_id get a group boundaries by its id
|
|
||||||
pub fn (re RE) get_group_bounds_by_id(group_id int) (int,int) {
|
|
||||||
if group_id < (re.groups.len >> 1) {
|
|
||||||
index := group_id << 1
|
|
||||||
return re.groups[index], re.groups[index]
|
|
||||||
}
|
|
||||||
return -1, -1
|
|
||||||
}
|
|
||||||
|
|
||||||
pub
|
|
||||||
struct Re_group {
|
|
||||||
pub:
|
|
||||||
start int = -1
|
|
||||||
end int = -1
|
|
||||||
}
|
|
||||||
|
|
||||||
// get_group_list return a list of Re_group for the found groups
|
|
||||||
pub fn (re RE) get_group_list() []Re_group {
|
|
||||||
mut res := []Re_group{len: re.groups.len >> 1}
|
|
||||||
mut gi := 0
|
|
||||||
//println("len: ${re.groups.len} groups: ${re.groups}")
|
|
||||||
for gi < re.groups.len {
|
|
||||||
if re.groups[gi] >= 0 {
|
|
||||||
txt_st := re.groups[gi]
|
|
||||||
txt_en := re.groups[gi+1]
|
|
||||||
|
|
||||||
//println("#${gi/2} start: ${re.groups[gi]} end: ${re.groups[gi + 1]} ")
|
|
||||||
if txt_st >= 0 && txt_en > txt_st {
|
|
||||||
tmp := Re_group{ start: re.groups[gi], end: re.groups[gi + 1]}
|
|
||||||
//println(tmp)
|
|
||||||
res[gi >> 1] = tmp
|
|
||||||
} else {
|
|
||||||
res[gi >> 1] = Re_group{}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
gi += 2
|
|
||||||
}
|
|
||||||
return res
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -17,18 +17,29 @@ pub fn (mut re RE) compile_opt(pattern string) ? {
|
||||||
}
|
}
|
||||||
|
|
||||||
// new_regex create a RE of small size, usually sufficient for ordinary use
|
// new_regex create a RE of small size, usually sufficient for ordinary use
|
||||||
|
[deprecated]
|
||||||
pub fn new() RE {
|
pub fn new() RE {
|
||||||
return impl_new_regex_by_size(1)
|
return impl_new_regex_by_size(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
||||||
|
[deprecated]
|
||||||
pub fn new_by_size(mult int) RE {
|
pub fn new_by_size(mult int) RE {
|
||||||
return impl_new_regex_by_size(mult)
|
return impl_new_regex_by_size(mult)
|
||||||
}
|
}
|
||||||
|
|
||||||
// regex_opt create new RE object from RE pattern string
|
// regex_opt create new RE object from RE pattern string
|
||||||
pub fn regex_opt(pattern string) ?RE {
|
pub fn regex_opt(pattern string) ?RE {
|
||||||
mut re := new()
|
// init regex
|
||||||
re.compile_opt(pattern)?
|
mut re := regex.RE{}
|
||||||
return re
|
re.prog = []Token {len: pattern.len + 1} // max program length, cannot be longer than the pattern
|
||||||
|
re.cc = []CharClass{len: pattern.len} // there cannot be more char classes than the length of the pattern
|
||||||
|
re.group_csave_flag = false // continuous group saving is disabled
|
||||||
|
re.group_max_nested = 128 // allow at most 128 nested groups
|
||||||
|
re.group_max = pattern.len >> 1 // we can't have more groups than half of the pattern length
|
||||||
|
|
||||||
|
// compile the pattern
|
||||||
|
re.compile_opt(pattern)?
|
||||||
|
|
||||||
|
return re
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,126 @@
|
||||||
|
/*
|
||||||
|
|
||||||
|
regex 1.0 alpha
|
||||||
|
|
||||||
|
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
||||||
|
Use of this source code is governed by an MIT license
|
||||||
|
that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
*/
|
||||||
|
module regex
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
*
|
||||||
|
* Inits
|
||||||
|
*
|
||||||
|
******************************************************************************/
|
||||||
|
// regex create a regex object from the query string
|
||||||
|
[deprecated]
|
||||||
|
pub fn regex(in_query string) (RE,int,int){
|
||||||
|
mut re := RE{}
|
||||||
|
re.prog = []Token {len: in_query.len+1}
|
||||||
|
re.cc = []CharClass{len: in_query.len+1}
|
||||||
|
re.group_max_nested = 8
|
||||||
|
|
||||||
|
re_err,err_pos := re.compile(in_query)
|
||||||
|
return re, re_err, err_pos
|
||||||
|
}
|
||||||
|
|
||||||
|
// new_regex create a RE of small size, usually sufficient for ordinary use
|
||||||
|
[deprecated]
|
||||||
|
pub fn new_regex() RE {
|
||||||
|
return impl_new_regex_by_size(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
||||||
|
[deprecated]
|
||||||
|
pub fn new_regex_by_size(mult int) RE {
|
||||||
|
return impl_new_regex_by_size(mult)
|
||||||
|
}
|
||||||
|
fn impl_new_regex_by_size(mult int) RE {
|
||||||
|
mut re := RE{}
|
||||||
|
re.prog = []Token {len: max_code_len*mult} // max program length, default 256 instructions
|
||||||
|
re.cc = []CharClass{len: max_code_len*mult} // char class list
|
||||||
|
re.group_max_nested = 3*mult // max nested group
|
||||||
|
|
||||||
|
return re
|
||||||
|
}
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
*
|
||||||
|
* Utilities
|
||||||
|
*
|
||||||
|
******************************************************************************/
|
||||||
|
// get_group_bounds_by_name get a group boundaries by its name
|
||||||
|
pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int) {
|
||||||
|
if group_name in re.group_map {
|
||||||
|
tmp_index := re.group_map[group_name]-1
|
||||||
|
start := re.groups[tmp_index * 2]
|
||||||
|
end := re.groups[tmp_index * 2 + 1]
|
||||||
|
return start,end
|
||||||
|
}
|
||||||
|
return -1, -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// get_group_by_name get a group string by its name
|
||||||
|
pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
|
||||||
|
if group_name in re.group_map {
|
||||||
|
tmp_index := re.group_map[group_name]-1
|
||||||
|
start := re.groups[tmp_index * 2]
|
||||||
|
end := re.groups[tmp_index * 2 + 1]
|
||||||
|
return in_txt[start..end]
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// get_group_by_id get a group string by its id
|
||||||
|
pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
|
||||||
|
if group_id < (re.groups.len >> 1) {
|
||||||
|
index := group_id << 1
|
||||||
|
start := re.groups[index]
|
||||||
|
end := re.groups[index + 1]
|
||||||
|
return in_txt[start..end]
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// get_group_bounds_by_id get a group boundaries by its id
|
||||||
|
pub fn (re RE) get_group_bounds_by_id(group_id int) (int,int) {
|
||||||
|
if group_id < (re.groups.len >> 1) {
|
||||||
|
index := group_id << 1
|
||||||
|
return re.groups[index], re.groups[index + 1]
|
||||||
|
}
|
||||||
|
return -1, -1
|
||||||
|
}
|
||||||
|
|
||||||
|
pub
|
||||||
|
struct Re_group {
|
||||||
|
pub:
|
||||||
|
start int = -1
|
||||||
|
end int = -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// get_group_list return a list of Re_group for the found groups
|
||||||
|
pub fn (re RE) get_group_list() []Re_group {
|
||||||
|
mut res := []Re_group{len: re.groups.len >> 1}
|
||||||
|
mut gi := 0
|
||||||
|
//println("len: ${re.groups.len} groups: ${re.groups}")
|
||||||
|
for gi < re.groups.len {
|
||||||
|
if re.groups[gi] >= 0 {
|
||||||
|
txt_st := re.groups[gi]
|
||||||
|
txt_en := re.groups[gi+1]
|
||||||
|
|
||||||
|
//println("#${gi/2} start: ${re.groups[gi]} end: ${re.groups[gi + 1]} ")
|
||||||
|
if txt_st >= 0 && txt_en > txt_st {
|
||||||
|
tmp := Re_group{ start: re.groups[gi], end: re.groups[gi + 1]}
|
||||||
|
//println(tmp)
|
||||||
|
res[gi >> 1] = tmp
|
||||||
|
} else {
|
||||||
|
res[gi >> 1] = Re_group{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gi += 2
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue