regex: refactoring of continuous capturing groups (#7310)
parent
89ef316db3
commit
4f986ccac4
|
@ -236,8 +236,8 @@ this is possible initializing the saving array field in `RE` struct: `group_csav
|
||||||
This feature allows collecting data in a continuous way.
|
This feature allows collecting data in a continuous way.
|
||||||
|
|
||||||
In the example we pass a text followed by an integer list that we want to collect.
|
In the example we pass a text followed by an integer list that we want to collect.
|
||||||
To achieve this task we can use the continuous saving of the group
|
To achieve this task we can use the continuous saving of the group
|
||||||
that save each captured group in a array that we set with: `re.group_csave = [-1].repeat(3*20+1)`.
|
by enabling the right flag: `re.group_csave_flag = true`.
|
||||||
|
|
||||||
The array will be filled with the following logic:
|
The array will be filled with the following logic:
|
||||||
|
|
||||||
|
@ -250,14 +250,14 @@ The array will be filled with the following logic:
|
||||||
The regex saves until it finishes or finds that the array has no space.
|
The regex saves until it finishes or finds that the array has no space.
|
||||||
If the space runs out, no error is raised; further records will not be saved.
|
If the space runs out, no error is raised; further records will not be saved.
|
||||||
|
|
||||||
```v oksyntax
|
```v ignore
|
||||||
fn example2() {
|
fn example2() {
|
||||||
test_regex()
|
test_regex()
|
||||||
text := 'tst: 01,23,45 ,56, 78'
|
text := 'tst: 01,23,45 ,56, 78'
|
||||||
query := r'.*:(\s*\d+[\s,]*)+'
|
query := r'.*:(\s*\d+[\s,]*)+'
|
||||||
mut re := new() or { panic(err) }
|
mut re := new() or { panic(err) }
|
||||||
// re.debug = 2
|
// re.debug = 2
|
||||||
re.group_csave = [-1].repeat(3 * 20 + 1) // we expect max 20 records
|
re.group_csave_flag = true // enable continuous capture
|
||||||
re.compile_opt(query) or {
|
re.compile_opt(query) or {
|
||||||
println(err)
|
println(err)
|
||||||
return
|
return
|
||||||
|
@ -330,7 +330,7 @@ fn main() {
|
||||||
query := r'(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+'
|
query := r'(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+'
|
||||||
mut re := new()
|
mut re := new()
|
||||||
re.debug = 2
|
re.debug = 2
|
||||||
// must provide an array of the right size if want the continuos saving of the groups
|
// must provide an array of the right size if want the continuous saving of the groups
|
||||||
re.group_csave = [-1].repeat(3 * 20 + 1)
|
re.group_csave = [-1].repeat(3 * 20 + 1)
|
||||||
re.compile_opt(query) or {
|
re.compile_opt(query) or {
|
||||||
println(err)
|
println(err)
|
||||||
|
|
|
@ -314,8 +314,8 @@ pub mut:
|
||||||
group_max_nested int = 3 // max nested group
|
group_max_nested int = 3 // max nested group
|
||||||
group_max int = 8 // max allowed number of different groups
|
group_max int = 8 // max allowed number of different groups
|
||||||
|
|
||||||
group_csave []int = []int{} // groups continuous save array
|
group_csave_flag bool // flag to enable continuous saving
|
||||||
group_csave_index int = -1 // groups continuous save index
|
group_csave []int = []int{} // groups continuous save list
|
||||||
|
|
||||||
group_map map[string]int // groups names map
|
group_map map[string]int // groups names map
|
||||||
|
|
||||||
|
@ -344,10 +344,7 @@ fn (mut re RE) reset(){
|
||||||
re.state_stack_index = -1
|
re.state_stack_index = -1
|
||||||
|
|
||||||
// reset group_csave
|
// reset group_csave
|
||||||
if re.group_csave.len > 0 {
|
re.group_csave = []int{}
|
||||||
re.group_csave_index = 1
|
|
||||||
re.group_csave[0] = 0 // reset the capture count
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset for search mode fail
|
// reset for search mode fail
|
||||||
|
@ -1482,6 +1479,45 @@ pub fn (re RE) get_query() string {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
||||||
|
Groups saving utilities
|
||||||
|
|
||||||
|
*/
|
||||||
|
[inline]
|
||||||
|
fn (mut re RE) group_continuous_save(g_index int) {
|
||||||
|
if re.group_csave_flag == true {
|
||||||
|
// continuous save, save until we have space
|
||||||
|
|
||||||
|
// init the first element as counter
|
||||||
|
if re.group_csave.len == 0 {
|
||||||
|
re.group_csave << 0
|
||||||
|
}
|
||||||
|
|
||||||
|
gi := g_index >> 1
|
||||||
|
start := re.groups[g_index]
|
||||||
|
end := re.groups[g_index+1]
|
||||||
|
|
||||||
|
// check if we are simply increasing the size of the found group
|
||||||
|
if re.group_csave.len >=4 &&
|
||||||
|
gi == re.group_csave[re.group_csave.len - 3] &&
|
||||||
|
start == re.group_csave[re.group_csave.len - 2]
|
||||||
|
{
|
||||||
|
re.group_csave[re.group_csave.len - 1] = end
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise append a new group to the list
|
||||||
|
|
||||||
|
// increment counter
|
||||||
|
re.group_csave[0]++
|
||||||
|
// save the record
|
||||||
|
re.group_csave << (g_index >> 1) // group id
|
||||||
|
re.group_csave << re.groups[g_index] // start
|
||||||
|
re.group_csave << re.groups[g_index+1] // end
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
Matching
|
Matching
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
@ -1684,18 +1720,8 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// continuous save, save until we have space
|
// continuous save, save until we have space
|
||||||
if re.group_csave_index > 0 {
|
re.group_continuous_save(g_index)
|
||||||
// check if we have space to save the record
|
|
||||||
if (re.group_csave_index + 3) < re.group_csave.len {
|
|
||||||
// incrment counter
|
|
||||||
re.group_csave[0]++
|
|
||||||
// save the record
|
|
||||||
re.group_csave[re.group_csave_index++] = g_index >> 1 // group id
|
|
||||||
re.group_csave[re.group_csave_index++] = re.groups[g_index] // start
|
|
||||||
re.group_csave[re.group_csave_index++] = re.groups[g_index+1] // end
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
group_index--
|
group_index--
|
||||||
|
@ -1879,17 +1905,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
//println("GROUP ${re.prog[pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}]")
|
//println("GROUP ${re.prog[pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}]")
|
||||||
|
|
||||||
// continuous save, save until we have space
|
// continuous save, save until we have space
|
||||||
if re.group_csave_index > 0 {
|
re.group_continuous_save(g_index)
|
||||||
// check if we have space to save the record
|
|
||||||
if (re.group_csave_index + 3) < re.group_csave.len {
|
|
||||||
// incrment counter
|
|
||||||
re.group_csave[0]++
|
|
||||||
// save the record
|
|
||||||
re.group_csave[re.group_csave_index++] = g_index >> 1 // group id
|
|
||||||
re.group_csave[re.group_csave_index++] = re.groups[g_index] // start
|
|
||||||
re.group_csave[re.group_csave_index++] = re.groups[g_index+1] // end
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
re.prog[pc].group_rep++ // increase repetitions
|
re.prog[pc].group_rep++ // increase repetitions
|
||||||
|
|
|
@ -230,7 +230,8 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
|
|
||||||
if to.cgn.len > 0 {
|
if to.cgn.len > 0 {
|
||||||
re.group_csave = [-1].repeat(3*20+1)
|
re.group_csave_flag = true
|
||||||
|
//re.group_csave = [-1].repeat(3*20+1)
|
||||||
if debug { println("continuous save")}
|
if debug { println("continuous save")}
|
||||||
} else {
|
} else {
|
||||||
if debug { println("NO continuous save")}
|
if debug { println("NO continuous save")}
|
||||||
|
|
Loading…
Reference in New Issue