regex: refactoring of continuous capturing groups (#7310)

pull/7324/head
penguindark 2020-12-14 14:02:13 +01:00 committed by GitHub
parent 89ef316db3
commit 4f986ccac4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 35 deletions

View File

@ -236,8 +236,8 @@ this is possible initializing the saving array field in `RE` struct: `group_csav
This feature allow to collect data in a continuous way.
In the example we pass a text followed by a integer list that we want collect.
To achieve this task we can use the continuous saving of the group
that save each captured group in a array that we set with: `re.group_csave = [-1].repeat(3*20+1)`.
To achieve this task we can use the continuous saving of the group
enabling the right flag: `re.group_csave_flag = true`.
The array will be filled with the following logic:
@ -250,14 +250,14 @@ The array will be filled with the following logic:
The regex save until finish or found that the array have no space.
If the space ends no error is raised, further records will not be saved.
```v oksyntax
```v ignore
fn example2() {
test_regex()
text := 'tst: 01,23,45 ,56, 78'
query := r'.*:(\s*\d+[\s,]*)+'
mut re := new() or { panic(err) }
// re.debug = 2
re.group_csave = [-1].repeat(3 * 20 + 1) // we expect max 20 records
re.group_csave_flag = true // enable continuous capture
re.compile_opt(query) or {
println(err)
return
@ -330,7 +330,7 @@ fn main() {
query := r'(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+'
mut re := new()
re.debug = 2
// must provide an array of the right size if want the continuos saving of the groups
// must provide an array of the right size if want the continuous saving of the groups
re.group_csave = [-1].repeat(3 * 20 + 1)
re.compile_opt(query) or {
println(err)

View File

@ -314,8 +314,8 @@ pub mut:
group_max_nested int = 3 // max nested group
group_max int = 8 // max allowed number of different groups
group_csave []int = []int{} // groups continuous save array
group_csave_index int = -1 // groups continuous save index
group_csave_flag bool // flag to enable continuous saving
group_csave []int = []int{} // groups continuous save list
group_map map[string]int // groups names map
@ -344,10 +344,7 @@ fn (mut re RE) reset(){
re.state_stack_index = -1
// reset group_csave
if re.group_csave.len > 0 {
re.group_csave_index = 1
re.group_csave[0] = 0 // reset the capture count
}
re.group_csave = []int{}
}
// reset for search mode fail
@ -1482,6 +1479,45 @@ pub fn (re RE) get_query() string {
/*
Groups saving utilities
*/
[inline]
fn (mut re RE) group_continuous_save(g_index int) {
if re.group_csave_flag == true {
// continuous save, save until we have space
// init the first element as counter
if re.group_csave.len == 0 {
re.group_csave << 0
}
gi := g_index >> 1
start := re.groups[g_index]
end := re.groups[g_index+1]
// check if we are simply increasing the size ot the found group
if re.group_csave.len >=4 &&
gi == re.group_csave[re.group_csave.len - 3] &&
start == re.group_csave[re.group_csave.len - 2]
{
re.group_csave[re.group_csave.len - 1] = end
return
}
// otherwise append a new group to the list
// increment counter
re.group_csave[0]++
// save the record
re.group_csave << (g_index >> 1) // group id
re.group_csave << re.groups[g_index] // start
re.group_csave << re.groups[g_index+1] // end
}
}
/*
Matching
*/
@ -1684,18 +1720,8 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
}
// continuous save, save until we have space
if re.group_csave_index > 0 {
// check if we have space to save the record
if (re.group_csave_index + 3) < re.group_csave.len {
// incrment counter
re.group_csave[0]++
// save the record
re.group_csave[re.group_csave_index++] = g_index >> 1 // group id
re.group_csave[re.group_csave_index++] = re.groups[g_index] // start
re.group_csave[re.group_csave_index++] = re.groups[g_index+1] // end
}
}
re.group_continuous_save(g_index)
}
group_index--
@ -1879,17 +1905,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
//println("GROUP ${re.prog[pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}]")
// continuous save, save until we have space
if re.group_csave_index > 0 {
// check if we have space to save the record
if (re.group_csave_index + 3) < re.group_csave.len {
// incrment counter
re.group_csave[0]++
// save the record
re.group_csave[re.group_csave_index++] = g_index >> 1 // group id
re.group_csave[re.group_csave_index++] = re.groups[g_index] // start
re.group_csave[re.group_csave_index++] = re.groups[g_index+1] // end
}
}
re.group_continuous_save(g_index)
}
re.prog[pc].group_rep++ // increase repetitions

View File

@ -230,7 +230,8 @@ fn test_regex(){
}
if to.cgn.len > 0 {
re.group_csave = [-1].repeat(3*20+1)
re.group_csave_flag = true
//re.group_csave = [-1].repeat(3*20+1)
if debug { println("continuous save")}
} else {
if debug { println("NO continuous save")}