regex: add a find_all_str function (#7517)
parent
36dcace0a7
commit
2824e07baa
|
@ -473,7 +473,7 @@ pub fn new() RE
|
||||||
|
|
||||||
```
|
```
|
||||||
#### **Custom initialization**
|
#### **Custom initialization**
|
||||||
For some particular need it is possible initialize a fully customized regex:
|
For some particular needs it is possible to initialize a fully manually customized regex:
|
||||||
```v ignore
|
```v ignore
|
||||||
pattern = r"ab(.*)(ac)"
|
pattern = r"ab(.*)(ac)"
|
||||||
// init custom regex
|
// init custom regex
|
||||||
|
@ -484,6 +484,8 @@ re.cc = []CharClass{len: pattern.len} // can not be more char class the th
|
||||||
re.group_csave_flag = false // true enable continuos group saving if needed
|
re.group_csave_flag = false // true enable continuos group saving if needed
|
||||||
re.group_max_nested = 128 // set max 128 group nested possible
|
re.group_max_nested = 128 // set max 128 group nested possible
|
||||||
re.group_max = pattern.len>>1 // we can't have more groups than the half of the pattern legth
|
re.group_max = pattern.len>>1 // we can't have more groups than the half of the pattern legth
|
||||||
|
re.group_stack = []int{len: re.group_max, init: -1}
|
||||||
|
re.group_data = []int{len: re.group_max, init: -1}
|
||||||
```
|
```
|
||||||
### Compiling
|
### Compiling
|
||||||
|
|
||||||
|
@ -494,22 +496,14 @@ After an initializer is used, the regex expression must be compiled with:
|
||||||
pub fn (re mut RE) compile_opt(in_txt string) ?
|
pub fn (re mut RE) compile_opt(in_txt string) ?
|
||||||
```
|
```
|
||||||
|
|
||||||
### Operative Functions
|
### Matching Functions
|
||||||
|
|
||||||
These are the operative functions
|
These are the matching functions
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// match_string try to match the input string, return start and end index if found else start is -1
|
// match_string try to match the input string, return start and end index if found else start is -1
|
||||||
pub fn (re mut RE) match_string(in_txt string) (int,int)
|
pub fn (re mut RE) match_string(in_txt string) (int,int)
|
||||||
|
|
||||||
// find try to find the first match in the input string, return start and end index if found else start is -1
|
|
||||||
pub fn (re mut RE) find(in_txt string) (int,int)
|
|
||||||
|
|
||||||
// find_all find all the "non overlapping" occurrences of the matching pattern, return a list of start end indexes
|
|
||||||
pub fn (re mut RE) find_all(in_txt string) []int
|
|
||||||
|
|
||||||
// replace return a string where the matches are replaced with the replace string, only non overlapped matches are used
|
|
||||||
pub fn (re mut RE) replace(in_txt string, repl string) string
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Find and Replace
|
## Find and Replace
|
||||||
|
@ -519,13 +513,19 @@ There are the following find and replace functions:
|
||||||
#### Find functions
|
#### Find functions
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// find try to find the first match in the input string, return start and end index if found else start is -1
|
// find try to find the first match in the input string
|
||||||
|
// return start and end index if found else start is -1
|
||||||
pub fn (re mut RE) find(in_txt string) (int,int)
|
pub fn (re mut RE) find(in_txt string) (int,int)
|
||||||
|
|
||||||
// find_all find all the "non overlapping" occurrences of the matching pattern
|
// find_all find all the "non overlapping" occurrences of the matching pattern
|
||||||
// return a list of start end indexes like: [3,4,6,8]
|
// return a list of start end indexes like: [3,4,6,8]
|
||||||
// the matches are [3,4] and [6,8]
|
// the matches are [3,4] and [6,8]
|
||||||
pub fn (re mut RE) find_all(in_txt string) []int
|
pub fn (re mut RE) find_all(in_txt string) []int
|
||||||
|
|
||||||
|
// find_all_str find all the "non overlapping" occurrences of the matching pattern
|
||||||
|
// return a list of strings
|
||||||
|
// the result is like ["first match","second match"]
|
||||||
|
pub fn (mut re RE) find_all_str(in_txt string) []string
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Replace functions
|
#### Replace functions
|
||||||
|
@ -543,10 +543,12 @@ The`replace_by_fn` use a custom replace function making possible customizations.
|
||||||
The custom function must be of the type:
|
The custom function must be of the type:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// re RE struct
|
// type of function used for custom replace
|
||||||
// in_txt all the text passed to the regex expression
|
// in_txt source text
|
||||||
// the match is: in_txt[start..end]
|
// start index of the start of the match in in_txt
|
||||||
fn (re RE, in_txt string, start int, end int) string
|
// end index of the end of the match in in_txt
|
||||||
|
// --- the match is in in_txt[start..end] ---
|
||||||
|
fn (re RE, in_txt string, start int, end int) string
|
||||||
```
|
```
|
||||||
|
|
||||||
The following example will clarify the use:
|
The following example will clarify the use:
|
||||||
|
|
|
@ -1554,14 +1554,14 @@ fn state_str(s Match_state) string {
|
||||||
|
|
||||||
struct StateObj {
|
struct StateObj {
|
||||||
pub mut:
|
pub mut:
|
||||||
group_index int = -1 // group id used to know how many groups are open
|
group_index int = -1 // group id used to know how many groups are open
|
||||||
match_flag bool
|
match_flag bool // indicate if we are in a match condition
|
||||||
match_index int = -1
|
match_index int = -1 // index of the last match
|
||||||
first_match int = -1 //index of the first match
|
first_match int = -1 // index of the first match
|
||||||
pc int = -1 // program counter
|
pc int = -1 // program counter
|
||||||
i int = -1 // source string index
|
i int = -1 // source string index
|
||||||
char_len int
|
char_len int // last char legth
|
||||||
last_dot_pc int = -1 // last dot chat pc
|
last_dot_pc int = -1 // last dot chat pc
|
||||||
}
|
}
|
||||||
|
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
|
@ -1579,13 +1579,6 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
mut ist := rune(0) // actual instruction
|
mut ist := rune(0) // actual instruction
|
||||||
mut l_ist := rune(0) // last matched instruction
|
mut l_ist := rune(0) // last matched instruction
|
||||||
|
|
||||||
//mut state_list := []StateObj{}
|
|
||||||
|
|
||||||
//mut group_stack := []int{len: re.group_max, init: -1}
|
|
||||||
//mut group_data := []int{len: re.group_max, init: -1}
|
|
||||||
|
|
||||||
//mut group_index := -1 // group id used to know how many groups are open
|
|
||||||
|
|
||||||
mut step_count := 0 // stats for debug
|
mut step_count := 0 // stats for debug
|
||||||
mut dbg_line := 0 // count debug line printed
|
mut dbg_line := 0 // count debug line printed
|
||||||
|
|
||||||
|
@ -1900,7 +1893,6 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
//println("g.id: ${re.prog[state.pc].group_id} group_index: ${state.group_index}")
|
//println("g.id: ${re.prog[state.pc].group_id} group_index: ${state.group_index}")
|
||||||
if state.group_index >= 0 && re.prog[state.pc].group_id >= 0 {
|
if state.group_index >= 0 && re.prog[state.pc].group_id >= 0 {
|
||||||
start_i := re.group_stack[state.group_index]
|
start_i := re.group_stack[state.group_index]
|
||||||
//re.group_stack[state.group_index]=-1
|
|
||||||
|
|
||||||
// save group results
|
// save group results
|
||||||
g_index := re.prog[state.pc].group_id*2
|
g_index := re.prog[state.pc].group_id*2
|
||||||
|
@ -1960,8 +1952,6 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
// check next token to be false
|
// check next token to be false
|
||||||
mut next_check_flag := false
|
mut next_check_flag := false
|
||||||
|
|
||||||
//if re.prog[state.pc].rep >= re.prog[state.pc].rep_min &&
|
|
||||||
|
|
||||||
// if we are done with max go on dot char are dedicated case!!
|
// if we are done with max go on dot char are dedicated case!!
|
||||||
if re.prog[state.pc].rep >= re.prog[state.pc].rep_max
|
if re.prog[state.pc].rep >= re.prog[state.pc].rep_max
|
||||||
{
|
{
|
||||||
|
@ -2415,113 +2405,3 @@ pub fn (mut re RE) match_string(in_txt string) (int,int) {
|
||||||
}
|
}
|
||||||
return start, end
|
return start, end
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
// Finders
|
|
||||||
//
|
|
||||||
|
|
||||||
// find try to find the first match in the input string
|
|
||||||
[direct_array_access]
|
|
||||||
pub fn (mut re RE) find(in_txt string) (int,int) {
|
|
||||||
old_flag := re.flag
|
|
||||||
|
|
||||||
re.flag |= f_src // enable search mode
|
|
||||||
start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
|
|
||||||
//print("Find [$start,$end] '${in_txt[start..end]}'")
|
|
||||||
if end > in_txt.len {
|
|
||||||
end = in_txt.len
|
|
||||||
}
|
|
||||||
re.flag = old_flag
|
|
||||||
|
|
||||||
if start >= 0 && end > start {
|
|
||||||
return start, end
|
|
||||||
}
|
|
||||||
return no_match_found, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// find all the non overlapping occurrences of the match pattern
|
|
||||||
[direct_array_access]
|
|
||||||
pub fn (mut re RE) find_all(in_txt string) []int {
|
|
||||||
mut i := 0
|
|
||||||
mut res := []int{}
|
|
||||||
mut ls := -1
|
|
||||||
for i < in_txt.len {
|
|
||||||
s,e := re.find(in_txt[i..])
|
|
||||||
if s >= 0 && e > s && i+s > ls {
|
|
||||||
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
|
||||||
res << i+s
|
|
||||||
res << i+e
|
|
||||||
ls = i+s
|
|
||||||
i = i+e
|
|
||||||
continue
|
|
||||||
} else {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
return res
|
|
||||||
}
|
|
||||||
|
|
||||||
// replace return a string where the matches are replaced with the replace string
|
|
||||||
pub fn (mut re RE) replace(in_txt string, repl string) string {
|
|
||||||
pos := re.find_all(in_txt)
|
|
||||||
if pos.len > 0 {
|
|
||||||
mut res := ""
|
|
||||||
mut i := 0
|
|
||||||
|
|
||||||
mut s1 := 0
|
|
||||||
mut e1 := in_txt.len
|
|
||||||
|
|
||||||
for i < pos.len {
|
|
||||||
e1 = pos[i]
|
|
||||||
res += in_txt[s1..e1] + repl
|
|
||||||
s1 = pos[i+1]
|
|
||||||
i += 2
|
|
||||||
}
|
|
||||||
|
|
||||||
res += in_txt[s1..]
|
|
||||||
return res
|
|
||||||
}
|
|
||||||
return in_txt
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type FnReplace = fn (re RE, in_txt string, start int, end int) string
|
|
||||||
|
|
||||||
// replace_by_fn return a string where the matches are replaced with the string from the repl_fn callback function
|
|
||||||
pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
|
|
||||||
mut i := 0
|
|
||||||
mut res := ""
|
|
||||||
mut ls := -1
|
|
||||||
|
|
||||||
mut s1 := 0
|
|
||||||
//mut e1 := in_txt.len
|
|
||||||
|
|
||||||
for i < in_txt.len {
|
|
||||||
s,e := re.find(in_txt[i..])
|
|
||||||
if s >= 0 && e > s && i+s > ls {
|
|
||||||
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
|
||||||
start := i + s
|
|
||||||
end := i + e
|
|
||||||
// update grups index diplacement
|
|
||||||
mut gi := 0
|
|
||||||
for gi < re.groups.len {
|
|
||||||
re.groups[gi] += i
|
|
||||||
gi++
|
|
||||||
}
|
|
||||||
repl := repl_fn(re, in_txt, start, end)
|
|
||||||
|
|
||||||
res += in_txt[s1..start] + repl
|
|
||||||
s1 = end
|
|
||||||
|
|
||||||
ls = i + s
|
|
||||||
i = i + e
|
|
||||||
continue
|
|
||||||
} else {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
res += in_txt[s1..]
|
|
||||||
return res
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -17,9 +17,19 @@ pub fn (mut re RE) compile_opt(pattern string) ? {
|
||||||
}
|
}
|
||||||
|
|
||||||
// new_regex create a RE of small size, usually sufficient for ordinary use
|
// new_regex create a RE of small size, usually sufficient for ordinary use
|
||||||
[deprecated]
|
|
||||||
pub fn new() RE {
|
pub fn new() RE {
|
||||||
return impl_new_regex_by_size(1)
|
// init regex
|
||||||
|
mut re := regex.RE{}
|
||||||
|
re.prog = []Token {len: max_code_len + 1} // max program length, can not be longer then the pattern
|
||||||
|
re.cc = []CharClass{len: max_code_len} // can not be more char class the the length of the pattern
|
||||||
|
re.group_csave_flag = false // enable continuos group saving
|
||||||
|
re.group_max_nested = 128 // set max 128 group nested
|
||||||
|
re.group_max = max_code_len >> 1 // we can't have more groups than the half of the pattern legth
|
||||||
|
|
||||||
|
re.group_stack = []int{len: re.group_max, init: -1}
|
||||||
|
re.group_data = []int{len: re.group_max, init: -1}
|
||||||
|
|
||||||
|
return re
|
||||||
}
|
}
|
||||||
|
|
||||||
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
||||||
|
|
|
@ -144,29 +144,6 @@ match_test_suite = [
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
struct TestItemFa {
|
|
||||||
src string
|
|
||||||
q string
|
|
||||||
r []int
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
|
||||||
match_test_suite_fa = [
|
|
||||||
// find_all tests
|
|
||||||
TestItemFa{
|
|
||||||
"oggi pippo è andato a casa di pluto ed ha trovato pippo",
|
|
||||||
r"p[iplut]+o",
|
|
||||||
[5, 10, 31, 36, 51, 56]
|
|
||||||
},
|
|
||||||
TestItemFa{
|
|
||||||
"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
|
|
||||||
r"(pi?(ba)+o)",
|
|
||||||
[5, 10, 31, 39, 54, 65]
|
|
||||||
},
|
|
||||||
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
struct TestItemRe {
|
struct TestItemRe {
|
||||||
src string
|
src string
|
||||||
q string
|
q string
|
||||||
|
@ -174,7 +151,7 @@ struct TestItemRe {
|
||||||
r string
|
r string
|
||||||
}
|
}
|
||||||
const (
|
const (
|
||||||
match_test_suite_re = [
|
match_test_suite_replace = [
|
||||||
// replace tests
|
// replace tests
|
||||||
TestItemRe{
|
TestItemRe{
|
||||||
"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
|
"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
|
||||||
|
@ -241,12 +218,53 @@ cgroups_test_suite = [
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
struct Test_find_all {
|
||||||
|
src string
|
||||||
|
q string
|
||||||
|
res []int // [0,4,5,6...]
|
||||||
|
res_str []string // ['find0','find1'...]
|
||||||
|
}
|
||||||
|
const (
|
||||||
|
find_all_test_suite = [
|
||||||
|
Test_find_all{
|
||||||
|
"abcd 1234 efgh 1234 ghkl1234 ab34546df",
|
||||||
|
r"\d+",
|
||||||
|
[5, 9, 15, 19, 24, 28, 31, 36],
|
||||||
|
['1234', '1234', '1234', '34546']
|
||||||
|
},
|
||||||
|
Test_find_all{
|
||||||
|
"abcd 1234 efgh 1234 ghkl1234 ab34546df",
|
||||||
|
r"\a+",
|
||||||
|
[0, 4, 10, 14, 20, 24, 29, 31, 36, 38],
|
||||||
|
['abcd', 'efgh', 'ghkl', 'ab', 'df']
|
||||||
|
},
|
||||||
|
Test_find_all{
|
||||||
|
"oggi pippo è andato a casa di pluto ed ha trovato pippo",
|
||||||
|
r"p[iplut]+o",
|
||||||
|
[5, 10, 31, 36, 51, 56],
|
||||||
|
['pippo', 'pluto', 'pippo']
|
||||||
|
},
|
||||||
|
Test_find_all{
|
||||||
|
"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
|
||||||
|
r"(pi?(ba)+o)",
|
||||||
|
[5, 10, 31, 39, 54, 65],
|
||||||
|
['pibao', 'pbababao', 'pibabababao']
|
||||||
|
},
|
||||||
|
Test_find_all{
|
||||||
|
"Today is a good day and tomorrow will be for sure.",
|
||||||
|
r"[Tt]o\w+",
|
||||||
|
[0, 5, 24, 32],
|
||||||
|
['Today', 'tomorrow']
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
debug = false // true for debug println
|
debug = false // true for debug println
|
||||||
)
|
)
|
||||||
|
|
||||||
fn test_regex(){
|
fn test_regex(){
|
||||||
|
|
||||||
// check capturing groups
|
// check capturing groups
|
||||||
for c,to in cgroups_test_suite {
|
for c,to in cgroups_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
|
@ -275,8 +293,8 @@ fn test_regex(){
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
//println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
//println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||||
println("ERROR!")
|
eprintln("ERROR!")
|
||||||
C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -284,7 +302,7 @@ fn test_regex(){
|
||||||
// check cgroups
|
// check cgroups
|
||||||
if to.cgn.len > 0 {
|
if to.cgn.len > 0 {
|
||||||
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
||||||
println("Capturing group len error! found: ${re.group_csave[0]} true ground: ${to.cg[0]}")
|
eprintln("Capturing group len error! found: ${re.group_csave[0]} true ground: ${to.cg[0]}")
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -293,7 +311,7 @@ fn test_regex(){
|
||||||
mut ln := re.group_csave[0]*3
|
mut ln := re.group_csave[0]*3
|
||||||
for ln > 0 {
|
for ln > 0 {
|
||||||
if re.group_csave[ln] != to.cg[ln] {
|
if re.group_csave[ln] != to.cg[ln] {
|
||||||
println("Capturing group failed on $ln item!")
|
eprintln("Capturing group failed on $ln item!")
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
ln--
|
ln--
|
||||||
|
@ -302,7 +320,7 @@ fn test_regex(){
|
||||||
// check named captured groups
|
// check named captured groups
|
||||||
for k in to.cgn.keys() {
|
for k in to.cgn.keys() {
|
||||||
if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
|
if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
|
||||||
println("Named capturing group error! [$k]")
|
eprintln("Named capturing group error! [$k]")
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -314,9 +332,9 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
for ln:=0; ln < re.groups.len; ln++ {
|
for ln:=0; ln < re.groups.len; ln++ {
|
||||||
if re.groups[ln] != to.cg[ln] {
|
if re.groups[ln] != to.cg[ln] {
|
||||||
println("Capture group doesn't match:")
|
eprintln("Capture group doesn't match:")
|
||||||
println("true ground: [${to.cg}]")
|
eprintln("true ground: [${to.cg}]")
|
||||||
println("elaborated : [${re.groups}]")
|
eprintln("elaborated : [${re.groups}]")
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -324,9 +342,9 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
|
|
||||||
// check find_all
|
// check find_all
|
||||||
for c,to in match_test_suite_fa{
|
for c,to in find_all_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
|
if debug { println("#$c [$to.src] q[$to.q] ($to.res, $to.res_str)") }
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
|
@ -334,25 +352,24 @@ fn test_regex(){
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
re.reset()
|
||||||
res := re.find_all(to.src)
|
res := re.find_all(to.src)
|
||||||
if res.len != to.r.len {
|
if res != to.res {
|
||||||
println("ERROR: find_all, array of different size.")
|
eprintln('err: find_all !!')
|
||||||
|
if debug { println("#$c exp: $to.res calculated: $res") }
|
||||||
assert false
|
assert false
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for c1,i in res {
|
res_str := re.find_all_str(to.src)
|
||||||
if i != to.r[c1] {
|
if res_str != to.res_str {
|
||||||
println("ERROR: find_all, different indexes.")
|
eprintln('err: find_all_str !!')
|
||||||
assert false
|
if debug { println("#$c exp: $to.res_str calculated: $res_str") }
|
||||||
continue
|
assert false
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check replace
|
// check replace
|
||||||
for c,to in match_test_suite_re{
|
for c,to in match_test_suite_replace{
|
||||||
// debug print
|
// debug print
|
||||||
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
|
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
|
||||||
|
|
||||||
|
@ -364,7 +381,7 @@ fn test_regex(){
|
||||||
|
|
||||||
res := re.replace(to.src,to.rep)
|
res := re.replace(to.src,to.rep)
|
||||||
if res != to.r {
|
if res != to.r {
|
||||||
println("ERROR: replace.")
|
eprintln("ERROR: replace.")
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -383,12 +400,12 @@ fn test_regex(){
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// q_str := re.get_query()
|
// q_str := re.get_query()
|
||||||
// println("Query: $q_str")
|
// eprintln("Query: $q_str")
|
||||||
start,end := re.find(to.src)
|
start,end := re.find(to.src)
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
err_str := re.get_parse_error_string(start)
|
err_str := re.get_parse_error_string(start)
|
||||||
println("ERROR : $err_str start: ${start} end: ${end}")
|
eprintln("ERROR : $err_str start: ${start} end: ${end}")
|
||||||
assert false
|
assert false
|
||||||
} else {
|
} else {
|
||||||
//tmp_str := text[start..end]
|
//tmp_str := text[start..end]
|
||||||
|
@ -416,8 +433,8 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
eprintln("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||||
println("ERROR!")
|
eprintln("ERROR!")
|
||||||
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
|
@ -427,7 +444,7 @@ fn test_regex(){
|
||||||
tmp_str1 := to.src.clone()
|
tmp_str1 := to.src.clone()
|
||||||
start1, end1 := re.match_string(tmp_str1)
|
start1, end1 := re.match_string(tmp_str1)
|
||||||
if start1 != start || end1 != end {
|
if start1 != start || end1 != end {
|
||||||
println("two run ERROR!!")
|
eprintln("two run ERROR!!")
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
|
@ -117,6 +117,7 @@ pub fn (re RE) get_group_list() []Re_group {
|
||||||
mut res := []Re_group{len: re.groups.len >> 1}
|
mut res := []Re_group{len: re.groups.len >> 1}
|
||||||
mut gi := 0
|
mut gi := 0
|
||||||
//println("len: ${re.groups.len} groups: ${re.groups}")
|
//println("len: ${re.groups.len} groups: ${re.groups}")
|
||||||
|
|
||||||
for gi < re.groups.len {
|
for gi < re.groups.len {
|
||||||
if re.groups[gi] >= 0 {
|
if re.groups[gi] >= 0 {
|
||||||
txt_st := re.groups[gi]
|
txt_st := re.groups[gi]
|
||||||
|
@ -136,3 +137,143 @@ pub fn (re RE) get_group_list() []Re_group {
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/******************************************************************************
|
||||||
|
*
|
||||||
|
* Finders
|
||||||
|
*
|
||||||
|
******************************************************************************/
|
||||||
|
// find try to find the first match in the input string
|
||||||
|
[direct_array_access]
|
||||||
|
pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
|
old_flag := re.flag
|
||||||
|
re.flag |= f_src // enable search mode
|
||||||
|
|
||||||
|
start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
|
||||||
|
//print("Find [$start,$end] '${in_txt[start..end]}'")
|
||||||
|
if end > in_txt.len {
|
||||||
|
end = in_txt.len
|
||||||
|
}
|
||||||
|
re.flag = old_flag
|
||||||
|
|
||||||
|
if start >= 0 && end > start {
|
||||||
|
return start, end
|
||||||
|
}
|
||||||
|
return no_match_found, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// find_all find all the non overlapping occurrences of the match pattern
|
||||||
|
[direct_array_access]
|
||||||
|
pub fn (mut re RE) find_all(in_txt string) []int {
|
||||||
|
mut i := 0
|
||||||
|
mut res := []int{}
|
||||||
|
mut ls := -1
|
||||||
|
|
||||||
|
for i < in_txt.len {
|
||||||
|
s,e := re.find(in_txt[i..])
|
||||||
|
if s >= 0 && e > s && i+s > ls {
|
||||||
|
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
||||||
|
res << i+s
|
||||||
|
res << i+e
|
||||||
|
ls = i+s
|
||||||
|
i = i+e
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
|
// find_all_str find all the non overlapping occurrences of the match pattern, return a string list
|
||||||
|
[direct_array_access]
|
||||||
|
pub fn (mut re RE) find_all_str(in_txt string) []string {
|
||||||
|
mut i := 0
|
||||||
|
mut res := []string{}
|
||||||
|
mut ls := -1
|
||||||
|
|
||||||
|
for i < in_txt.len {
|
||||||
|
s,e := re.find(in_txt[i..])
|
||||||
|
if s >= 0 && e > s && i+s > ls {
|
||||||
|
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
||||||
|
res << in_txt[i+s..i+e]
|
||||||
|
ls = i+s
|
||||||
|
i = i+e
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
/******************************************************************************
|
||||||
|
*
|
||||||
|
* Replacers
|
||||||
|
*
|
||||||
|
******************************************************************************/
|
||||||
|
// replace return a string where the matches are replaced with the replace string
|
||||||
|
pub fn (mut re RE) replace(in_txt string, repl string) string {
|
||||||
|
pos := re.find_all(in_txt)
|
||||||
|
|
||||||
|
if pos.len > 0 {
|
||||||
|
mut res := ""
|
||||||
|
mut i := 0
|
||||||
|
|
||||||
|
mut s1 := 0
|
||||||
|
mut e1 := in_txt.len
|
||||||
|
|
||||||
|
for i < pos.len {
|
||||||
|
e1 = pos[i]
|
||||||
|
res += in_txt[s1..e1] + repl
|
||||||
|
s1 = pos[i+1]
|
||||||
|
i += 2
|
||||||
|
}
|
||||||
|
|
||||||
|
res += in_txt[s1..]
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
return in_txt
|
||||||
|
}
|
||||||
|
|
||||||
|
// type of function used for custom replace
|
||||||
|
// in_txt source text
|
||||||
|
// start index of the start of the match in in_txt
|
||||||
|
// end index of the end of the match in in_txt
|
||||||
|
// the match is in in_txt[start..end]
|
||||||
|
pub type FnReplace = fn (re RE, in_txt string, start int, end int) string
|
||||||
|
|
||||||
|
// replace_by_fn return a string where the matches are replaced with the string from the repl_fn callback function
|
||||||
|
pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
|
||||||
|
mut i := 0
|
||||||
|
mut res := ""
|
||||||
|
mut ls := -1
|
||||||
|
mut s1 := 0
|
||||||
|
|
||||||
|
for i < in_txt.len {
|
||||||
|
s,e := re.find(in_txt[i..])
|
||||||
|
if s >= 0 && e > s && i+s > ls {
|
||||||
|
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
||||||
|
start := i + s
|
||||||
|
end := i + e
|
||||||
|
// update groups index displacement
|
||||||
|
mut gi := 0
|
||||||
|
for gi < re.groups.len {
|
||||||
|
re.groups[gi] += i
|
||||||
|
gi++
|
||||||
|
}
|
||||||
|
repl := repl_fn(re, in_txt, start, end)
|
||||||
|
|
||||||
|
res += in_txt[s1..start] + repl
|
||||||
|
s1 = end
|
||||||
|
|
||||||
|
ls = i + s
|
||||||
|
i = i + e
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
res += in_txt[s1..]
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue