regex: added groups in replace strings (#9576)
parent
0eb59cf2bd
commit
1a324679b9
|
@ -544,10 +544,36 @@ pub fn (mut re RE) find_all_str(in_txt string) []string
|
||||||
#### Replace functions
|
#### Replace functions
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// replace return a string where the matches are replaced with the replace string, only non overlapped matches are used
|
// replace returns a string where the matches are replaced with the repl_str string,
|
||||||
|
// this function supports groups in the replace string
|
||||||
pub fn (re mut RE) replace(in_txt string, repl string) string
|
pub fn (mut re RE) replace(in_txt string, repl_str string) string
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The replace string can include group references:
|
||||||
|
|
||||||
|
```v ignore
|
||||||
|
txt := "Today it is a good day."
|
||||||
|
query := r'(a\w)[ ,.]'
|
||||||
|
mut re := regex.regex_opt(query)?
|
||||||
|
res := re.replace(txt, r"__[\0]__")
|
||||||
|
```
|
||||||
|
|
||||||
|
In this example we used group `0` in the replace string: `\0`. The result will be:
|
||||||
|
|
||||||
|
```
|
||||||
|
Today it is a good day. => Tod__[ay]__it is a good d__[ay]__
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** only groups from `0` to `9` can be used in the replace string.
|
||||||
|
|
||||||
|
If `groups` are not needed in the replace process, it is possible
|
||||||
|
to use a quick function:
|
||||||
|
|
||||||
|
```v ignore
|
||||||
|
// replace_simple returns a string where the matches are replaced with the replace string
|
||||||
|
pub fn (mut re RE) replace_simple(in_txt string, repl string) string
|
||||||
|
```
|
||||||
|
|
||||||
#### Custom replace function
|
#### Custom replace function
|
||||||
|
|
||||||
For complex find and replace operations, the function `replace_by_fn` is available.
|
For complex find and replace operations, the function `replace_by_fn` is available.
|
||||||
|
|
|
@ -176,7 +176,41 @@ match_test_suite_replace = [
|
||||||
r"[Tt]o\w+",
|
r"[Tt]o\w+",
|
||||||
"CIAO",
|
"CIAO",
|
||||||
"CIAO is a good day and CIAO will be for sure."
|
"CIAO is a good day and CIAO will be for sure."
|
||||||
}
|
},
|
||||||
|
TestItemRe{
|
||||||
|
"Today is a good day and tomorrow will be for sure.",
|
||||||
|
r"(a\w) ",
|
||||||
|
r"[\0] ",
|
||||||
|
"Tod[ay] is a good d[ay] and tomorrow will be for sure."
|
||||||
|
},
|
||||||
|
TestItemRe{
|
||||||
|
"Today is a good day and tomorrow will be for sure.",
|
||||||
|
r"(a\w) ",
|
||||||
|
r"[\0_\0] ",
|
||||||
|
"Tod[ay_ay] is a good d[ay_ay] and tomorrow will be for sure."
|
||||||
|
},
|
||||||
|
TestItemRe{
|
||||||
|
"Today is a good day and tomorrow will be for sure.",
|
||||||
|
r"(a\w) ",
|
||||||
|
r"[\0\1] ",
|
||||||
|
"Tod[ay] is a good d[ay] and tomorrow will be for sure."
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
match_test_suite_replace_simple = [
|
||||||
|
// replace_simple tests; each item lists, in order: source text, regex query, replacement string, expected result
|
||||||
|
TestItemRe{
|
||||||
|
"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
|
||||||
|
r"(pi?(ba)+o)",
|
||||||
|
"CIAO",
|
||||||
|
"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
|
||||||
|
},
|
||||||
|
TestItemRe{
|
||||||
|
"Today is a good day and tomorrow will be for sure.",
|
||||||
|
r"[Tt]o\w+",
|
||||||
|
"CIAO",
|
||||||
|
"CIAO is a good day and CIAO will be for sure."
|
||||||
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -425,6 +459,25 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// check replace simple
|
||||||
|
for c,to in match_test_suite_replace_simple{
|
||||||
|
// debug print
|
||||||
|
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
|
||||||
|
|
||||||
|
mut re := regex.regex_opt(to.q) or {
|
||||||
|
eprintln('err: $err')
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
res := re.replace_simple(to.src,to.rep)
|
||||||
|
if res != to.r {
|
||||||
|
eprintln("ERROR: replace.")
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// check match and find
|
// check match and find
|
||||||
for c,to in match_test_suite {
|
for c,to in match_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
|
|
|
@ -54,7 +54,9 @@ pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
|
||||||
tmp_index := re.group_map[group_name]-1
|
tmp_index := re.group_map[group_name]-1
|
||||||
start := re.groups[tmp_index * 2]
|
start := re.groups[tmp_index * 2]
|
||||||
end := re.groups[tmp_index * 2 + 1]
|
end := re.groups[tmp_index * 2 + 1]
|
||||||
return in_txt[start..end]
|
if start >= 0 && end > start {
|
||||||
|
return in_txt[start..end]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
@ -65,7 +67,9 @@ pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
|
||||||
index := group_id << 1
|
index := group_id << 1
|
||||||
start := re.groups[index]
|
start := re.groups[index]
|
||||||
end := re.groups[index + 1]
|
end := re.groups[index + 1]
|
||||||
return in_txt[start..end]
|
if start >= 0 && end > start {
|
||||||
|
return in_txt[start..end]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
@ -307,8 +311,8 @@ pub fn (mut re RE) find_all_str(in_txt string) []string {
|
||||||
* Replacers
|
* Replacers
|
||||||
*
|
*
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
// replace return a string where the matches are replaced with the replace string
|
// replace_simple return a string where the matches are replaced with the replace string
|
||||||
pub fn (mut re RE) replace(in_txt string, repl string) string {
|
pub fn (mut re RE) replace_simple(in_txt string, repl string) string {
|
||||||
pos := re.find_all(in_txt)
|
pos := re.find_all(in_txt)
|
||||||
|
|
||||||
if pos.len > 0 {
|
if pos.len > 0 {
|
||||||
|
@ -331,6 +335,7 @@ pub fn (mut re RE) replace(in_txt string, repl string) string {
|
||||||
return in_txt
|
return in_txt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// type of function used for custom replace
|
// type of function used for custom replace
|
||||||
// in_txt source text
|
// in_txt source text
|
||||||
// start index of the start of the match in in_txt
|
// start index of the start of the match in in_txt
|
||||||
|
@ -378,3 +383,67 @@ pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
|
||||||
}
|
}
|
||||||
return res.str()
|
return res.str()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// parsed_replace_string expands the group references contained in `repl`.
// A backslash followed by a single digit (`\0` .. `\9`) is substituted with
// the text that get_group_by_id returns for that group on `in_txt`;
// every other backslash sequence is copied to the result unchanged.
fn (re RE) parsed_replace_string(in_txt string, repl string) string {
	// every chunk after the first one was preceded by a backslash in `repl`
	chunks := repl.split("\\")
	mut out := chunks[0]
	for chunk in chunks[1..] {
		is_group_ref := chunk.len > 0 && chunk[0] >= `0` && chunk[0] <= `9`
		if !is_group_ref {
			// not a group reference: put back the backslash consumed by split
			out += '\\' + chunk
			continue
		}
		// only the first digit selects the group, the rest of the chunk is literal text
		group_id := int(chunk[0] - `0`)
		out += re.get_group_by_id(in_txt, group_id) + chunk[1..]
	}
	return out
}
|
||||||
|
|
||||||
|
// replace returns a copy of `in_txt` where every non-overlapping match of the
// regex is substituted with `repl_str`.
// Group references (`\0` .. `\9`) inside `repl_str` are expanded for each
// match through parsed_replace_string.
pub fn (mut re RE) replace(in_txt string, repl_str string) string {
	mut out := strings.new_builder(in_txt.len)
	// end of the previous match; the next search starts from here
	mut cursor := 0
	for cursor < in_txt.len {
		start, end := re.find_from(in_txt, cursor)
		if start < 0 || end <= start {
			// no further non-empty match: stop searching
			break
		}
		// copy the unmatched text that precedes this match
		if cursor < start {
			out.write_string(in_txt[cursor..start])
		}
		// add the search offset to the group bounds before they are used to slice in_txt
		// NOTE(review): presumably find_from reports them relative to the offset - confirm
		for g in 0 .. re.group_count {
			re.groups[g << 1] += cursor
			re.groups[(g << 1) + 1] += cursor
		}
		out.write_string(re.parsed_replace_string(in_txt, repl_str))
		cursor = end
	}
	// copy whatever follows the last match
	if cursor < in_txt.len {
		out.write_string(in_txt[cursor..])
	}
	return out.str()
}
|
Loading…
Reference in New Issue