regex: bug fix in replace using function, added tests (#9381)
parent
b0e225ac2d
commit
59f95170b3
|
@ -1,4 +1,5 @@
|
||||||
import regex
|
import regex
|
||||||
|
import rand
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
*
|
*
|
||||||
|
@ -288,7 +289,7 @@ find_all_test_suite = [
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
debug = false // true for debug println
|
// NOTE(review): this commit flips debug from false to true, so tests that
// check `if debug` (e.g. test_regex_func_replace) will eprintln their data
// on every run; confirm this was intended and not a leftover.
debug = true // true for debug println
|
||||||
)
|
)
|
||||||
|
|
||||||
fn test_regex(){
|
fn test_regex(){
|
||||||
|
@ -497,3 +498,35 @@ fn test_regex_func(){
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// my_repl is the replacement callback passed to replace_by_fn by the test below.
// It reads groups 0, 1 and 2 of the current match through get_group_by_id,
// keeps only the `[0..1]` slice of each, appends "X" to every piece and
// returns the three pieces concatenated.
// The start and end parameters are part of the callback signature but are
// not read here.
// NOTE(review): the `[0..1]` slices assume every group text is non-empty.
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
|
||||||
|
s0 := re.get_group_by_id(in_txt,0)[0..1] + "X"
|
||||||
|
s1 := re.get_group_by_id(in_txt,1)[0..1] + "X"
|
||||||
|
s2 := re.get_group_by_id(in_txt,2)[0..1] + "X"
|
||||||
|
return "${s0}${s1}${s2}"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// test_regex_func_replace checks replace_by_fn with the my_repl callback.
// It builds a three-line input (txt1) and the matching expected output (txt2),
// runs the replacement and asserts that both strings are equal.
|
||||||
|
fn test_regex_func_replace(){
|
||||||
|
filler := "E il primo dei tre regni dell'Oltretomba cristiano visitato da Dante nel corso del viaggio, con la guida di Virgilio."
|
||||||
|
txt := r'"content": "They dont necessarily flag "you will be buying these shares on margin!"", "channel_id"'
|
||||||
|
// three capture groups; in txt they begin with 'c', 'T' and ',' respectively
query := r'"(content":\s+")(.*)(, "channel_id")'
|
||||||
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
|
|
||||||
|
// txt1: text handed to replace_by_fn
mut txt1 := ""
|
||||||
|
// txt2: text the replacement is expected to produce
mut txt2 := ""
|
||||||
|
|
||||||
|
for _ in 0..3 {
|
||||||
|
// random prefix length in the range 10..29, shared by both strings
rnd := int(10+rand.u32() % 20)
|
||||||
|
txt1 += txt + filler[0..rnd] + "\n"
|
||||||
|
// "cXTX,X" is what my_repl returns for groups starting with 'c', 'T' and ','
txt2 += "cXTX,X" + filler[0..rnd] + "\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
result := re.replace_by_fn(txt1, my_repl)
|
||||||
|
if debug {
|
||||||
|
eprintln(result)
|
||||||
|
eprintln(txt2)
|
||||||
|
}
|
||||||
|
assert result == txt2
|
||||||
|
}
|
|
@ -8,6 +8,7 @@ that can be found in the LICENSE file.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
module regex
|
module regex
|
||||||
|
import strings
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
*
|
*
|
||||||
|
@ -71,7 +72,7 @@ pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
|
||||||
|
|
||||||
// get_group_bounds_by_id gets a group's boundaries by its id
|
// get_group_bounds_by_id gets a group's boundaries by its id
|
||||||
pub fn (re RE) get_group_bounds_by_id(group_id int) (int,int) {
|
pub fn (re RE) get_group_bounds_by_id(group_id int) (int,int) {
|
||||||
if group_id < (re.groups.len >> 1) {
|
if group_id < re.group_count {
|
||||||
index := group_id << 1
|
index := group_id << 1
|
||||||
return re.groups[index], re.groups[index + 1]
|
return re.groups[index], re.groups[index + 1]
|
||||||
}
|
}
|
||||||
|
@ -146,7 +147,7 @@ pub fn (mut re RE) match_string(in_txt string) (int,int) {
|
||||||
*
|
*
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
/*
|
/*
|
||||||
// find internal implementation
|
// find internal implementation HERE for reference do not remove!!
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
fn (mut re RE) find_imp(in_txt string) (int,int) {
|
fn (mut re RE) find_imp(in_txt string) (int,int) {
|
||||||
old_flag := re.flag
|
old_flag := re.flag
|
||||||
|
@ -169,6 +170,9 @@ fn (mut re RE) find_imp(in_txt string) (int,int) {
|
||||||
// find try to find the first match in the input string
|
// find try to find the first match in the input string
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
pub fn (mut re RE) find(in_txt string) (int,int) {
|
pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
|
//old_flag := re.flag
|
||||||
|
//re.flag |= f_src // enable search mode
|
||||||
|
|
||||||
mut i := 0
|
mut i := 0
|
||||||
for i < in_txt.len {
|
for i < in_txt.len {
|
||||||
//--- speed references ---
|
//--- speed references ---
|
||||||
|
@ -183,18 +187,59 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
//------------------------
|
//------------------------
|
||||||
if s >= 0 && e > s {
|
if s >= 0 && e > s {
|
||||||
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
|
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
|
||||||
|
//re.flag = old_flag
|
||||||
return i+s, i+e
|
return i+s, i+e
|
||||||
} else {
|
} else {
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
//re.flag = old_flag
|
||||||
|
return -1, -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// find_from tries to find the first match in the input string, starting the search at the start index.
// It returns the match start and end as indexes into in_txt (the values from
// match_string shifted by the offset of the scanned tail), or -1, -1 when
// start is negative or no tail gives a result with s >= 0 and e > s
// (so empty matches are treated as "no match").
// re.flag gets f_src set while searching and is restored on every return path.
|
||||||
|
[direct_array_access]
|
||||||
|
pub fn (mut re RE) find_from(in_txt string, start int) (int,int) {
|
||||||
|
mut i := start
|
||||||
|
// reject a negative start before re.flag is modified, so this early
// return cannot leave f_src set on the receiver
if i < 0 {
|
||||||
|
return -1, -1
|
||||||
|
}
|
||||||
|
|
||||||
|
old_flag := re.flag
|
||||||
|
re.flag |= f_src // enable search mode
|
||||||
|
for i < in_txt.len {
|
||||||
|
//--- speed references ---
|
||||||
|
|
||||||
|
mut s := -1
|
||||||
|
mut e := -1
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
// NOTE(review): tos presumably wraps the bytes of in_txt from offset i
// without copying them (compare the slice variant commented out below) - confirm
tmp_str := tos(in_txt.str+i, in_txt.len-i)
|
||||||
|
s,e = re.match_string(tmp_str)
|
||||||
|
}
|
||||||
|
//------------------------
|
||||||
|
//s,e = re.find_imp(in_txt[i..])
|
||||||
|
//------------------------
|
||||||
|
if s >= 0 && e > s {
|
||||||
|
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
|
||||||
|
re.flag = old_flag
|
||||||
|
// s and e are relative to the tail that starts at i
return i+s, i+e
|
||||||
|
} else {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
re.flag = old_flag
|
||||||
return -1, -1
|
return -1, -1
|
||||||
}
|
}
|
||||||
|
|
||||||
// find_all find all the non overlapping occurrences of the match pattern
|
// find_all find all the non overlapping occurrences of the match pattern
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
pub fn (mut re RE) find_all(in_txt string) []int {
|
pub fn (mut re RE) find_all(in_txt string) []int {
|
||||||
|
//old_flag := re.flag
|
||||||
|
//re.flag |= f_src // enable search mode
|
||||||
|
|
||||||
mut i := 0
|
mut i := 0
|
||||||
mut res := []int{}
|
mut res := []int{}
|
||||||
mut ls := -1
|
mut ls := -1
|
||||||
|
@ -222,6 +267,7 @@ pub fn (mut re RE) find_all(in_txt string) []int {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
//re.flag = old_flag
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -295,34 +341,40 @@ pub type FnReplace = fn (re RE, in_txt string, start int, end int) string
|
||||||
// replace_by_fn returns a string where each match is replaced by the string returned from the repl_fn callback function
|
// replace_by_fn returns a string where each match is replaced by the string returned from the repl_fn callback function
|
||||||
pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
|
// New side, in short: search with find_from starting at i, copy the
// unmatched text between the previous match and this one, append the
// callback result, continue from the end of the match; stop at the first
// failed search and append the remaining tail.
pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
|
||||||
mut i := 0
|
mut i := 0
|
||||||
mut res := ""
|
// new side: output is accumulated in a strings builder created with in_txt.len
mut res := strings.new_builder(in_txt.len)
|
||||||
mut ls := -1
|
// new side: end index (in in_txt) of the last replaced match
mut last_end := 0
|
||||||
mut s1 := 0
|
|
||||||
|
|
||||||
for i < in_txt.len {
|
for i < in_txt.len {
|
||||||
s,e := re.find(in_txt[i..])
|
//println("Find Start. $i [${in_txt[i..]}]")
|
||||||
if s >= 0 && e > s && i+s > ls {
|
// new side: s and e come back from find_from as indexes into in_txt
s, e := re.find_from(in_txt,i)
|
||||||
//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")
|
//println("Find End.")
|
||||||
start := i + s
|
if s >= 0 && e > s {
|
||||||
end := i + e
|
//println("find match in: ${s},${e} [${in_txt[s..e]}]")
|
||||||
// update groups index displacement
|
|
||||||
mut gi := 0
|
// new side: copy the text between the previous match and this one
if last_end < s {
|
||||||
for gi < re.groups.len {
|
res.write_string(in_txt[last_end..s])
|
||||||
re.groups[gi] += i
|
|
||||||
gi++
|
|
||||||
}
|
}
|
||||||
repl := repl_fn(re, in_txt, start, end)
|
|
||||||
|
|
||||||
res += in_txt[s1..start] + repl
|
// new side: shift the start/end pair of each of the group_count groups by i
// NOTE(review): presumably this makes the group bounds index into in_txt
// for the callback; it relies on the match having been computed on the
// tail that starts at i - confirm against find_from/match_string
for g_i in 0..re.group_count {
|
||||||
s1 = end
|
re.groups[g_i << 1 ] += i
|
||||||
|
re.groups[(g_i << 1) + 1] += i
|
||||||
|
}
|
||||||
|
|
||||||
ls = i + s
|
repl := repl_fn(re, in_txt, s, e)
|
||||||
i = i + e
|
//println("repl res: $repl")
|
||||||
continue
|
res.write_string(repl)
|
||||||
|
//res.write_string("[[${in_txt[s..e]}]]")
|
||||||
|
|
||||||
|
last_end = e
|
||||||
|
// new side: the next search starts right after this match
i = e
|
||||||
} else {
|
} else {
|
||||||
i++
|
// new side: no further match, leave the loop; the tail is appended below
break
|
||||||
|
//i++
|
||||||
}
|
}
|
||||||
|
//println(i)
|
||||||
}
|
}
|
||||||
res += in_txt[s1..]
|
// new side: append whatever follows the last replaced match
if last_end >= 0 && last_end < in_txt.len {
|
||||||
return res
|
res.write_string(in_txt[last_end..])
|
||||||
|
}
|
||||||
|
return res.str()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue