From 1a324679b94910d787ab68bd4d140809aa5da2c6 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Sat, 3 Apr 2021 22:16:56 +0200 Subject: [PATCH] regex: added groups in replace strings (#9576) --- vlib/regex/README.md | 28 ++++++++++++++- vlib/regex/regex_test.v | 55 ++++++++++++++++++++++++++++- vlib/regex/regex_util.v | 77 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 154 insertions(+), 6 deletions(-) diff --git a/vlib/regex/README.md b/vlib/regex/README.md index 7b6611e9dc..76ff9803cf 100644 --- a/vlib/regex/README.md +++ b/vlib/regex/README.md @@ -544,10 +544,36 @@ pub fn (mut re RE) find_all_str(in_txt string) []string #### Replace functions ```v ignore -// replace return a string where the matches are replaced with the replace string, only non overlapped matches are used +// replace returns a string where the matches are replaced with the repl_str string, +// this function supports groups in the replace string pub fn (re mut RE) replace(in_txt string, repl string) string ``` +The replace string can include group references: + +```v ignore +txt := "Today it is a good day." +query := r'(a\w)[ ,.]' +mut re := regex.regex_opt(query)? +res := re.replace(txt, r"__[\0]__") +``` + +In this example we used the group `0` in the replace string: `\0`. The result will be: + +``` +Today it is a good day. => Tod__[ay]__it is a good d__[ay]__ +``` + +**Note:** only groups from `0` to `9` can be used in the replace string. + +If the use of groups in the replace process is not needed, it is possible +to use a quicker function: + +```v ignore +// replace_simple returns a string where the matches are replaced with the replace string +pub fn (mut re RE) replace_simple(in_txt string, repl string) string +``` + #### Custom replace function For complex find and replace operations it is available the function `replace_by_fn` . 
diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index 60bf748813..3e1912ab71 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -176,7 +176,41 @@ match_test_suite_replace = [ r"[Tt]o\w+", "CIAO", "CIAO is a good day and CIAO will be for sure." - } + }, + TestItemRe{ + "Today is a good day and tomorrow will be for sure.", + r"(a\w) ", + r"[\0] ", + "Tod[ay] is a good d[ay] and tomorrow will be for sure." + }, + TestItemRe{ + "Today is a good day and tomorrow will be for sure.", + r"(a\w) ", + r"[\0_\0] ", + "Tod[ay_ay] is a good d[ay_ay] and tomorrow will be for sure." + }, + TestItemRe{ + "Today is a good day and tomorrow will be for sure.", + r"(a\w) ", + r"[\0\1] ", + "Tod[ay] is a good d[ay] and tomorrow will be for sure." + }, +] + +match_test_suite_replace_simple = [ + // replace tests + TestItemRe{ + "oggi pibao è andato a casa di pbababao ed ha trovato pibabababao", + r"(pi?(ba)+o)", + "CIAO", + "oggi CIAO è andato a casa di CIAO ed ha trovato CIAO" + }, + TestItemRe{ + "Today is a good day and tomorrow will be for sure.", + r"[Tt]o\w+", + "CIAO", + "CIAO is a good day and CIAO will be for sure." 
+ }, ] ) @@ -425,6 +459,25 @@ fn test_regex(){ } } + // check replace simple + for c,to in match_test_suite_replace_simple{ + // debug print + if debug { println("#$c [$to.src] q[$to.q] $to.r") } + + mut re := regex.regex_opt(to.q) or { + eprintln('err: $err') + assert false + continue + } + + res := re.replace_simple(to.src,to.rep) + if res != to.r { + eprintln("ERROR: replace.") + assert false + continue + } + } + // check match and find for c,to in match_test_suite { // debug print diff --git a/vlib/regex/regex_util.v b/vlib/regex/regex_util.v index 96369cfb1f..44b97cd1bc 100644 --- a/vlib/regex/regex_util.v +++ b/vlib/regex/regex_util.v @@ -54,7 +54,9 @@ pub fn (re RE) get_group_by_name(in_txt string, group_name string) string { tmp_index := re.group_map[group_name]-1 start := re.groups[tmp_index * 2] end := re.groups[tmp_index * 2 + 1] - return in_txt[start..end] + if start >= 0 && end > start { + return in_txt[start..end] + } } return "" } @@ -65,7 +67,9 @@ pub fn (re RE) get_group_by_id(in_txt string, group_id int) string { index := group_id << 1 start := re.groups[index] end := re.groups[index + 1] - return in_txt[start..end] + if start >= 0 && end > start { + return in_txt[start..end] + } } return "" } @@ -307,8 +311,8 @@ pub fn (mut re RE) find_all_str(in_txt string) []string { * Replacers * ******************************************************************************/ -// replace return a string where the matches are replaced with the replace string -pub fn (mut re RE) replace(in_txt string, repl string) string { +// replace_simple return a string where the matches are replaced with the replace string +pub fn (mut re RE) replace_simple(in_txt string, repl string) string { pos := re.find_all(in_txt) if pos.len > 0 { @@ -331,6 +335,7 @@ pub fn (mut re RE) replace(in_txt string, repl string) string { return in_txt } + // type of function used for custom replace // in_txt source text // start index of the start of the match in in_txt @@ -378,3 +383,67 @@ 
pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string { } return res.str() } + + +fn (re RE) parsed_replace_string(in_txt string, repl string) string { + str_lst := repl.split("\\") + mut res := str_lst[0] + mut i := 1 + for i < str_lst.len { + tmp := str_lst[i] + //println("tmp: ${tmp}") + if tmp.len > 0 && tmp[0] >= `0` && tmp[0] <= `9` { + group_id := int(tmp[0] - `0`) + group := re.get_group_by_id(in_txt, group_id) + //println("group: $group_id [$group]") + res += "${group}${tmp[1..]}" + } else { + res += '\\'+tmp + } + i++ + } + return res +} + +// replace return a string where the matches are replaced with the repl_str string, +// this function support use groups in the replace string +pub fn (mut re RE) replace(in_txt string, repl_str string) string { + mut i := 0 + mut res := strings.new_builder(in_txt.len) + mut last_end := 0 + + for i < in_txt.len { + //println("Find Start. $i [${in_txt[i..]}]") + s, e := re.find_from(in_txt,i) + //println("Find End.") + if s >= 0 && e > s { + //println("find match in: ${s},${e} [${in_txt[s..e]}]") + + if last_end < s { + res.write_string(in_txt[last_end..s]) + } + + for g_i in 0..re.group_count { + re.groups[g_i << 1 ] += i + re.groups[(g_i << 1) + 1] += i + } + + //repl := repl_fn(re, in_txt, s, e) + repl := re.parsed_replace_string(in_txt, repl_str) + //println("repl res: $repl") + res.write_string(repl) + //res.write_string("[[${in_txt[s..e]}]]") + + last_end = e + i = e + } else { + break + //i++ + } + //println(i) + } + if last_end >= 0 && last_end < in_txt.len { + res.write_string(in_txt[last_end..]) + } + return res.str() +} \ No newline at end of file