regex: add split (#14329)
parent
0bd8d872d1
commit
3fd63264d7
|
@ -9,9 +9,9 @@ import strings
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
struct TestItem {
|
struct TestItem {
|
||||||
src string
|
src string
|
||||||
q string
|
q string
|
||||||
s int
|
s int
|
||||||
e int
|
e int
|
||||||
}
|
}
|
||||||
|
|
||||||
const(
|
const(
|
||||||
|
@ -115,7 +115,7 @@ match_test_suite = [
|
||||||
TestItem{"12345678", r"^\d{8}$",0,8},
|
TestItem{"12345678", r"^\d{8}$",0,8},
|
||||||
TestItem{"12345678", r"^\d{7}$",-1,0},
|
TestItem{"12345678", r"^\d{7}$",-1,0},
|
||||||
TestItem{"12345678", r"^\d{9}$",-1,8},
|
TestItem{"12345678", r"^\d{9}$",-1,8},
|
||||||
|
|
||||||
TestItem{"eth", r"(oth)|(eth)",0,3},
|
TestItem{"eth", r"(oth)|(eth)",0,3},
|
||||||
TestItem{"et", r"(oth)|(eth)",-1,2},
|
TestItem{"et", r"(oth)|(eth)",-1,2},
|
||||||
TestItem{"et", r".*(oth)|(eth)",-1,2},
|
TestItem{"et", r".*(oth)|(eth)",-1,2},
|
||||||
|
@ -141,7 +141,7 @@ match_test_suite = [
|
||||||
TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
|
TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
|
||||||
TestItem{"p_p", r"\w+",0,3},
|
TestItem{"p_p", r"\w+",0,3},
|
||||||
TestItem{"p_é", r"\w+",0,2},
|
TestItem{"p_é", r"\w+",0,2},
|
||||||
|
|
||||||
// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
|
// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
|
||||||
TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},
|
TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},
|
||||||
TestItem{"+1*1", r"^(\d+)([*])(\d+)",-1,0},
|
TestItem{"+1*1", r"^(\d+)([*])(\d+)",-1,0},
|
||||||
|
@ -170,10 +170,11 @@ match_test_suite = [
|
||||||
|
|
||||||
struct TestItemRe {
|
struct TestItemRe {
|
||||||
src string
|
src string
|
||||||
q string
|
q string
|
||||||
rep string
|
rep string
|
||||||
r string
|
r string
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
match_test_suite_replace = [
|
match_test_suite_replace = [
|
||||||
// replace tests
|
// replace tests
|
||||||
|
@ -228,12 +229,13 @@ match_test_suite_replace_simple = [
|
||||||
|
|
||||||
struct TestItemCGroup {
|
struct TestItemCGroup {
|
||||||
src string
|
src string
|
||||||
q string
|
q string
|
||||||
s int
|
s int
|
||||||
e int
|
e int
|
||||||
cg []int // [number of items (3*# item), id_group_0, start_0, end_0, id_group_1, start1, start2,... ]
|
cg []int // [number of items (3*# item), id_group_0, start_0, end_0, id_group_1, start1, start2,... ]
|
||||||
cgn map[string]int
|
cgn map[string]int
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
cgroups_test_suite = [
|
cgroups_test_suite = [
|
||||||
TestItemCGroup{
|
TestItemCGroup{
|
||||||
|
@ -276,13 +278,13 @@ cgroups_test_suite = [
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
struct Test_find_all {
|
struct Test_find_all {
|
||||||
src string
|
src string
|
||||||
q string
|
q string
|
||||||
res []int // [0,4,5,6...]
|
res []int // [0,4,5,6...]
|
||||||
res_str []string // ['find0','find1'...]
|
res_str []string // ['find0','find1'...]
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
find_all_test_suite = [
|
find_all_test_suite = [
|
||||||
Test_find_all{
|
Test_find_all{
|
||||||
|
@ -361,16 +363,52 @@ find_all_test_suite = [
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Test_split describes one case of the split check in test_regex:
// a text, the regex used to cut it, and the sections expected back.
struct Test_split {
	// text passed to `re.split`
	src string
	// regex query compiled with `regex.regex_opt`
	q string
	// expected sections, compared with the result of split, e.g. ['abc','def',...]
	res []string
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
debug = true // true for debug println
|
// cases for the split check in test_regex: each entry is split with `q`
// and the returned sections are compared with `res`
split_test_suite = [
	Test_split{
		src: 'abcd 1234 efgh 1234 ghkl1234 ab34546df'
		q: r'\d+'
		res: ['abcd ', ' efgh ', ' ghkl', ' ab', 'df']
	},
	Test_split{
		src: 'abcd 1234 efgh 1234 ghkl1234 ab34546df'
		q: r'\a+'
		res: [' 1234 ', ' 1234 ', '1234 ', '34546']
	},
	Test_split{
		src: 'oggi pippo è andato a casa di pluto ed ha trovato pippo'
		q: r'p[iplut]+o'
		res: ['oggi ', ' è andato a casa di ', ' ed ha trovato ']
	},
	Test_split{
		src: 'oggi pibao è andato a casa di pbababao ed ha trovato pibabababao'
		q: r'(pi?(ba)+o)'
		res: ['oggi ', ' è andato a casa di ', ' ed ha trovato ']
	},
	Test_split{
		src: 'Today is a good day and tomorrow will be for sure.'
		q: r'[Tt]o\w+'
		res: [' is a good day and ', ' will be for sure.']
	},
	Test_split{
		src: 'pera\nurl = https://github.com/dario/pig.html\npippo'
		q: r'url *= *https?://[\w./]+'
		res: ['pera\n', '\npippo']
	},
	Test_split{
		src: 'pera\nurl = https://github.com/dario/pig.html\npippo'
		q: r'url *= *https?://.*' + '\n'
		res: ['pera\n', 'pippo']
	},
	Test_split{
		src: '#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.'
		q: r'#[.#]{4}##[.#]{4}##[.#]{4}###'
		res: ['#.#......##.#..#..##........#', '##.......#.....#..#......#...#........###.#..#.']
	},
	Test_split{
		src: '#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.'
		q: r'.*#[.#]{4}##[.#]{4}##[.#]{4}###'
		res: ['##.......#.....#..#......#...#........###.#..#.']
	},
	Test_split{
		src: '1234 Aa dddd Aaf 12334 Aa opopo Aaf'
		q: r'Aa.+Aaf'
		res: ['1234 ', ' 12334 ']
	},
	Test_split{
		src: '@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo'
		q: r'@for.+@endfor'
		res: [' ', ' altro testo ', ' uno due ', ' pippo']
	},
	Test_split{
		src: '+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++'
		q: r'\+{3}.*\+{3}'
		res: ['\n elvo ', ' ']
	},
	// the pattern never matches: the whole text is expected back as one section
	Test_split{
		src: 'foobar'
		q: r'\d'
		res: ['foobar']
	},
	// the pattern covers the whole text: no section is expected
	Test_split{
		src: '1234'
		q: r'\d+'
		res: []string{}
	},
]
|
||||||
)
|
)
|
||||||
|
|
||||||
fn test_regex(){
|
const (
|
||||||
|
debug = true // true for debug println
|
||||||
|
)
|
||||||
|
|
||||||
|
fn test_regex() {
|
||||||
// check capturing groups
|
// check capturing groups
|
||||||
for c,to in cgroups_test_suite {
|
for c, to in cgroups_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
if debug {
|
if debug {
|
||||||
println("$c [${to.src}] [q${to.q}] (${to.s}, ${to.e})")
|
println('$c [$to.src] [q$to.q] ($to.s, $to.e)')
|
||||||
}
|
}
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
|
@ -381,39 +419,43 @@ fn test_regex(){
|
||||||
|
|
||||||
if to.cgn.len > 0 {
|
if to.cgn.len > 0 {
|
||||||
re.group_csave_flag = true
|
re.group_csave_flag = true
|
||||||
//re.group_csave = [-1].repeat(3*20+1)
|
// re.group_csave = [-1].repeat(3*20+1)
|
||||||
if debug { println("continuous save")}
|
if debug {
|
||||||
|
println('continuous save')
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if debug { println("NO continuous save")}
|
if debug {
|
||||||
|
println('NO continuous save')
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
start, end := re.match_string(to.src)
|
start, end := re.match_string(to.src)
|
||||||
|
|
||||||
mut tmp_str := ""
|
mut tmp_str := ''
|
||||||
if start >= 0 && end > start{
|
if start >= 0 && end > start {
|
||||||
tmp_str = to.src[start..end]
|
tmp_str = to.src[start..end]
|
||||||
}
|
}
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] base:[${to.s},${to.e}] $start, $end")
|
println('#$c [$to.src] q[$to.q] res[$tmp_str] base:[$to.s,$to.e] $start, $end')
|
||||||
eprintln("ERROR!")
|
eprintln('ERROR!')
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// check cgroups
|
// check cgroups
|
||||||
if to.cgn.len > 0 {
|
if to.cgn.len > 0 {
|
||||||
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
||||||
eprintln("Capturing group len error! found: ${re.group_csave[0]} true ground: ${to.cg[0]}")
|
eprintln('Capturing group len error! found: ${re.group_csave[0]} true ground: ${to.cg[0]}')
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// check captured groups
|
// check captured groups
|
||||||
mut ln := re.group_csave[0]*3
|
mut ln := re.group_csave[0] * 3
|
||||||
for ln > 0 {
|
for ln > 0 {
|
||||||
if re.group_csave[ln] != to.cg[ln] {
|
if re.group_csave[ln] != to.cg[ln] {
|
||||||
eprintln("Capturing group failed on $ln item!")
|
eprintln('Capturing group failed on $ln item!')
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
ln--
|
ln--
|
||||||
|
@ -421,8 +463,8 @@ fn test_regex(){
|
||||||
|
|
||||||
// check named captured groups
|
// check named captured groups
|
||||||
for k in to.cgn.keys() {
|
for k in to.cgn.keys() {
|
||||||
if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
|
if to.cgn[k] != (re.group_map[k] - 1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
|
||||||
eprintln("Named capturing group error! [$k]")
|
eprintln('Named capturing group error! [$k]')
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -432,21 +474,23 @@ fn test_regex(){
|
||||||
if re.groups.len != to.cg.len {
|
if re.groups.len != to.cg.len {
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
for ln:=0; ln < re.groups.len; ln++ {
|
for ln := 0; ln < re.groups.len; ln++ {
|
||||||
if re.groups[ln] != to.cg[ln] {
|
if re.groups[ln] != to.cg[ln] {
|
||||||
eprintln("Capture group doesn't match:")
|
eprintln("Capture group doesn't match:")
|
||||||
eprintln("true ground: ${to.cg}")
|
eprintln('true ground: $to.cg')
|
||||||
eprintln("elaborated : ${re.groups}")
|
eprintln('elaborated : $re.groups')
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check find_all
|
// check find_all
|
||||||
for c,to in find_all_test_suite {
|
for c, to in find_all_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
if debug { println("#$c [$to.src] q[$to.q] ($to.res, $to.res_str)") }
|
if debug {
|
||||||
|
println('#$c [$to.src] q[$to.q] ($to.res, $to.res_str)')
|
||||||
|
}
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
|
@ -458,22 +502,28 @@ fn test_regex(){
|
||||||
res := re.find_all(to.src)
|
res := re.find_all(to.src)
|
||||||
if res != to.res {
|
if res != to.res {
|
||||||
eprintln('err: find_all !!')
|
eprintln('err: find_all !!')
|
||||||
if debug { println("#$c exp: $to.res calculated: $res") }
|
if debug {
|
||||||
|
println('#$c exp: $to.res calculated: $res')
|
||||||
|
}
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
|
|
||||||
res_str := re.find_all_str(to.src)
|
res_str := re.find_all_str(to.src)
|
||||||
if res_str != to.res_str {
|
if res_str != to.res_str {
|
||||||
eprintln('err: find_all_str !!')
|
eprintln('err: find_all_str !!')
|
||||||
if debug { println("#$c exp: $to.res_str calculated: $res_str") }
|
if debug {
|
||||||
|
println('#$c exp: $to.res_str calculated: $res_str')
|
||||||
|
}
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check replace
|
// check split
|
||||||
for c,to in match_test_suite_replace{
|
for c, to in split_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
|
if debug {
|
||||||
|
println('#$c [$to.src] q[$to.q] ($to.res)')
|
||||||
|
}
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
|
@ -481,18 +531,42 @@ fn test_regex(){
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
res := re.replace(to.src,to.rep)
|
re.reset()
|
||||||
|
res := re.split(to.src)
|
||||||
|
if res != to.res {
|
||||||
|
eprintln('err: split !!')
|
||||||
|
if debug {
|
||||||
|
println('#$c exp: $to.res calculated: $res')
|
||||||
|
}
|
||||||
|
assert false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check replace
|
||||||
|
for c, to in match_test_suite_replace {
|
||||||
|
// debug print
|
||||||
|
if debug {
|
||||||
|
println('#$c [$to.src] q[$to.q] $to.r')
|
||||||
|
}
|
||||||
|
|
||||||
|
mut re := regex.regex_opt(to.q) or {
|
||||||
|
eprintln('err: $err')
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
res := re.replace(to.src, to.rep)
|
||||||
if res != to.r {
|
if res != to.r {
|
||||||
eprintln("ERROR: replace.")
|
eprintln('ERROR: replace.')
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check replace simple
|
// check replace simple
|
||||||
for c,to in match_test_suite_replace_simple{
|
for c, to in match_test_suite_replace_simple {
|
||||||
// debug print
|
// debug print
|
||||||
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
|
if debug { println('#$c [$to.src] q[$to.q] $to.r') }
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
|
@ -500,18 +574,18 @@ fn test_regex(){
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
res := re.replace_simple(to.src,to.rep)
|
res := re.replace_simple(to.src, to.rep)
|
||||||
if res != to.r {
|
if res != to.r {
|
||||||
eprintln("ERROR: replace.")
|
eprintln('ERROR: replace.')
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check match and find
|
// check match and find
|
||||||
for c,to in match_test_suite {
|
for c, to in match_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
if debug { println("#$c [$to.src] q[$to.q] $to.s $to.e") }
|
if debug { println('#$c [$to.src] q[$to.q] $to.s $to.e') }
|
||||||
|
|
||||||
// test the find
|
// test the find
|
||||||
if to.s > 0 {
|
if to.s > 0 {
|
||||||
|
@ -522,15 +596,15 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
// q_str := re.get_query()
|
// q_str := re.get_query()
|
||||||
// eprintln("Query: $q_str")
|
// eprintln("Query: $q_str")
|
||||||
start,end := re.find(to.src)
|
start, end := re.find(to.src)
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
err_str := re.get_parse_error_string(start)
|
err_str := re.get_parse_error_string(start)
|
||||||
eprintln("ERROR : $err_str start: ${start} end: ${end}")
|
eprintln('ERROR : $err_str start: $start end: $end')
|
||||||
assert false
|
assert false
|
||||||
} else {
|
} else {
|
||||||
//tmp_str := text[start..end]
|
// tmp_str := text[start..end]
|
||||||
//println("found in [$start, $end] => [$tmp_str]")
|
// println("found in [$start, $end] => [$tmp_str]")
|
||||||
assert true
|
assert true
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
|
@ -538,24 +612,24 @@ fn test_regex(){
|
||||||
|
|
||||||
// test the match
|
// test the match
|
||||||
mut re := regex.new()
|
mut re := regex.new()
|
||||||
//re.debug = true
|
// re.debug = true
|
||||||
|
|
||||||
re.compile_opt(to.q) or {
|
re.compile_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
//println("#$c [$to.src] q[$to.q]")
|
// println("#$c [$to.src] q[$to.q]")
|
||||||
start, end := re.match_string(to.src)
|
start, end := re.match_string(to.src)
|
||||||
|
|
||||||
mut tmp_str := ""
|
mut tmp_str := ''
|
||||||
if start >= 0 && end > start{
|
if start >= 0 && end > start {
|
||||||
tmp_str = to.src[start..end]
|
tmp_str = to.src[start..end]
|
||||||
}
|
}
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
eprintln("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
eprintln('#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end')
|
||||||
eprintln("ERROR!")
|
eprintln('ERROR!')
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -571,67 +645,66 @@ fn test_regex(){
|
||||||
tmp_str1 := to.src.clone()
|
tmp_str1 := to.src.clone()
|
||||||
start1, end1 := re.match_string(tmp_str1)
|
start1, end1 := re.match_string(tmp_str1)
|
||||||
if start1 != start || end1 != end {
|
if start1 != start || end1 != end {
|
||||||
eprintln("two run ERROR!!")
|
eprintln('two run ERROR!!')
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if debug { println("DONE!") }
|
if debug { println('DONE!') }
|
||||||
}
|
}
|
||||||
|
|
||||||
// test regex_base function
|
// test regex_base function
|
||||||
fn test_regex_func(){
|
fn test_regex_func() {
|
||||||
query := r"\d\dabcd"
|
query := r'\d\dabcd'
|
||||||
test_str := "78abcd"
|
test_str := '78abcd'
|
||||||
mut re, re_err, err_pos := regex.regex_base(query)
|
mut re, re_err, err_pos := regex.regex_base(query)
|
||||||
if re_err == regex.compile_ok {
|
if re_err == regex.compile_ok {
|
||||||
start, end := re.match_string(test_str)
|
start, end := re.match_string(test_str)
|
||||||
assert (start == 0) && (end == 6)
|
assert (start == 0) && (end == 6)
|
||||||
} else {
|
} else {
|
||||||
eprintln("Error in query string in pos ${err_pos}")
|
eprintln('Error in query string in pos $err_pos')
|
||||||
eprintln("Error: ${re.get_parse_error_string(re_err)}")
|
eprintln('Error: ${re.get_parse_error_string(re_err)}')
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn my_repl_1(re regex.RE, in_txt string, start int, end int) string {
|
fn my_repl_1(re regex.RE, in_txt string, start int, end int) string {
|
||||||
s0 := re.get_group_by_id(in_txt,0)
|
s0 := re.get_group_by_id(in_txt, 0)
|
||||||
println("[$start, $end] => ${s0}")
|
println('[$start, $end] => $s0')
|
||||||
return "a" + s0.to_upper()
|
return 'a' + s0.to_upper()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_regex_func_replace1(){
|
fn test_regex_func_replace1() {
|
||||||
txt := "abbabbbabbbbaabba"
|
txt := 'abbabbbabbbbaabba'
|
||||||
query := r"a(b+)"
|
query := r'a(b+)'
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
result := re.replace_by_fn(txt, my_repl_1)
|
result := re.replace_by_fn(txt, my_repl_1)
|
||||||
|
|
||||||
assert result == "aBBaBBBaBBBBaaBBa"
|
assert result == 'aBBaBBBaBBBBaaBBa'
|
||||||
}
|
}
|
||||||
|
|
||||||
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
|
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
|
||||||
s0 := re.get_group_by_id(in_txt,0)[0..1] + "X"
|
s0 := re.get_group_by_id(in_txt, 0)[0..1] + 'X'
|
||||||
s1 := re.get_group_by_id(in_txt,1)[0..1] + "X"
|
s1 := re.get_group_by_id(in_txt, 1)[0..1] + 'X'
|
||||||
s2 := re.get_group_by_id(in_txt,2)[0..1] + "X"
|
s2 := re.get_group_by_id(in_txt, 2)[0..1] + 'X'
|
||||||
return "${s0}${s1}${s2}"
|
return '$s0$s1$s2'
|
||||||
}
|
}
|
||||||
|
|
||||||
// test regex replace function
|
// test regex replace function
|
||||||
fn test_regex_func_replace(){
|
fn test_regex_func_replace() {
|
||||||
filler := "E il primo dei tre regni dell'Oltretomba cristiano visitato da Dante nel corso del viaggio, con la guida di Virgilio."
|
filler := "E il primo dei tre regni dell'Oltretomba cristiano visitato da Dante nel corso del viaggio, con la guida di Virgilio."
|
||||||
txt := r'"content": "They dont necessarily flag "you will be buying these shares on margin!"", "channel_id"'
|
txt := r'"content": "They dont necessarily flag "you will be buying these shares on margin!"", "channel_id"'
|
||||||
query := r'"(content":\s+")(.*)(, "channel_id")'
|
query := r'"(content":\s+")(.*)(, "channel_id")'
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
|
|
||||||
mut txt1 := ""
|
mut txt1 := ''
|
||||||
mut txt2 := ""
|
mut txt2 := ''
|
||||||
|
|
||||||
for _ in 0..3 {
|
for _ in 0 .. 3 {
|
||||||
rnd := int(10+rand.u32() % 20)
|
rnd := int(10 + rand.u32() % 20)
|
||||||
txt1 += txt + filler[0..rnd] + "\n"
|
txt1 += txt + filler[0..rnd] + '\n'
|
||||||
txt2 += "cXTX,X" + filler[0..rnd] + "\n"
|
txt2 += 'cXTX,X' + filler[0..rnd] + '\n'
|
||||||
}
|
}
|
||||||
|
|
||||||
result := re.replace_by_fn(txt1, my_repl)
|
result := re.replace_by_fn(txt1, my_repl)
|
||||||
|
@ -642,42 +715,43 @@ fn test_regex_func_replace(){
|
||||||
assert result == txt2
|
assert result == txt2
|
||||||
}
|
}
|
||||||
|
|
||||||
fn rest_regex_replace_n(){
|
fn rest_regex_replace_n() {
|
||||||
s := "dario 1234 pepep 23454 pera"
|
s := 'dario 1234 pepep 23454 pera'
|
||||||
query := r"\d+"
|
query := r'\d+'
|
||||||
|
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
|
|
||||||
assert re.replace_n(s, "[repl]", 0) == "dario 1234 pepep 23454 pera"
|
assert re.replace_n(s, '[repl]', 0) == 'dario 1234 pepep 23454 pera'
|
||||||
assert re.replace_n(s, "[repl]", -1) == "dario 1234 pepep [repl] pera"
|
assert re.replace_n(s, '[repl]', -1) == 'dario 1234 pepep [repl] pera'
|
||||||
assert re.replace_n(s, "[repl]", 1) == "dario [repl] pepep 23454 pera"
|
assert re.replace_n(s, '[repl]', 1) == 'dario [repl] pepep 23454 pera'
|
||||||
assert re.replace_n(s, "[repl]", 2) == "dario [repl] pepep [repl] pera"
|
assert re.replace_n(s, '[repl]', 2) == 'dario [repl] pepep [repl] pera'
|
||||||
assert re.replace_n(s, "[repl]", -2) == "dario [repl] pepep [repl] pera"
|
assert re.replace_n(s, '[repl]', -2) == 'dario [repl] pepep [repl] pera'
|
||||||
assert re.replace_n(s, "[repl]", 3) == "dario [repl] pepep [repl] pera"
|
assert re.replace_n(s, '[repl]', 3) == 'dario [repl] pepep [repl] pera'
|
||||||
assert re.replace_n(s, "[repl]", -3) == "dario [repl] pepep [repl] pera"
|
assert re.replace_n(s, '[repl]', -3) == 'dario [repl] pepep [repl] pera'
|
||||||
|
|
||||||
//mut res := re.replace_n(s, "[repl]", -1)
|
// mut res := re.replace_n(s, "[repl]", -1)
|
||||||
//println("source: ${s}")
|
// println("source: ${s}")
|
||||||
//println("res : ${res}")
|
// println("res : ${res}")
|
||||||
}
|
}
|
||||||
|
|
||||||
// test quantifier wrong sequences
|
// test quantifier wrong sequences
|
||||||
const(
|
const (
|
||||||
test_quantifier_sequences_list = [
|
test_quantifier_sequences_list = [
|
||||||
r'+{3}.*+{3}',
|
r'+{3}.*+{3}',
|
||||||
r'+{3}.*?{3}',
|
r'+{3}.*?{3}',
|
||||||
r'+{3}.**{3}',
|
r'+{3}.**{3}',
|
||||||
r'+{3}.*\+{3}*',
|
r'+{3}.*\+{3}*',
|
||||||
r'+{3}.*\+{3}+',
|
r'+{3}.*\+{3}+',
|
||||||
r'+{3}.*\+{3}??',
|
r'+{3}.*\+{3}??',
|
||||||
r'+{3}.*\+{3}{4}'
|
r'+{3}.*\+{3}{4}',
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
fn test_quantifier_sequences(){
|
|
||||||
|
fn test_quantifier_sequences() {
|
||||||
for pattern in test_quantifier_sequences_list {
|
for pattern in test_quantifier_sequences_list {
|
||||||
re, re_err, err_pos := regex.regex_base(pattern)
|
re, re_err, err_pos := regex.regex_base(pattern)
|
||||||
if re_err != regex.err_syntax_error {
|
if re_err != regex.err_syntax_error {
|
||||||
eprintln("pattern: $pattern => $re_err")
|
eprintln('pattern: $pattern => $re_err')
|
||||||
}
|
}
|
||||||
assert re_err == regex.err_syntax_error
|
assert re_err == regex.err_syntax_error
|
||||||
}
|
}
|
||||||
|
@ -686,11 +760,12 @@ fn test_quantifier_sequences(){
|
||||||
// test group index in find
|
// test group index in find
|
||||||
struct Test_find_groups {
|
struct Test_find_groups {
|
||||||
src string
|
src string
|
||||||
q string
|
q string
|
||||||
s int // start index
|
s int // start index
|
||||||
e int // end index
|
e int // end index
|
||||||
res []int // groups indexes
|
res []int // groups indexes
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
find_groups_test_suite = [
|
find_groups_test_suite = [
|
||||||
Test_find_groups{
|
Test_find_groups{
|
||||||
|
@ -716,7 +791,8 @@ find_groups_test_suite = [
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
fn test_groups_in_find(){
|
|
||||||
|
fn test_groups_in_find() {
|
||||||
for test_obj in find_groups_test_suite {
|
for test_obj in find_groups_test_suite {
|
||||||
src_text := test_obj.src
|
src_text := test_obj.src
|
||||||
query := test_obj.q
|
query := test_obj.q
|
||||||
|
@ -744,12 +820,13 @@ fn test_groups_in_find(){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const(
|
const (
|
||||||
err_query_list = [
|
err_query_list = [
|
||||||
r'([a]|[b])*'
|
r'([a]|[b])*',
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
fn test_errors(){
|
|
||||||
|
fn test_errors() {
|
||||||
mut count := 0
|
mut count := 0
|
||||||
for query in err_query_list {
|
for query in err_query_list {
|
||||||
_, err, _ := regex.regex_base(query)
|
_, err, _ := regex.regex_base(query)
|
||||||
|
@ -760,65 +837,65 @@ fn test_errors(){
|
||||||
assert count == err_query_list.len
|
assert count == err_query_list.len
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fn test_long_query() {
|
fn test_long_query() {
|
||||||
test_len := 32768
|
test_len := 32768
|
||||||
mut buf := strings.new_builder(test_len * 3)
|
mut buf := strings.new_builder(test_len * 3)
|
||||||
base_string := rand.string(test_len)
|
base_string := rand.string(test_len)
|
||||||
|
|
||||||
for c in base_string {
|
for c in base_string {
|
||||||
buf.write_u8(`(`)
|
buf.write_u8(`(`)
|
||||||
buf.write_u8(c)
|
buf.write_u8(c)
|
||||||
buf.write_u8(`)`)
|
buf.write_u8(`)`)
|
||||||
}
|
}
|
||||||
|
|
||||||
mut query := buf.str()
|
mut query := buf.str()
|
||||||
|
|
||||||
//println(base_string)
|
|
||||||
//println(buf.str())
|
|
||||||
|
|
||||||
// test 1
|
// println(base_string)
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
// println(buf.str())
|
||||||
mut start, mut end := re.match_string(base_string)
|
|
||||||
//println("$start, $end")
|
|
||||||
assert start >= 0 && end == base_string.len
|
|
||||||
|
|
||||||
// test 2
|
// test 1
|
||||||
buf.clear()
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
for c in base_string {
|
mut start, mut end := re.match_string(base_string)
|
||||||
buf.write_u8(`(`)
|
// println("$start, $end")
|
||||||
buf.write_u8(c)
|
assert start >= 0 && end == base_string.len
|
||||||
}
|
|
||||||
for _ in 0..base_string.len {
|
// test 2
|
||||||
buf.write_u8(`)`)
|
buf.clear()
|
||||||
}
|
for c in base_string {
|
||||||
query = buf.str()
|
buf.write_u8(`(`)
|
||||||
re = regex.regex_opt(query) or { panic(err) }
|
buf.write_u8(c)
|
||||||
start, end = re.match_string(base_string)
|
}
|
||||||
//println("$start, $end")
|
for _ in 0 .. base_string.len {
|
||||||
assert start >= 0 && end == base_string.len
|
buf.write_u8(`)`)
|
||||||
|
}
|
||||||
|
query = buf.str()
|
||||||
|
re = regex.regex_opt(query) or { panic(err) }
|
||||||
|
start, end = re.match_string(base_string)
|
||||||
|
// println("$start, $end")
|
||||||
|
assert start >= 0 && end == base_string.len
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct Test_negation_group {
|
struct Test_negation_group {
|
||||||
src string
|
src string
|
||||||
res bool
|
res bool
|
||||||
}
|
}
|
||||||
const(
|
|
||||||
|
const (
|
||||||
negation_groups = [
|
negation_groups = [
|
||||||
Test_negation_group{'automobile',false},
|
Test_negation_group{'automobile', false},
|
||||||
Test_negation_group{'botomobile',true},
|
Test_negation_group{'botomobile', true},
|
||||||
Test_negation_group{'auto_caravan',false},
|
Test_negation_group{'auto_caravan', false},
|
||||||
Test_negation_group{'moto_mobile',true},
|
Test_negation_group{'moto_mobile', true},
|
||||||
Test_negation_group{'pippole',true},
|
Test_negation_group{'pippole', true},
|
||||||
Test_negation_group{'boring test',false},
|
Test_negation_group{'boring test', false},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
fn test_negation_groups() {
|
fn test_negation_groups() {
|
||||||
mut query := r"(?!auto)\w+le"
|
mut query := r'(?!auto)\w+le'
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
for test in negation_groups {
|
for test in negation_groups {
|
||||||
start, end := re.match_string(test.src)
|
start, end := re.match_string(test.src)
|
||||||
assert (start >= 0) == test.res
|
assert (start >= 0) == test.res
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -293,6 +293,38 @@ pub fn (mut re RE) find_all(in_txt string) []int {
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// split cuts `in_txt` at the positions reported by `find_all` and returns the
// pieces of text lying between them, in order of appearance.
// A piece is skipped when the match in front of it starts at index 0, and no
// trailing piece is added when the last match ends exactly at the end of
// `in_txt`. A piece between two matches is always kept, even when it is empty.
// When `find_all` reports nothing, the result is `[in_txt]`.
//
// Usage:
// ```v
// blurb := 'foobar boo steelbar toolbox foot tooooot'
// mut re := regex.regex_opt('f|t[eo]+')?
// res := re.split(blurb) // ['bar boo s', 'lbar ', 'lbox ', 't ', 't']
// ```
pub fn (mut re RE) split(in_txt string) []string {
	// NOTE(review): the indexing below reads the result of find_all as a flat
	// list [start0, end0, start1, end1, ...] - confirm against find_all
	bounds := re.find_all(in_txt)
	if bounds.len == 0 {
		return [in_txt]
	}

	mut parts := []string{cap: bounds.len / 2 + 1}
	for idx := 0; idx < bounds.len; idx += 2 {
		match_start := bounds[idx]
		if match_start == 0 {
			// nothing precedes a match that begins at the very start of the text
			continue
		}
		// a piece begins where the previous match stopped, or at the text start
		piece_start := if idx == 0 { 0 } else { bounds[idx - 1] }
		parts << in_txt[piece_start..match_start]
	}

	// keep what follows the last match, unless that match ran up to the end
	last_end := bounds[bounds.len - 1]
	if last_end != in_txt.len {
		parts << in_txt[last_end..]
	}
	return parts
}
|
||||||
|
|
||||||
// find_all_str find all the non overlapping occurrences of the match pattern, return a string list
|
// find_all_str find all the non overlapping occurrences of the match pattern, return a string list
|
||||||
[direct_array_access]
|
[direct_array_access]
|
||||||
pub fn (mut re RE) find_all_str(in_txt string) []string {
|
pub fn (mut re RE) find_all_str(in_txt string) []string {
|
||||||
|
|
Loading…
Reference in New Issue