v/vlib/regex/regex_test.v

353 lines
9.7 KiB
V
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import regex
/******************************************************************************
*
* Test section
*
******************************************************************************/
// TestItem is one row of the match/find table: an input text, a regex query
// and the byte offsets the engine is expected to report for it.
struct TestItem {
	src string // text handed to match_string()/find()
	q string // regex query to compile
	s int // expected start offset; the negative cases in the table use -1
	e int // expected end offset
}
const(
	// match_test_suite drives the "match and find" phase of test_regex().
	// Each row is {source text, query, expected start, expected end}.
	// Rows with a start > 0 are checked through find(); all other rows
	// (start == 0 and the negative rows with start == -1) are checked through
	// match_string(). Offsets are byte offsets into the source text.
	match_test_suite = [
		// positive
		TestItem{"this is a good.",r"this",0,4},
		TestItem{"this is a good.",r"good",10,14},
		TestItem{"this is a good.",r"go+d",10,14},
		TestItem{"this is a good.",r"g[oae]+d",10,14},
		TestItem{"this is a goed.",r"g[oae]+d",10,14},
		TestItem{"this is a good.",r"g[oae]*d",10,14},
		TestItem{"this is a goaezd.",r"g[ea-cm-z]*d",10,16},
		TestItem{"this is a good.",r"this (\w+) a",0,9},
		TestItem{"this is a good.",r"this( \w+){2} g",0,11},
		TestItem{"this is a good.",r"( ?\w+){,1}",0,4},
		TestItem{"this is a good.",r"( ?\w+)+",0,14},
		TestItem{"this is a good.",r"this( \w+)+",0,14},
		TestItem{"this is a good sample.",r"( ?\w+){,2}",0,7},
		TestItem{"this is a good sample.",r"( ?\w+){,3}",0,9},
		TestItem{"this is a good sample.",r"( ?\w+){,4}",0,14},
		TestItem{"this is a good sample.",r"( ?\w+){,5}",0,21},
		TestItem{"this is a good sample.",r"( ?\w+){2,3}",0,9},
		TestItem{"this is a good sample.",r"(\s?\w+){2,3}",0,9},
		TestItem{"this these those.",r"(th[ei]se?\s|\.)+",0,11},
		TestItem{"this these those ",r"(th[eio]se? ?)+",0,17},
		TestItem{"this these those ",r"(th[eio]se? )+",0,17},
		TestItem{"this,these,those. over",r"(th[eio]se?[,. ])+",0,17},
		TestItem{"soday,this,these,those. over",r"(th[eio]se?[,. ])+",6,23},
		TestItem{"cpapaz",r"(c(pa)+z)",0,6},
		TestItem{"this is a cpapaz over",r"(c(pa)+z)",10,16},
		TestItem{"this is a cpapapez over",r"(c(p[ae])+z)",10,18},
		TestItem{"test@post.pip.com",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,17},
		TestItem{"test1@post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",0,18},
		TestItem{"pippo@pera.com ",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,14},
		TestItem{"adce aabe",r"(a(ab)+)|(a(dc)+)e",0,4},
		TestItem{"zadce aabe",r"(a(ab)+)|(a(dc)+)e",1,5},
		TestItem{"abbz accz addz.",r"c|(d)|e|(ab+)",0,3},
		TestItem{"this those these ciao",r"((t[hieo]+se?)\s*)+",0,17},
		TestItem{"this ciao",r"((t[hieo]+se?)\s*)+",0,5},
		TestItem{"this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}",5,21},
		TestItem{"1234this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}$",9,25},
		TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}",5,21},
		TestItem{"123cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",3,20},
		TestItem{"this is a good sample.",r".*i(\w)+",0,4},
		TestItem{"soday,this,these,those. over",r".*,(th[eio]se?[,. ])+",0,23},
		TestItem{"soday,this,these,thesa.thesi over",r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+",0,29},
		TestItem{"cpapaz ole. pippo,",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
		TestItem{"cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,17},
		TestItem{"cpapaz ole. pippo, 852",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
		TestItem{"123cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
		TestItem{"...cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
		TestItem{"cpapaz ole. pippo,",r".*c.+ole.*pi",0,14},
		TestItem{"cpapaz ole. pipipo,",r".*c.+ole.*p([ip])+o",0,18},
		TestItem{"cpapaz ole. pipipo",r"^.*c.+ol?e.*p([ip])+o$",0,18},
		TestItem{"abbb",r"ab{2,3}?",0,3},
		TestItem{" pippo pera",r"\s(.*)pe(.*)",0,11},
		TestItem{" abb",r"\s(.*)",0,4},
		TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$",0,29},
		// negative
		TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
		TestItem{"this is a good.",r"thes",-1,0},
		TestItem{"test1post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",-1,0},
		TestItem{"this cpapaz adce",r"(c(pa)+z)(\s[\a]+){2}",-1,0},
		TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
		TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
		TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},
		TestItem{"/home/us_er/pippo/info-01.jpeg", r"(/?[-\w_]+)*\.txt$",-1,0},
		// check unicode
		// The Roman numerals U+2160..U+2165 are 3 bytes each in UTF-8, so
		// "this is a " (10 bytes) + six "numeral + space" pairs (24 bytes)
		// ends at byte 34, and "123" (3 bytes) + five pairs (20 bytes) ends
		// at byte 23 because the second class stops before U+2165.
		TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
		TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r"[Ⅰ-Ⅴ\s]+",3,23},
	]
)
// TestItemFa is one row of the find_all table.
struct TestItemFa {
	src string // text handed to find_all()
	q string // regex query to compile
	r []int // expected find_all() result, compared element by element
}
const (
	// match_test_suite_fa drives the find_all phase of test_regex().
	// Each row is {source text, query, expected index list}; the expected
	// lists below read as consecutive start,end byte offsets of every hit
	// (the accented "è" occupies two bytes).
	match_test_suite_fa = [
		// find_all tests
		TestItemFa{
			"oggi pippo è andato a casa di pluto ed ha trovato pippo",
			r"p[iplut]+o",
			[5, 10, 31, 36, 51, 56]
		},
		TestItemFa{
			"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
			r"(pi?(ba)+o)",
			[5, 10, 31, 39, 54, 65]
		},
	]
)
// TestItemRe is one row of the replace table.
struct TestItemRe {
	src string // text handed to replace()
	q string // regex query to compile
	rep string // replacement text passed to replace()
	r string // expected text after replacement
}
const (
	// match_test_suite_re drives the replace phase of test_regex().
	// Each row is {source text, query, replacement, expected result}.
	match_test_suite_re = [
		// replace tests
		TestItemRe{
			"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
			r"(pi?(ba)+o)",
			"CIAO",
			"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
		},
		TestItemRe{
			"Today is a good day and tomorrow will be for sure.",
			r"[Tt]o\w+",
			"CIAO",
			"CIAO is a good day and CIAO will be for sure."
		}
	]
)
// TestItemCGroup is one row of the capturing-group table.
struct TestItemCGroup {
	src string // text handed to match_string()
	q string // regex query to compile
	s int // expected match start offset
	e int // expected match end offset
	// Expected contents of re.group_csave: cg[0] is a record count and the
	// test compares the cg[0]*3 ints that follow it.
	// NOTE(review): each triple looks like (group id, start, end) - confirm.
	cg []int
	// Group name -> expected group index; checked against re.group_map[name]-1.
	cgn map[string]int
}
const (
	// cgroups_test_suite drives the capturing-group phase of test_regex().
	// Each row is {source text, query, expected start, expected end,
	// expected group_csave contents, expected name -> group index map}.
	cgroups_test_suite = [
		// named group, non-capturing group, named repeated group
		TestItemCGroup{
			"http://www.ciao.mondo/hello/pippo12_/pera.html",
			r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+",0,46,
			[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
			{'format':0,'token':1}
		},
		// the same group name used on both sides of the alternation
		TestItemCGroup{
			"http://www.ciao.mondo/hello/pippo12_/pera.html",
			r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
			[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
			{'format':0,'token':1}
		},
		// repeated name plus an unnamed repeated group
		TestItemCGroup{
			"http://www.ciao.mondo/hello/pippo12_/pera.html",
			r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+.)+",0,46,
			[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
			{'format':0}
		},
	]
)
// print_compile_error prints the report used whenever a query fails to
// compile: the query itself, a dashed ruler ending in '^' built from
// err_pos, and the textual form of the compile error.
fn print_compile_error(query string, err_pos int, err_str string) {
	println("query: $query")
	lc := "-".repeat(err_pos-1)
	println("err : $lc^")
	println("ERROR: $err_str")
}

// test_regex runs the four table-driven checks of this file in order:
//   1. capturing groups (cgroups_test_suite)
//   2. find_all         (match_test_suite_fa)
//   3. replace          (match_test_suite_re)
//   4. match / find     (match_test_suite)
// In every phase a query that does not compile is reported and fails the test.
fn test_regex(){
	// check capturing groups
	for c,to in cgroups_test_suite {
		// debug print
		//println("#$c [$to.src] q[$to.q] ($to.s, $to.e)")
		mut re, re_err, err_pos := regex.regex(to.q)
		// buffer that receives the saved group records: 1 counter + 3 ints * 20
		re.group_csave = [-1].repeat(3*20+1)
		if re_err == regex.COMPILE_OK {
			start, end := re.match_string(to.src)
			mut tmp_str := ""
			if start >= 0 && end > start{
				tmp_str = to.src[start..end]
			}
			if start != to.s || end != to.e {
				println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
				println("ERROR!")
				//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
				assert false
				break
			}
			// check cgroups
			if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
				println("Capturing group len error!")
				assert false
			}
			// check captured groups: walk the saved records backwards,
			// index 0 (the counter) was already compared above
			mut ln := re.group_csave[0]*3
			for ln > 0 {
				if re.group_csave[ln] != to.cg[ln] {
					println("Capturing group value error! [$ln]")
					assert false
				}
				ln--
			}
			// check named captured groups
			for k in to.cgn.keys() {
				if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
					println("Named capturing group error! [$k]")
					assert false
				}
			}
		} else {
			// a query that does not compile must fail the test instead of
			// being skipped silently
			print_compile_error(to.q, err_pos, re.get_parse_error_string(re_err))
			assert false
		}
	}
	// check find_all
	for _,to in match_test_suite_fa{
		// debug print
		//println("#$c [$to.src] q[$to.q] $to.r")
		mut re, re_err, err_pos := regex.regex(to.q)
		if re_err == regex.COMPILE_OK {
			res := re.find_all(to.src)
			if res.len != to.r.len {
				println("ERROR: find_all, array of different size.")
				assert false
			}
			for c1,i in res {
				if i != to.r[c1] {
					println("ERROR: find_all, different indexes.")
					assert false
				}
			}
		} else {
			print_compile_error(to.q, err_pos, re.get_parse_error_string(re_err))
			assert false
		}
	}
	// check replace
	for _,to in match_test_suite_re{
		// debug print
		//println("#$c [$to.src] q[$to.q] $to.r")
		mut re, re_err, err_pos := regex.regex(to.q)
		if re_err == regex.COMPILE_OK {
			res := re.replace(to.src,to.rep)
			if res != to.r {
				println("ERROR: replace.")
				assert false
			}
		} else {
			print_compile_error(to.q, err_pos, re.get_parse_error_string(re_err))
			assert false
		}
	}
	// check match and find
	for c,to in match_test_suite {
		// debug print
		//println("#$c [$to.src] q[$to.q] $to.s")
		// test the find: only rows whose expected start is past offset 0
		if to.s > 0 {
			mut re, re_err, err_pos := regex.regex(to.q)
			if re_err == regex.COMPILE_OK {
				//q_str := re.get_query()
				//println("Query: $q_str")
				start,end := re.find(to.src)
				if start != to.s || end != to.e {
					// identify the failing row before the error text
					println("#$c [$to.src] q[$to.q] find: $start, $end")
					// NOTE(review): start is passed as an error code here;
					// presumably a negative start encodes the failure
					// reason - confirm against the regex module.
					err_str := re.get_parse_error_string(start)
					println("ERROR : $err_str")
					assert false
				}
			} else {
				print_compile_error(to.q, err_pos, re.get_parse_error_string(re_err))
				assert false
			}
			continue
		}
		// test the match
		mut re := regex.new_regex()
		//re.debug = true
		re_err,err_pos := re.compile(to.q)
		if re_err == regex.COMPILE_OK {
			//println("#$c [$to.src] q[$to.q]")
			start, end := re.match_string(to.src)
			mut tmp_str := ""
			if start >= 0 && end > start{
				tmp_str = to.src[start..end]
			}
			if start != to.s || end != to.e {
				println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
				println("ERROR!")
				//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
				assert false
				break
			}
			// rerun on a copy of the input to test consistency: the same
			// compiled regex must give the same offsets twice
			tmp_str1 := to.src.clone()
			start1, end1 := re.match_string(tmp_str1)
			if start1 != start || end1 != end {
				println("two run ERROR!!")
				assert false
				break
			}
		} else {
			print_compile_error(to.q, err_pos, re.get_parse_error_string(re_err))
			assert false
			break
		}
	}
}