vlib.regex module in pure V
parent
6733b12ec3
commit
8ea0c08a38
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,157 @@
|
|||
import regex
|
||||
|
||||
// TestItem is one row of the regex test table: a source text, a query to
// compile, and the range the test expects the regex engine to report.
struct TestItem {
	// text handed to re.find / re.match_string
	src string
	// regex query handed to regex.regex / re.compile
	q string
	// expected start of the result; the negative cases in the table use -1
	s int
	// expected end of the result
	e int
}
|
||||
|
||||
// match_test_suite is the table walked by test_regex.
// Each entry is TestItem{source text, query, expected start, expected end}.
// Entries with an expected start greater than 0 are checked with re.find,
// all the others with re.match_string.
const (
	match_test_suite = [
		// positive

		// plain text, char classes and quantifiers
		TestItem{"this is a good.", r"this", 0, 4},
		TestItem{"this is a good.", r"good", 10, 14},
		TestItem{"this is a good.", r"go+d", 10, 14},
		TestItem{"this is a good.", r"g[oae]+d", 10, 14},
		TestItem{"this is a goed.", r"g[oae]+d", 10, 14},
		TestItem{"this is a good.", r"g[oae]*d", 10, 14},
		TestItem{"this is a goaezd.", r"g[ea-cm-z]*d", 10, 16},

		// groups with repetition counts
		TestItem{"this is a good.", r"this (\w+) a", 0, 9},
		TestItem{"this is a good.", r"this( \w+){2} g", 0, 11},
		TestItem{"this is a good.", r"( ?\w+){,1}", 0, 4},
		TestItem{"this is a good.", r"( ?\w+)+", 0, 14},
		TestItem{"this is a good.", r"this( \w+)+", 0, 14},
		TestItem{"this is a good sample.", r"( ?\w+){,2}", 0, 7},
		TestItem{"this is a good sample.", r"( ?\w+){,3}", 0, 9},
		TestItem{"this is a good sample.", r"( ?\w+){,4}", 0, 14},
		TestItem{"this is a good sample.", r"( ?\w+){,5}", 0, 21},
		TestItem{"this is a good sample.", r"( ?\w+){2,3}", 0, 9},
		TestItem{"this is a good sample.", r"(\s?\w+){2,3}", 0, 9},
		TestItem{"this is a good sample.", r".*i(\w)+", 0, 4},

		// repeated groups over word lists
		TestItem{"this these those.", r"(th[ei]se?\s|\.)+", 0, 11},
		TestItem{"this these those ", r"(th[eio]se? ?)+", 0, 17},
		TestItem{"this these those ", r"(th[eio]se? )+", 0, 17},
		TestItem{"this,these,those. over", r"(th[eio]se?[,. ])+", 0, 17},
		TestItem{"soday,this,these,those. over", r"(th[eio]se?[,. ])+", 6, 23},
		TestItem{"soday,this,these,those. over", r".*,(th[eio]se?[,. ])+", 0, 23},
		TestItem{"soday,this,these,thesa.thesi over", r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+", 0, 29},

		// nested groups
		TestItem{"cpapaz", r"(c(pa)+z)", 0, 6},
		TestItem{"this is a cpapaz over", r"(c(pa)+z)", 10, 16},
		TestItem{"this is a cpapapez over", r"(c(p[ae])+z)", 10, 18},

		// e-mail like queries
		TestItem{"test@post.pip.com", r"[a-z0-9_]+@([a-z0-9_]+\.?)+", 0, 17},
		TestItem{"test1@post.pip.com, pera", r"[\w]+@([\w]+\.)+\w+", 0, 18},
		TestItem{"pippo@pera.com ", r"[a-z0-9_]+@([a-z0-9_]+\.?)+", 0, 14},

		// alternation
		TestItem{"adce aabe", r"(a(ab)+)|(a(dc)+)e", 0, 4},
		TestItem{"zadce aabe", r"(a(ab)+)|(a(dc)+)e", 1, 5},
		TestItem{"abbz accz addz.", r"c|(d)|e|(ab+)", 0, 3},

		// longer mixed queries, some with ^ and $
		TestItem{"this those these ciao", r"((t[hieo]+se?)\s*)+", 0, 17},
		TestItem{"this ciao", r"((t[hieo]+se?)\s*)+", 0, 5},
		TestItem{"this cpapaz adce aabe", r"(c(pa)+z)(\s[\a]+){2}", 5, 21},
		TestItem{"1234this cpapaz adce aabe", r"(c(pa)+z)(\s[\a]+){2}$", 9, 25},
		TestItem{"this cpapaz adce aabe third", r"(c(pa)+z)(\s[\a]+){2}", 5, 21},
		TestItem{"cpapaz ole. pippo,", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 18},
		TestItem{"cpapaz ole. pippo", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 17},
		TestItem{"cpapaz ole. pippo, 852", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 18},
		TestItem{"123cpapaz ole. pippo", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 20},
		TestItem{"...cpapaz ole. pippo", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 20},
		TestItem{"123cpapaz ole. pippo", r"(c(pa)+z)(\s+\a+[\.,]?)+", 3, 20},
		TestItem{"cpapaz ole. pippo,", r".*c.+ole.*pi", 0, 14},
		TestItem{"cpapaz ole. pipipo,", r".*c.+ole.*p([ip])+o", 0, 18},
		TestItem{"cpapaz ole. pipipo", r"^.*c.+ol?e.*p([ip])+o$", 0, 18},

		// negative
		TestItem{"zthis ciao", r"((t[hieo]+se?)\s*)+", -1, 0},
		TestItem{"this is a good.", r"thes", -1, 0},
		TestItem{"test1post.pip.com, pera", r"[\w]+@([\w]+\.)+\w+", -1, 0},
		TestItem{"this cpapaz adce", r"(c(pa)+z)(\s[\a]+){2}", -1, 0},
		TestItem{"this cpapaz adce aabe third", r"(c(pa)+z)(\s[\a]+){2}$", -1, 0},
		TestItem{"1234this cpapaz adce aabe ter", r"(c(pa)+z)(\s[\a]+){2}$", -1, 0},
		TestItem{"cpapaz ole. pipipo,", r"^.*c.+ol?e.*p([ip])+o$", -1, 0},

		// check unicode
		TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test", r".*a [Ⅰ-Ⅵ ]+", 0, 34},
		TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test", r"[Ⅰ-Ⅴ\s]+", 3, 23},
	]
)
|
||||
|
||||
// test_regex runs every entry of match_test_suite through the regex module
// and fails on the first result that differs from the expected range.
//
// Entries whose expected start `s` is greater than 0 are checked with
// re.find. All the other entries (expected start 0, and the negative cases
// with expected start -1) are checked with re.match_string, which is then
// executed a second time on a clone of the source text to verify that the
// compiled regex gives the same result when reused.
fn test_regex(){
	for c,to in match_test_suite {
		// debug print
		//println("#$c [$to.src] q[$to.q] $to.s")

		// test the find
		if to.s > 0 {
			mut re, re_err, err_pos := regex.regex(to.q)
			if re_err == regex.COMPILE_OK {
				//q_str := re.get_query()
				//println("Query: $q_str")
				start,end := re.find(to.src)

				if start != to.s || end != to.e {
					// say which item failed and show found vs expected range,
					// the decoded string alone does not identify the failure
					println("#$c [$to.src] q[$to.q] find: $start, $end expected: $to.s, $to.e")
					// NOTE(review): presumably find reports a failure as a code
					// in `start`; confirm against the regex module
					err_str := re.get_parse_error_string(start)
					println("ERROR : $err_str")
					assert false
				}
				//tmp_str := text[start..end]
				//println("found in [$start, $end] => [$tmp_str]")

			} else {
				// the query did not compile: print it with a dashed marker
				// of err_pos-1 characters closed by a caret
				println("query: $to.q")
				lc := "-".repeat(err_pos-1)
				println("err : $lc^")
				err_str := re.get_parse_error_string(re_err)
				println("ERROR: $err_str")
				assert false
			}
			continue
		}

		// test the match
		mut re := regex.new_regex()
		//re.debug = true

		re_err,err_pos := re.compile(to.q)
		if re_err == regex.COMPILE_OK {
			//println("#$c [$to.src] q[$to.q]")
			start, end := re.match_string(to.src)

			// slice of the source that was matched, used only in the failure report
			mut tmp_str := ""
			if start >= 0 && end > start{
				tmp_str = to.src[start..end]
			}

			if start != to.s || end != to.e {
				println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
				println("ERROR!")
				C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
				assert false
				break
			}

			// rerun to test consistency
			tmp_str1 := to.src.clone()
			start1, end1 := re.match_string(tmp_str1)
			if start1 != start || end1 != end {
				println("two run ERROR!!")
				assert false
				break
			}

		} else {
			// same report as in the find branch, caret included
			println("query: $to.q")
			lc := "-".repeat(err_pos-1)
			println("err : $lc^")
			err_str := re.get_parse_error_string(re_err)
			println("ERROR: $err_str")
			assert false
			break
		}
	}
}
|
Loading…
Reference in New Issue