2020-01-13 13:30:41 +01:00
|
|
|
|
import regex
|
2021-03-20 00:54:12 +01:00
|
|
|
|
import rand
|
2021-12-01 07:38:50 +01:00
|
|
|
|
import strings
|
2020-01-13 13:30:41 +01:00
|
|
|
|
|
2020-01-14 02:45:08 +01:00
|
|
|
|
/******************************************************************************
|
|
|
|
|
*
|
|
|
|
|
* Test section
|
|
|
|
|
*
|
|
|
|
|
******************************************************************************/
|
2020-01-13 13:30:41 +01:00
|
|
|
|
// TestItem describes one match/find case of `match_test_suite`.
struct TestItem {
	src string // text handed to the regex engine
	q   string // regex query to compile
	s   int    // expected start index (the match check treats s < 0 as "no match")
	e   int    // expected end index
}
|
|
|
|
|
|
|
|
|
|
const (
	// Cases consumed by the "match and find" loop of test_regex():
	// entries with s > 0 are checked through `find`, all the others
	// through `match_string` / `matches_string`.
	match_test_suite = [
		// minus inside a char class
		TestItem{"d.def", r"abc.\.[\w\-]{,100}", -1, 0},
		TestItem{"abc12345.asd", r"abc.\.[\w\-]{,100}", -1, 4},
		TestItem{"abca.exe", r"abc.\.[\w\-]{,100}", 0, 8},
		TestItem{"abc2.exe-test_12", r"abc.\.[\w\-]{,100}", 0, 16},
		TestItem{"abcdefGHK", r"[a-f]+\A+", 0, 9},
		TestItem{"ab-cd-efGHK", r"[a-f\-g]+\A+", 0, 11},

		// basic OR
		TestItem{"a", r"a|b", 0, 1},
		TestItem{"a", r"b|a", 0, 1},
		TestItem{"b", r"a|b", 0, 1},
		TestItem{"b", r"b|a", 0, 1},
		TestItem{"c", r"b|a", -1, 0},

		// basic groups
		TestItem{"[ciao]", r"(.)ciao(.)", 0, 6},
		TestItem{"[ciao] da me", r"(.)ciao(.)", 0, 6},

		// positive cases
		TestItem{"this is a good.", r"this", 0, 4},
		TestItem{"this is a good.", r"good", 10, 14},
		TestItem{"this is a good.", r"go+d", 10, 14},
		TestItem{"this is a good.", r"g[oae]+d", 10, 14},
		TestItem{"this is a goed.", r"g[oae]+d", 10, 14},
		TestItem{"this is a good.", r"g[oae]*d", 10, 14},
		TestItem{"this is a goaezd.", r"g[ea-cm-z]*d", 10, 16},
		TestItem{"this is a good.", r"this (\w+) a", 0, 9},
		TestItem{"this is a good.", r"this( \w+){2} g", 0, 11},
		TestItem{"this is a good.", r"( ?\w+){,1}", 0, 4},
		TestItem{"this is a good.", r"( ?\w+)+", 0, 14},
		TestItem{"this is a good.", r"this( \w+)+", 0, 14},
		TestItem{"this is a good sample.", r"( ?\w+){,2}", 0, 7},
		TestItem{"this is a good sample.", r"( ?\w+){,3}", 0, 9},
		TestItem{"this is a good sample.", r"( ?\w+){,4}", 0, 14},
		TestItem{"this is a good sample.", r"( ?\w+){,5}", 0, 21},
		TestItem{"this is a good sample.", r"( ?\w+){2,3}", 0, 9},
		TestItem{"this is a good sample.", r"(\s?\w+){2,3}", 0, 9},
		TestItem{"this these those.", r"(th[ei]se?\s|\.)+", 0, 11},
		TestItem{"this these those ", r"(th[eio]se? ?)+", 0, 17},
		TestItem{"this these those ", r"(th[eio]se? )+", 0, 17},
		TestItem{"this,these,those. over", r"(th[eio]se?[,. ])+", 0, 17},
		TestItem{"soday,this,these,those. over", r".+(th[eio]se?[,. ])+", 0, 23},

		TestItem{"cpapaz", r"(c(pa)+z)", 0, 6},
		TestItem{"this is a cpapaz over", r"(c(pa)+z)", 10, 16},
		TestItem{"this is a cpapapez over", r"(c(p[ae])+z)", 10, 18},
		TestItem{"test@post.pip.com", r"[a-z0-9_]+@([a-z0-9_]+\.?)+", 0, 17},
		TestItem{"test1@post.pip.com, pera", r"[\w]+@([\w]+\.)+\w+", 0, 18},
		TestItem{"pippo@pera.com ", r"[a-z0-9_]+@([a-z0-9_]+\.?)+", 0, 14},
		TestItem{"adce aabe", r"(a(ab)+)|(a(dc)+)e", 0, 4},
		TestItem{"zadce aabe", r"(a(ab)+)|(a(dc)+)e", 1, 5},
		TestItem{"abbz accz addz.", r"c|(d)|e|(ab+)", 0, 3},
		TestItem{"this those these ciao", r"((t[hieo]+se?)\s*)+", 0, 17},
		TestItem{"this ciao", r"((t[hieo]+se?)\s*)+", 0, 5},
		TestItem{"this cpapaz adce aabe", r"(c(pa)+z)(\s[\a]+){2}", 5, 21},
		TestItem{"1234this cpapaz adce aabe", r"(c(pa)+z)(\s[\a]+){2}$", 9, 25},
		TestItem{"this cpapaz adce aabe third", r"(c(pa)+z)(\s[\a]+){2}", 5, 21},
		TestItem{"123cpapaz ole. pippo", r"(c(pa)+z)(\s+\a+[\.,]?)+", 3, 20},

		TestItem{"this is a good sample.", r".*i(\w)+", 0, 4},
		TestItem{"soday,this,these,those. over", r".*,(th[eio]se?[,. ])+", 0, 23},
		TestItem{"soday,this,these,thesa.thesi over", r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+", 0, 29},
		TestItem{"cpapaz ole. pippo,", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 18},
		TestItem{"cpapaz ole. pippo", r"(c(pa)+z)(\s+\a+[\.,]?)+", 0, 17},
		TestItem{"cpapaz ole. pippo, 852", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 18},
		TestItem{"123cpapaz ole. pippo", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 20},
		TestItem{"...cpapaz ole. pippo", r".*(c(pa)+z)(\s+\a+[\.,]?)+", 0, 20},

		TestItem{"cpapaz ole. pippo,", r".*c.+ole.*pi", 0, 14},
		TestItem{"cpapaz ole. pipipo,", r".*c.+ole.*p([ip])+o", 0, 18},
		TestItem{"cpapaz ole. pipipo", r"^.*c.+ol?e.*p([ip])+o$", 0, 18},
		TestItem{"abbb", r"ab{2,3}?", 0, 3},
		TestItem{" pippo pera", r"\s(.*)pe(.*)", 0, 11},
		TestItem{" abb", r"\s(.*)", 0, 4},

		TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$", 0, 29},

		// negative cases
		TestItem{"zthis ciao", r"((t[hieo]+se?)\s*)+", -1, 0},
		TestItem{"this is a good.", r"thes", -1, 2},
		TestItem{"test1post.pip.com, pera", r"[\w]+@([\w]+\.)+\w+", -1, 9},
		TestItem{"this cpapaz adce", r"(c(pa)+z)(\s[\a]+){2}", -1, 0},
		TestItem{"this cpapaz adce aabe third", r"(c(pa)+z)(\s[\a]+){2}$", -1, 0},
		TestItem{"1234this cpapaz adce aabe ter", r"(c(pa)+z)(\s[\a]+){2}$", -1, 0},
		TestItem{"cpapaz ole. pipipo,", r"^.*c.+ol?e.*p([ip])+o$", -1, 0},
		TestItem{"/home/us_er/pippo/info-01.jpeg", r"(/?[-\w_]+)*\.txt$", -1, 26},

		// unicode input
		TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test", r".*a [Ⅰ-Ⅵ ]+", 0, 34},
		TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test", r"[Ⅰ-Ⅴ\s]+", 3, 23},

		// newer edge cases
		TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]", -1, 8},
		TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]", 0, 8},
		TestItem{"123456789", r"^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$", 0, 9},
		TestItem{"12345678", r"^\d{8}$", 0, 8},
		TestItem{"12345678", r"^\d{7}$", -1, 0},
		TestItem{"12345678", r"^\d{9}$", -1, 8},

		TestItem{"eth", r"(oth)|(eth)", 0, 3},
		TestItem{"et", r"(oth)|(eth)", -1, 2},
		TestItem{"et", r".*(oth)|(eth)", -1, 2},
		TestItem{"peoth", r".*(ith)|(eth)", -1, 5},

		TestItem{"poth", r"(eth)|(oth)", 1, 4},
		TestItem{"poth", r"(oth)|(eth)", 1, 4},
		TestItem{"poth", r".(oth)|(eth)$", 0, 4},
		TestItem{"poth", r"^.(oth)|(eth)$", 0, 4},
		TestItem{"poth", r"^\w+$", 0, 4},

		// dot char
		TestItem{"8-11 l: qllllqllklhlvtl", r"^(\d+)-(\d+) ([a-z]): (.*)$", 0, 23},
		TestItem{"accccb deer", r"^a(.*)b d(.+)r", 0, 11},
		TestItem{"accccb deer", r"^a(.*)b d(.+)", 0, 11},
		TestItem{"accccb deer", r"^(.*)$", 0, 11},
		TestItem{"accccb deer", r"^a(.*)b d(.+)p", -1, 11},
		TestItem{"##.#....#.##.####...#.##", r".{18}[.#]", 0, 19},
		TestItem{"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.", r'.*#[.#]{4}##[.#]{4}##[.#]{4}###', 0, 49},

		// backslash-escaped chars
		TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)", 0, 31},
		TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)", 0, 28},
		TestItem{"p_p", r"\w+", 0, 3},
		TestItem{"p_é", r"\w+", 0, 2},

		// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
		TestItem{"1*1", r"(\d+)([*])(\d+)", 0, 3},
		TestItem{"+1*1", r"^(\d+)([*])(\d+)", -1, 0},
		TestItem{"*1*1", r"(?:^|[*])(\d+)([*])(\d+)", 0, 4},
		TestItem{"*1*1", r"(?:^|[*()])(\d+)([*])(\d+)", 0, 4},
		TestItem{")1*1", r"(?:^|[*()])(\d+)([*])(\d+)", 0, 4},
		TestItem{"(1*1", r"(?:^|[*()])(\d+)([*])(\d+)", 0, 4},
		TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])", 0, 5},
		TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])", -1, 0},
		TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])", -1, 0},

		// particular groups
		TestItem{"ababababac", r"ab(.*)(ac)", 0, 10},

		// backslash token at the end of the query
		TestItem{"a", r"\S+", 0, 1},
		TestItem{"aaaa", r"\S+", 0, 4},
		TestItem{"aaaa ", r"\S+", 0, 4},

		// multiple dot char
		TestItem{"aba", r"a*(b*)*a", 0, 3},
		TestItem{"/*x*/", r"/\**(.*)\**/", 0, 5},
		TestItem{"/*x*/", r"/*(.*)*/", 0, 5},
	]
)
|
|
|
|
|
|
2020-01-14 02:45:08 +01:00
|
|
|
|
// TestItemRe describes one replace case: `q` is compiled, `rep` is passed
// as the replacement for `src`, and the outcome is compared with `r`.
struct TestItemRe {
	src string // input text
	q   string // regex query
	rep string // replacement string handed to replace / replace_simple
	r   string // expected text after the replacement
}
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
2020-01-14 02:45:08 +01:00
|
|
|
|
const (
	// Cases run through `re.replace` by test_regex(); the replacement
	// strings of the last three entries use the \0 / \1 group tokens.
	match_test_suite_replace = [
		TestItemRe{
			"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
			r"(pi?(ba)+o)",
			"CIAO",
			"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO",
		},
		TestItemRe{
			"Today is a good day and tomorrow will be for sure.",
			r"[Tt]o\w+",
			"CIAO",
			"CIAO is a good day and CIAO will be for sure.",
		},
		TestItemRe{
			"Today is a good day and tomorrow will be for sure.",
			r"(a\w) ",
			r"[\0] ",
			"Tod[ay] is a good d[ay] and tomorrow will be for sure.",
		},
		TestItemRe{
			"Today is a good day and tomorrow will be for sure.",
			r"(a\w) ",
			r"[\0_\0] ",
			"Tod[ay_ay] is a good d[ay_ay] and tomorrow will be for sure.",
		},
		TestItemRe{
			"Today is a good day and tomorrow will be for sure.",
			r"(a\w) ",
			r"[\0\1] ",
			"Tod[ay] is a good d[ay] and tomorrow will be for sure.",
		},
	]

	// Cases run through `re.replace_simple` by test_regex().
	match_test_suite_replace_simple = [
		TestItemRe{
			"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
			r"(pi?(ba)+o)",
			"CIAO",
			"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO",
		},
		TestItemRe{
			"Today is a good day and tomorrow will be for sure.",
			r"[Tt]o\w+",
			"CIAO",
			"CIAO is a good day and CIAO will be for sure.",
		},
	]
)
|
|
|
|
|
|
2020-01-28 20:34:11 +01:00
|
|
|
|
// TestItemCGroup describes one capturing-group case.
// When `cgn` is not empty the test enables continuous group saving and
// compares `cg` with `re.group_csave`; otherwise `cg` is compared with
// `re.groups`.
struct TestItemCGroup {
	src string // input text
	q   string // regex query
	s   int    // expected start index of the match
	e   int    // expected end index of the match
	cg  []int  // [number of items (3*# item), id_group_0, start_0, end_0, id_group_1, start1, start2,... ]
	cgn map[string]int // group name => expected group id (checked against re.group_map[name] - 1)
}
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
2020-01-28 20:34:11 +01:00
|
|
|
|
const (
	// Capturing-group cases: the first three carry named groups (checked
	// against the continuous-save buffer), the last three have an empty
	// name map and are checked against the plain `re.groups` array.
	cgroups_test_suite = [
		TestItemCGroup{
			"http://www.ciao.mondo/hello/pippo12_/pera.html",
			r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+[\.|/])+",
			0,
			42,
			[7, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42],
			{'format': int(0), 'token': 1},
		},
		TestItemCGroup{
			"http://www.ciao.mondo/hello/pippo12_/pera.html",
			r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",
			0,
			46,
			[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
			// [8, 0, 0, 4, 1, 7, 10, 1, 11, 15, 1, 16, 21, 1, 22, 27, 1, 28, 36, 1, 37, 41, 1, 42, 46],
			{'format': int(0), 'token': 1},
		},
		TestItemCGroup{
			"http://www.ciao.mondo/hello/pippo12_/pera.html",
			r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+\.)+",
			0,
			16,
			[3, 0, 0, 4, 1, 7, 11, 1, 11, 16],
			{'format': int(0)},
		},
		TestItemCGroup{
			"acc +13 pippo",
			r"(\w+)\s(.)([0-9]+) \w+",
			0,
			13,
			[0, 3, 4, 5, 5, 7],
			map[string]int{},
		},
		TestItemCGroup{
			"acc +13",
			r"(\w+)\s(.)([0-9]+)",
			0,
			7,
			[0, 3, 4, 5, 5, 7],
			map[string]int{},
		},
		TestItemCGroup{
			"ababababac",
			r"ab(.*)(ac)",
			0,
			10,
			[2, 8, 8, 10],
			map[string]int{},
		},
	]
)
|
|
|
|
|
|
2020-12-24 06:27:46 +01:00
|
|
|
|
// Test_find_all describes one find_all case: `res` is compared with the
// result of `re.find_all(src)` and `res_str` with `re.find_all_str(src)`.
struct Test_find_all {
	src     string   // input text
	q       string   // regex query
	res     []int    // [0,4,5,6...]
	res_str []string // ['find0','find1'...]
}
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
2020-12-24 06:27:46 +01:00
|
|
|
|
const (
	// Cases for `find_all` (index pairs) and `find_all_str` (matched text).
	find_all_test_suite = [
		Test_find_all{
			"abcd 1234 efgh 1234 ghkl1234 ab34546df",
			r"\d+",
			[5, 9, 15, 19, 24, 28, 31, 36],
			['1234', '1234', '1234', '34546'],
		},
		Test_find_all{
			"abcd 1234 efgh 1234 ghkl1234 ab34546df",
			r"\a+",
			[0, 4, 10, 14, 20, 24, 29, 31, 36, 38],
			['abcd', 'efgh', 'ghkl', 'ab', 'df'],
		},
		Test_find_all{
			"oggi pippo è andato a casa di pluto ed ha trovato pippo",
			r"p[iplut]+o",
			[5, 10, 31, 36, 51, 56],
			['pippo', 'pluto', 'pippo'],
		},
		Test_find_all{
			"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
			r"(pi?(ba)+o)",
			[5, 10, 31, 39, 54, 65],
			['pibao', 'pbababao', 'pibabababao'],
		},
		Test_find_all{
			"Today is a good day and tomorrow will be for sure.",
			r"[Tt]o\w+",
			[0, 5, 24, 32],
			['Today', 'tomorrow'],
		},
		Test_find_all{
			"pera\nurl = https://github.com/dario/pig.html\npippo",
			r"url *= *https?://[\w./]+",
			[5, 44],
			['url = https://github.com/dario/pig.html'],
		},
		Test_find_all{
			"pera\nurl = https://github.com/dario/pig.html\npippo",
			r"url *= *https?://.*" + '\n',
			[5, 45],
			['url = https://github.com/dario/pig.html\n'],
		},
		Test_find_all{
			"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
			r"#[.#]{4}##[.#]{4}##[.#]{4}###",
			[29, 49],
			['#....###...##...####'],
		},
		Test_find_all{
			"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
			r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
			[0, 49],
			['#.#......##.#..#..##........##....###...##...####'],
		},
		Test_find_all{
			"1234 Aa dddd Aaf 12334 Aa opopo Aaf",
			r"Aa.+Aaf",
			[5, 16, 23, 35],
			['Aa dddd Aaf', 'Aa opopo Aaf'],
		},
		Test_find_all{
			"@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo",
			r"@for.+@endfor",
			[0, 22, 23, 50, 63, 80, 89, 117],
			['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor'],
		},
		Test_find_all{
			"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++",
			r"\+{3}.*\+{3}",
			[0, 11, 18, 32, 33, 44],
			['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++'],
		},
	]
)
|
|
|
|
|
|
2022-05-08 14:21:39 +02:00
|
|
|
|
// Test_split describes one split case: `res` is compared with the
// result of `re.split(src)`.
struct Test_split {
	src string   // input text
	q   string   // regex query used as separator
	res []string // ['abc','def',...]
}
|
|
|
|
|
|
|
|
|
|
const (
	// Cases for `re.split`: source text, separator query, expected pieces.
	split_test_suite = [
		Test_split{
			'abcd 1234 efgh 1234 ghkl1234 ab34546df',
			r'\d+',
			['abcd ', ' efgh ', ' ghkl', ' ab', 'df'],
		},
		Test_split{
			'abcd 1234 efgh 1234 ghkl1234 ab34546df',
			r'\a+',
			[' 1234 ', ' 1234 ', '1234 ', '34546'],
		},
		Test_split{
			'oggi pippo è andato a casa di pluto ed ha trovato pippo',
			r'p[iplut]+o',
			['oggi ', ' è andato a casa di ', ' ed ha trovato '],
		},
		Test_split{
			'oggi pibao è andato a casa di pbababao ed ha trovato pibabababao',
			r'(pi?(ba)+o)',
			['oggi ', ' è andato a casa di ', ' ed ha trovato '],
		},
		Test_split{
			'Today is a good day and tomorrow will be for sure.',
			r'[Tt]o\w+',
			[' is a good day and ', ' will be for sure.'],
		},
		Test_split{
			'pera\nurl = https://github.com/dario/pig.html\npippo',
			r'url *= *https?://[\w./]+',
			['pera\n', '\npippo'],
		},
		Test_split{
			'pera\nurl = https://github.com/dario/pig.html\npippo',
			r'url *= *https?://.*' + '\n',
			['pera\n', 'pippo'],
		},
		Test_split{
			'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.',
			r'#[.#]{4}##[.#]{4}##[.#]{4}###',
			['#.#......##.#..#..##........#', '##.......#.....#..#......#...#........###.#..#.'],
		},
		Test_split{
			'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.',
			r'.*#[.#]{4}##[.#]{4}##[.#]{4}###',
			['##.......#.....#..#......#...#........###.#..#.'],
		},
		Test_split{
			'1234 Aa dddd Aaf 12334 Aa opopo Aaf',
			r'Aa.+Aaf',
			['1234 ', ' 12334 '],
		},
		Test_split{
			'@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo',
			r'@for.+@endfor',
			[' ', ' altro testo ', ' uno due ', ' pippo'],
		},
		Test_split{
			'+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++',
			r'\+{3}.*\+{3}',
			['\n elvo ', ' '],
		},
		Test_split{
			'foobar',
			r'\d',
			['foobar'],
		},
		Test_split{
			'1234',
			r'\d+',
			[],
		},
	]
)
|
|
|
|
|
|
2020-12-05 01:51:48 +01:00
|
|
|
|
const (
	// When true, test_regex() prints every case before running it.
	debug = true
)
|
|
|
|
|
|
2022-05-08 14:21:39 +02:00
|
|
|
|
// test_regex walks every table-driven suite of this file, in order:
// capturing groups, find_all, split, replace, replace_simple and finally
// match/find. A failing case is reported on stderr/stdout and flagged
// with `assert false`.
fn test_regex() {
	// check capturing groups
	for c, to in cgroups_test_suite {
		// debug print
		if debug {
			println('$c [$to.src] [q$to.q] ($to.s, $to.e)')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: $err')
			assert false
			continue
		}

		// named groups are verified through the continuous-save buffer
		if to.cgn.len > 0 {
			re.group_csave_flag = true
			if debug {
				println('continuous save')
			}
		} else {
			if debug {
				println('NO continuous save')
			}
		}

		start, end := re.match_string(to.src)

		if start != to.s || end != to.e {
			mut tmp_str := ''
			if start >= 0 && end > start {
				tmp_str = to.src[start..end]
			}
			println('#$c [$to.src] q[$to.q] res[$tmp_str] base:[$to.s,$to.e] $start, $end')
			eprintln('ERROR!')
			assert false
			continue
		}

		// check cgroups
		if to.cgn.len > 0 {
			if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
				// group_csave may be empty on this path: never index it blindly,
				// otherwise the diagnostic itself fails with an out-of-range access
				found := if re.group_csave.len > 0 { re.group_csave[0].str() } else { 'none' }
				eprintln('Capturing group len error! found: $found true ground: ${to.cg[0]}')
				assert false
				continue
			}

			// check captured groups: group_csave[0] holds the number of saved
			// records, each record taking three slots after the counter
			mut ln := re.group_csave[0] * 3
			for ln > 0 {
				if re.group_csave[ln] != to.cg[ln] {
					eprintln('Capturing group failed on $ln item!')
					assert false
				}
				ln--
			}

			// check named captured groups
			for k in to.cgn.keys() {
				// we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
				if to.cgn[k] != (re.group_map[k] - 1) {
					eprintln('Named capturing group error! [$k]')
					assert false
					continue
				}
			}
		} else {
			// check normal captured groups
			if re.groups.len != to.cg.len {
				assert false
				// lengths differ: skip the element comparison, it could index past to.cg
				continue
			}
			for ln := 0; ln < re.groups.len; ln++ {
				if re.groups[ln] != to.cg[ln] {
					eprintln("Capture group doesn't match:")
					eprintln('true ground: $to.cg')
					eprintln('elaborated : $re.groups')
					assert false
				}
			}
		}
	}

	// check find_all
	for c, to in find_all_test_suite {
		// debug print
		if debug {
			println('#$c [$to.src] q[$to.q] ($to.res, $to.res_str)')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: $err')
			assert false
			continue
		}

		re.reset()
		res := re.find_all(to.src)
		if res != to.res {
			eprintln('err: find_all !!')
			if debug {
				println('#$c exp: $to.res calculated: $res')
			}
			assert false
		}

		res_str := re.find_all_str(to.src)
		if res_str != to.res_str {
			eprintln('err: find_all_str !!')
			if debug {
				println('#$c exp: $to.res_str calculated: $res_str')
			}
			assert false
		}
	}

	// check split
	for c, to in split_test_suite {
		// debug print
		if debug {
			println('#$c [$to.src] q[$to.q] ($to.res)')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: $err')
			assert false
			continue
		}

		re.reset()
		res := re.split(to.src)
		if res != to.res {
			eprintln('err: split !!')
			if debug {
				println('#$c exp: $to.res calculated: $res')
			}
			assert false
		}
	}

	// check replace
	for c, to in match_test_suite_replace {
		// debug print
		if debug {
			println('#$c [$to.src] q[$to.q] $to.r')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: $err')
			assert false
			continue
		}

		res := re.replace(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check replace simple
	for c, to in match_test_suite_replace_simple {
		// debug print
		if debug {
			println('#$c [$to.src] q[$to.q] $to.r')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: $err')
			assert false
			continue
		}

		res := re.replace_simple(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check match and find
	for c, to in match_test_suite {
		// debug print
		if debug {
			println('#$c [$to.src] q[$to.q] $to.s $to.e')
		}

		// test the find: only cases whose expected start is past index 0
		if to.s > 0 {
			mut re := regex.regex_opt(to.q) or {
				eprintln('err: $err')
				assert false
				continue
			}
			start, end := re.find(to.src)

			if start != to.s || end != to.e {
				err_str := re.get_parse_error_string(start)
				eprintln('ERROR : $err_str start: $start end: $end')
				assert false
			} else {
				assert true
			}
			continue
		}

		// test the match
		mut re := regex.new()

		re.compile_opt(to.q) or {
			eprintln('err: $err')
			assert false
			continue
		}
		start, end := re.match_string(to.src)

		if start != to.s || end != to.e {
			mut tmp_str := ''
			if start >= 0 && end > start {
				tmp_str = to.src[start..end]
			}
			eprintln('#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end')
			eprintln('ERROR!')
			assert false
			continue
		}

		// test the match predicate
		if to.s >= 0 {
			assert re.matches_string(to.src)
		} else {
			assert !re.matches_string(to.src)
		}

		// rerun on a copy of the source to test consistency
		tmp_str1 := to.src.clone()
		start1, end1 := re.match_string(tmp_str1)
		if start1 != start || end1 != end {
			eprintln('two run ERROR!!')
			assert false
			continue
		}
	}

	if debug {
		println('DONE!')
	}
}
|
2020-12-05 01:51:48 +01:00
|
|
|
|
|
2021-02-20 20:39:08 +01:00
|
|
|
|
// test regex_base function
|
2022-05-08 14:21:39 +02:00
|
|
|
|
// test_regex_func compiles a query through `regex.regex_base` and checks
// that the whole test string is matched.
fn test_regex_func() {
	pattern := r'\d\dabcd'
	sample := '78abcd'
	mut re, re_err, err_pos := regex.regex_base(pattern)

	// compilation problems are reported first, then flagged as a failure
	if re_err != regex.compile_ok {
		eprintln('Error in query string in pos $err_pos')
		eprintln('Error: ${re.get_parse_error_string(re_err)}')
		assert false
		return
	}

	first, last := re.match_string(sample)
	assert (first == 0) && (last == 6)
}
|
2021-03-20 00:54:12 +01:00
|
|
|
|
|
2022-02-02 09:52:18 +01:00
|
|
|
|
// my_repl_1 is a replacement callback for `replace_by_fn`.
// It prints the match bounds together with capture group 0 and returns
// the letter `a` followed by that group converted to upper case.
fn my_repl_1(re regex.RE, in_txt string, start int, end int) string {
	s0 := re.get_group_by_id(in_txt, 0)
	println('[$start, $end] => $s0')
	upper := s0.to_upper()
	return 'a' + upper
}
|
|
|
|
|
|
2022-05-08 14:21:39 +02:00
|
|
|
|
// test_regex_func_replace1 checks `replace_by_fn` with the `my_repl_1`
// callback: every `a(b+)` match must come back with its group upper-cased.
fn test_regex_func_replace1() {
	query := r'a(b+)'
	mut re := regex.regex_opt(query) or { panic(err) }

	txt := 'abbabbbabbbbaabba'
	result := re.replace_by_fn(txt, my_repl_1)

	assert result == 'aBBaBBBaBBBBaaBBa'
}
|
|
|
|
|
|
2021-03-20 00:54:12 +01:00
|
|
|
|
// my_repl is a replacement callback for `replace_by_fn`.
// For each of the capture groups 0, 1 and 2 it takes the first byte of the
// group text and appends an `X`; the three pieces are returned concatenated.
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
	mut out := ''
	for group_id in 0 .. 3 {
		// [0..1] keeps only the first byte of the captured text
		out += re.get_group_by_id(in_txt, group_id)[0..1] + 'X'
	}
	return out
}
|
|
|
|
|
|
|
|
|
|
// test_regex_func_replace exercises `replace_by_fn` on a multi-line input.
// Three lines are generated; each one is the fixed `txt` record followed by a
// random-length prefix (10 to 29 bytes) of `filler`. In the expected output
// the record is replaced by `cXTX,X` while the filler part stays untouched.
fn test_regex_func_replace() {
	filler := "E il primo dei tre regni dell'Oltretomba cristiano visitato da Dante nel corso del viaggio, con la guida di Virgilio."
	txt := r'"content": "They dont necessarily flag "you will be buying these shares on margin!"", "channel_id"'
	query := r'"(content":\s+")(.*)(, "channel_id")'
	mut re := regex.regex_opt(query) or { panic(err) }

	mut input := ''
	mut expected := ''
	for _ in 0 .. 3 {
		rnd := int(10 + rand.u32() % 20)
		// the same filler tail is appended to both the input and the expectation
		tail := filler[0..rnd] + '\n'
		input += txt + tail
		expected += 'cXTX,X' + tail
	}

	result := re.replace_by_fn(input, my_repl)
	if debug {
		eprintln(result)
		eprintln(expected)
	}
	assert result == expected
}
|
|
|
|
|
|
2022-05-08 14:21:39 +02:00
|
|
|
|
// rest_regex_replace_n checks `replace_n` against a string with two numeric
// runs. Expected results, as asserted below: a count of 0 leaves the text
// unchanged, 1 replaces only the first match, -1 only the last one, and any
// count of magnitude 2 or more replaces both matches.
// NOTE(review): the name starts with `rest_`, not `test_`, so the V test
// runner will presumably not collect this function - confirm whether it is
// disabled on purpose.
fn rest_regex_replace_n() {
	s := 'dario 1234 pepep 23454 pera'
	query := r'\d+'
	mut re := regex.regex_opt(query) or { panic(err) }

	unchanged := 'dario 1234 pepep 23454 pera'
	first_only := 'dario [repl] pepep 23454 pera'
	last_only := 'dario 1234 pepep [repl] pera'
	both := 'dario [repl] pepep [repl] pera'

	assert re.replace_n(s, '[repl]', 0) == unchanged
	assert re.replace_n(s, '[repl]', -1) == last_only
	assert re.replace_n(s, '[repl]', 1) == first_only
	assert re.replace_n(s, '[repl]', 2) == both
	assert re.replace_n(s, '[repl]', -2) == both
	assert re.replace_n(s, '[repl]', 3) == both
	assert re.replace_n(s, '[repl]', -3) == both

	// mut res := re.replace_n(s, "[repl]", -1)
	// println("source: ${s}")
	// println("res   : ${res}")
}
|
|
|
|
|
|
2021-09-05 03:48:59 +02:00
|
|
|
|
// Patterns containing invalid quantifier sequences.
// test_quantifier_sequences expects each of them to fail compilation
// with regex.err_syntax_error.
const (
	test_quantifier_sequences_list = [
		r'+{3}.*+{3}',
		r'+{3}.*?{3}',
		r'+{3}.**{3}',
		r'+{3}.*\+{3}*',
		r'+{3}.*\+{3}+',
		r'+{3}.*\+{3}??',
		r'+{3}.*\+{3}{4}',
	]
)
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
|
|
|
|
// test_quantifier_sequences verifies that every malformed quantifier pattern
// in test_quantifier_sequences_list is rejected by regex.regex_base with
// regex.err_syntax_error.
fn test_quantifier_sequences() {
	for pattern in test_quantifier_sequences_list {
		// only the error code is checked, so the compiled RE and the error
		// position are discarded instead of being bound to unused variables
		_, re_err, _ := regex.regex_base(pattern)
		if re_err != regex.err_syntax_error {
			// print the offending pattern before the assert below fails
			eprintln('pattern: $pattern => $re_err')
		}
		assert re_err == regex.err_syntax_error
	}
}
|
2021-10-12 05:03:23 +02:00
|
|
|
|
|
|
|
|
|
// Test_find_groups describes one case for test_groups_in_find:
// a source text, the query to compile, and the values `find` must produce.
struct Test_find_groups {
	src string // text that is searched
	q   string // regex query compiled for the search
	s   int    // expected start index of the match
	e   int    // expected end index of the match
	res []int  // expected group indexes (compared with `re.groups`)
}
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
2021-10-12 05:03:23 +02:00
|
|
|
|
// Cases for test_groups_in_find.
// Each entry is: source text, query, expected start, expected end,
// expected content of `re.groups` after the search.
const (
	find_groups_test_suite = [
		Test_find_groups{"aabbbccccdd", r"(b+)(c+)", 2, 9, [2, 5, 5, 9]},
		Test_find_groups{"aabbbccccdd", r"(a+).*(c+)", 0, 9, [0, 2, 5, 9]},
		Test_find_groups{"aabbbccccdd", r"((b+).*)(d+)", 2, 11, [2, 9, 2, 5, 9, 11]},
	]
)
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
|
|
|
|
// test_groups_in_find runs `find` for every case of find_groups_test_suite
// and checks the returned match bounds as well as the group indexes that the
// search leaves in `re.groups`.
fn test_groups_in_find() {
	for tc in find_groups_test_suite {
		src_text := tc.src
		query := tc.q
		mut re := regex.regex_opt(query) or { panic(err) }
		start, end := re.find(src_text)
		// Debug print do not remove!!
		/*
		println("---------")
		println("src_text:[${src_text}]")
		println("query   :[${query}]")
		println("[${start}, ${end}]")
		println(re.groups)
		mut gi := 0
		for gi < re.groups.len {
			if re.groups[gi] >= 0 {
				println('${gi / 2} :[${src_text[re.groups[gi]..re.groups[gi + 1]]}]')
			}
			gi += 2
		}
		*/
		// compare the outcome with the expectations stored in the case
		assert start == tc.s
		assert end == tc.e
		assert re.groups == tc.res
	}
}
|
|
|
|
|
|
2022-05-08 14:21:39 +02:00
|
|
|
|
// Queries that test_errors expects regex.regex_base to reject
// (any result other than regex.compile_ok).
const (
	err_query_list = [
		r'([a]|[b])*',
	]
)
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
|
|
|
|
// test_errors compiles every query of err_query_list and requires that each
// one of them yields an error code different from regex.compile_ok.
fn test_errors() {
	mut rejected := 0
	for pattern in err_query_list {
		_, status, _ := regex.regex_base(pattern)
		if status == regex.compile_ok {
			// compiled cleanly: not counted as a rejection
			continue
		}
		rejected++
	}
	assert rejected == err_query_list.len
}
|
2021-12-01 07:38:50 +01:00
|
|
|
|
|
|
|
|
|
// test_long_query checks that very long queries compile and match.
// A random string of 32768 characters is turned into two queries:
//   1. every character wrapped in its own group: (a)(b)(c)...
//   2. all groups nested: (a(b(c...)))
// Both must match the random string up to its end.
fn test_long_query() {
	test_len := 32768
	base_string := rand.string(test_len)
	mut sb := strings.new_builder(test_len * 3)

	// query 1: one flat group per character
	for ch in base_string {
		sb.write_u8(`(`)
		sb.write_u8(ch)
		sb.write_u8(`)`)
	}
	mut query := sb.str()

	// println(base_string)
	// println(buf.str())

	mut re := regex.regex_opt(query) or { panic(err) }
	start1, end1 := re.match_string(base_string)
	// println("$start, $end")
	assert start1 >= 0 && end1 == base_string.len

	// query 2: open one group per character, then close them all at the end
	sb.clear()
	for ch in base_string {
		sb.write_u8(`(`)
		sb.write_u8(ch)
	}
	for _ in 0 .. base_string.len {
		sb.write_u8(`)`)
	}
	query = sb.str()

	re = regex.regex_opt(query) or { panic(err) }
	start2, end2 := re.match_string(base_string)
	// println("$start, $end")
	assert start2 >= 0 && end2 == base_string.len
}
|
2021-12-27 21:18:48 +01:00
|
|
|
|
|
|
|
|
|
// Test_negation_group is one case for test_negation_groups:
// an input text and whether the query is expected to match it.
struct Test_negation_group {
	src string // text passed to `match_string`
	res bool   // true when a match (start >= 0) is expected
}
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
|
|
|
|
// Cases for test_negation_groups: input text and whether a match is expected.
const (
	negation_groups = [
		Test_negation_group{'automobile', false},
		Test_negation_group{'botomobile', true},
		Test_negation_group{'auto_caravan', false},
		Test_negation_group{'moto_mobile', true},
		Test_negation_group{'pippole', true},
		Test_negation_group{'boring test', false},
	]
)
|
2022-05-08 14:21:39 +02:00
|
|
|
|
|
2021-12-27 21:18:48 +01:00
|
|
|
|
// test_negation_groups compiles a query that starts with the negation group
// `(?!auto)` and checks, for every case of negation_groups, that the presence
// of a match (start >= 0) agrees with the expected result of the case.
fn test_negation_groups() {
	// the query is never reassigned, so it does not need to be mutable
	query := r'(?!auto)\w+le'
	mut re := regex.regex_opt(query) or { panic(err) }
	for test in negation_groups {
		// only the start index decides match / no match; the end is discarded
		start, _ := re.match_string(test.src)
		assert (start >= 0) == test.res
	}
}
|