regex: bug fixes (#7137)
parent
89952edd25
commit
4fb37e81b2
|
@ -616,7 +616,7 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if status == .in_bsls {
|
if status == .in_bsls {
|
||||||
println("CC bsls not found [${ch:c}]")
|
//println("CC bsls not found [${ch:c}]")
|
||||||
status = .in_char
|
status = .in_char
|
||||||
}else {
|
}else {
|
||||||
continue
|
continue
|
||||||
|
@ -1212,6 +1212,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
// set the jump in the right places
|
// set the jump in the right places
|
||||||
pc1 = 0
|
pc1 = 0
|
||||||
for pc1 < pc-2 {
|
for pc1 < pc-2 {
|
||||||
|
//println("Here $pc1 ${pc-2}")
|
||||||
// two consecutive OR are a syntax error
|
// two consecutive OR are a syntax error
|
||||||
if re.prog[pc1+1].ist == ist_or_branch && re.prog[pc1+2].ist == ist_or_branch {
|
if re.prog[pc1+1].ist == ist_or_branch && re.prog[pc1+2].ist == ist_or_branch {
|
||||||
return err_syntax_error, i
|
return err_syntax_error, i
|
||||||
|
@ -1238,8 +1239,13 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
re.prog[pc1+1].rep_max = pc2 + 1
|
re.prog[pc1+1].rep_max = pc2 + 1
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
pc2++
|
pc2++
|
||||||
}
|
}
|
||||||
|
// special case query of few chars, the true can't go on the first instruction
|
||||||
|
if re.prog[pc1+1].rep_max == pc1 {
|
||||||
|
re.prog[pc1+1].rep_max = 3
|
||||||
|
}
|
||||||
//println("Compile OR postproc. [$pc1,OR ${pc1+1},$pc2]")
|
//println("Compile OR postproc. [$pc1,OR ${pc1+1},$pc2]")
|
||||||
pc1 = pc2
|
pc1 = pc2
|
||||||
continue
|
continue
|
||||||
|
@ -1490,6 +1496,8 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
mut ch := rune(0) // examined char
|
mut ch := rune(0) // examined char
|
||||||
mut char_len := 0 // utf8 examined char len
|
mut char_len := 0 // utf8 examined char len
|
||||||
mut m_state := Match_state.start // start point for the matcher FSM
|
mut m_state := Match_state.start // start point for the matcher FSM
|
||||||
|
mut src_end := false
|
||||||
|
mut last_fnd_pc := -1
|
||||||
|
|
||||||
mut pc := -1 // program counter
|
mut pc := -1 // program counter
|
||||||
mut state := StateObj{} // actual state
|
mut state := StateObj{} // actual state
|
||||||
|
@ -1599,9 +1607,14 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
dbg_line++
|
dbg_line++
|
||||||
}
|
}
|
||||||
//******************************************
|
//******************************************
|
||||||
|
/* if ist == ist_prog_end {
|
||||||
|
//println("HERE")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
*/
|
||||||
// we're out of text, manage it
|
// we're out of text, manage it
|
||||||
if i >= in_txt_len || m_state == .new_line {
|
if i >= in_txt_len || m_state == .new_line {
|
||||||
|
src_end = true
|
||||||
|
|
||||||
// manage groups
|
// manage groups
|
||||||
if group_index >= 0 && state.match_index >= 0 {
|
if group_index >= 0 && state.match_index >= 0 {
|
||||||
|
@ -1644,11 +1657,29 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// manage ist_dot_char
|
if pc == -1 {
|
||||||
|
pc = last_fnd_pc
|
||||||
|
}
|
||||||
|
//println("Finished text!!")
|
||||||
|
//println("Instruction: ${ist:08x} pc: $pc")
|
||||||
|
//println("min_rep: ${re.prog[pc].rep_min} max_rep: ${re.prog[pc].rep_max} rep: ${re.prog[pc].rep}")
|
||||||
|
|
||||||
|
// program end
|
||||||
|
if ist == ist_prog_end {
|
||||||
|
//println("Program end on end of text!")
|
||||||
|
return first_match,i
|
||||||
|
}
|
||||||
|
|
||||||
m_state = .end
|
// if we go out of text and we are the last instruction .* check
|
||||||
break
|
if (re.prog[pc+1].ist == ist_prog_end) &&
|
||||||
//return no_match_found,0
|
(re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max) {
|
||||||
|
//println("Ok .* rep match!")
|
||||||
|
return first_match,i
|
||||||
|
}
|
||||||
|
|
||||||
|
//m_state = .end
|
||||||
|
//break
|
||||||
|
return no_match_found,0
|
||||||
}
|
}
|
||||||
|
|
||||||
// starting and init
|
// starting and init
|
||||||
|
@ -1697,7 +1728,8 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
if m_state == .stop {
|
if m_state == .stop {
|
||||||
|
|
||||||
// we are in search mode, don't exit until the end
|
// we are in search mode, don't exit until the end
|
||||||
if re.flag & f_src != 0 && ist != ist_prog_end {
|
if ((re.flag & f_src) != 0) && (ist != ist_prog_end) {
|
||||||
|
last_fnd_pc = pc
|
||||||
pc = -1
|
pc = -1
|
||||||
i += char_len
|
i += char_len
|
||||||
m_state = .ist_next
|
m_state = .ist_next
|
||||||
|
@ -1741,9 +1773,10 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
// we have a DOT MATCH ongoing
|
// we have a DOT MATCH ongoing
|
||||||
//println("ist_prog_end l_ist: ${l_ist:08x}", l_ist)
|
//println("ist_prog_end l_ist: ${l_ist:08x}", l_ist)
|
||||||
if re.state_stack_index>=0 && l_ist == ist_dot_char {
|
if re.state_stack_index>=0 && l_ist == ist_dot_char {
|
||||||
|
i = in_txt_len // dario
|
||||||
m_state = .stop
|
m_state = .stop
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
re.state_stack_index = -1
|
re.state_stack_index = -1
|
||||||
m_state = .stop
|
m_state = .stop
|
||||||
|
@ -1832,7 +1865,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
first_match = i
|
first_match = i
|
||||||
}
|
}
|
||||||
state.match_index = i
|
state.match_index = i
|
||||||
re.prog[pc].rep++
|
re.prog[pc].rep++ // increase repetitions
|
||||||
|
|
||||||
//if re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max {
|
//if re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max {
|
||||||
if re.prog[pc].rep >= 0 && re.prog[pc].rep <= re.prog[pc].rep_max {
|
if re.prog[pc].rep >= 0 && re.prog[pc].rep <= re.prog[pc].rep_max {
|
||||||
|
@ -1857,12 +1890,15 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
re.state_stack[re.state_stack_index].i = i + char_len
|
re.state_stack[re.state_stack_index].i = i + char_len
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//i += char_len // next char
|
||||||
|
/*
|
||||||
// manage * and {0,} quantifier
|
// manage * and {0,} quantifier
|
||||||
if re.prog[pc].rep_min > 0 {
|
if re.prog[pc].rep_max == max_quantifier {
|
||||||
i += char_len // next char
|
//println("manage .*")
|
||||||
l_ist = u32(ist_dot_char)
|
m_state = .ist_load
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
m_state = .ist_next
|
m_state = .ist_next
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -2163,14 +2199,48 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
return err_internal_error, i
|
return err_internal_error, i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//println("Check end of text!")
|
||||||
// Check the results
|
// Check the results
|
||||||
if state.match_index >= 0 {
|
if state.match_index >= 0 {
|
||||||
if group_index < 0 {
|
if group_index < 0 {
|
||||||
//println("OK match,natural end [$first_match,$i]")
|
|
||||||
return first_match, i
|
if re.prog[pc].ist == ist_prog_end {
|
||||||
|
//println("program ended!!")
|
||||||
|
|
||||||
|
if (re.flag & f_src) != 0 {
|
||||||
|
//println("find return")
|
||||||
|
return first_match, i
|
||||||
|
} else {
|
||||||
|
return 0, i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//println("No Group here, natural end [$first_match,$i] state: ${state_str(m_state)} ist: $ist pgr_end: $re.prog.len")
|
||||||
|
|
||||||
|
if re.prog[pc+1].ist == ist_prog_end || re.prog[pc].ist == ist_prog_end{
|
||||||
|
rep := re.prog[pc].rep
|
||||||
|
//println("rep: $rep re.prog[pc].rep_min: ${re.prog[pc].rep_min} re.prog[pc].rep_max: ${re.prog[pc].rep_max}")
|
||||||
|
if rep >= re.prog[pc].rep_min && rep <= re.prog[pc].rep_max {
|
||||||
|
return first_match, i
|
||||||
|
}
|
||||||
|
//println("Program not finished! ")
|
||||||
|
return no_match_found, 0
|
||||||
|
}
|
||||||
|
if src_end {
|
||||||
|
//println("program end")
|
||||||
|
return first_match, i
|
||||||
|
}
|
||||||
|
//print("No match found!!")
|
||||||
|
return no_match_found, 0
|
||||||
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
//println("Group match! OK")
|
||||||
|
//println("first_match: $first_match, i: $i")
|
||||||
|
|
||||||
//println("Skip last group")
|
//println("Skip last group")
|
||||||
return first_match,group_stack[group_index--]
|
return first_match,i
|
||||||
|
//return first_match,group_stack[group_index--]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//println("no_match_found, natural end")
|
//println("no_match_found, natural end")
|
||||||
|
@ -2224,7 +2294,12 @@ fn impl_new_regex_by_size(mult int) RE {
|
||||||
//
|
//
|
||||||
|
|
||||||
pub fn (mut re RE) match_string(in_txt string) (int,int) {
|
pub fn (mut re RE) match_string(in_txt string) (int,int) {
|
||||||
start, end := re.match_base(in_txt.str,in_txt.len)
|
|
||||||
|
start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
|
||||||
|
if end > in_txt.len {
|
||||||
|
end = in_txt.len
|
||||||
|
}
|
||||||
|
|
||||||
if start >= 0 && end > start {
|
if start >= 0 && end > start {
|
||||||
if (re.flag & f_ms) != 0 && start > 0 {
|
if (re.flag & f_ms) != 0 && start > 0 {
|
||||||
return no_match_found, 0
|
return no_match_found, 0
|
||||||
|
@ -2247,9 +2322,15 @@ pub fn (mut re RE) match_string(in_txt string) (int,int) {
|
||||||
// find tries to find the first match in the input string
|
// find tries to find the first match in the input string
|
||||||
pub fn (mut re RE) find(in_txt string) (int,int) {
|
pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
old_flag := re.flag
|
old_flag := re.flag
|
||||||
|
|
||||||
re.flag |= f_src // enable search mode
|
re.flag |= f_src // enable search mode
|
||||||
start, end := re.match_base(in_txt.str, in_txt.len)
|
start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
|
||||||
|
//print("Find [$start,$end] '${in_txt[start..end]}'")
|
||||||
|
if end > in_txt.len {
|
||||||
|
end = in_txt.len
|
||||||
|
}
|
||||||
re.flag = old_flag
|
re.flag = old_flag
|
||||||
|
|
||||||
if start >= 0 && end > start {
|
if start >= 0 && end > start {
|
||||||
return start, end
|
return start, end
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,11 +7,11 @@ pub fn (mut re RE) compile_opt(pattern string) ? {
|
||||||
|
|
||||||
if re_err != compile_ok {
|
if re_err != compile_ok {
|
||||||
mut err_msg := strings.new_builder(300)
|
mut err_msg := strings.new_builder(300)
|
||||||
err_str := re.get_parse_error_string(re_err)
|
err_msg.write("query: $pattern\n")
|
||||||
err_msg.write("$err_str\n")
|
|
||||||
err_msg.write(" query: $pattern\n")
|
|
||||||
line := "-".repeat(err_pos)
|
line := "-".repeat(err_pos)
|
||||||
err_msg.write(" err pos: ${line}^")
|
err_msg.write("err : ${line}^\n")
|
||||||
|
err_str := re.get_parse_error_string(re_err)
|
||||||
|
err_msg.write("ERROR: $err_str\n")
|
||||||
return error_with_code(err_msg.str(), re_err)
|
return error_with_code(err_msg.str(), re_err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,12 @@ struct TestItem {
|
||||||
|
|
||||||
const(
|
const(
|
||||||
match_test_suite = [
|
match_test_suite = [
|
||||||
|
// base OR
|
||||||
|
TestItem{"a",r"a|b",0,1},
|
||||||
|
TestItem{"a",r"b|a",0,1},
|
||||||
|
TestItem{"b",r"a|b",0,1},
|
||||||
|
TestItem{"b",r"b|a",0,1},
|
||||||
|
TestItem{"c",r"b|a",-1,0},
|
||||||
|
|
||||||
// positive
|
// positive
|
||||||
TestItem{"this is a good.",r"this",0,4},
|
TestItem{"this is a good.",r"this",0,4},
|
||||||
|
@ -38,7 +44,7 @@ match_test_suite = [
|
||||||
TestItem{"this these those ",r"(th[eio]se? ?)+",0,17},
|
TestItem{"this these those ",r"(th[eio]se? ?)+",0,17},
|
||||||
TestItem{"this these those ",r"(th[eio]se? )+",0,17},
|
TestItem{"this these those ",r"(th[eio]se? )+",0,17},
|
||||||
TestItem{"this,these,those. over",r"(th[eio]se?[,. ])+",0,17},
|
TestItem{"this,these,those. over",r"(th[eio]se?[,. ])+",0,17},
|
||||||
TestItem{"soday,this,these,those. over",r"(th[eio]se?[,. ])+",6,23},
|
TestItem{"soday,this,these,those. over",r".+(th[eio]se?[,. ])+",0,23},
|
||||||
|
|
||||||
TestItem{"cpapaz",r"(c(pa)+z)",0,6},
|
TestItem{"cpapaz",r"(c(pa)+z)",0,6},
|
||||||
TestItem{"this is a cpapaz over",r"(c(pa)+z)",10,16},
|
TestItem{"this is a cpapaz over",r"(c(pa)+z)",10,16},
|
||||||
|
@ -60,7 +66,7 @@ match_test_suite = [
|
||||||
TestItem{"soday,this,these,those. over",r".*,(th[eio]se?[,. ])+",0,23},
|
TestItem{"soday,this,these,those. over",r".*,(th[eio]se?[,. ])+",0,23},
|
||||||
TestItem{"soday,this,these,thesa.thesi over",r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+",0,29},
|
TestItem{"soday,this,these,thesa.thesi over",r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+",0,29},
|
||||||
TestItem{"cpapaz ole. pippo,",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
|
TestItem{"cpapaz ole. pippo,",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
|
||||||
TestItem{"cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,17},
|
TestItem{"cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",0,17},
|
||||||
TestItem{"cpapaz ole. pippo, 852",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
|
TestItem{"cpapaz ole. pippo, 852",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
|
||||||
TestItem{"123cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
|
TestItem{"123cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
|
||||||
TestItem{"...cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
|
TestItem{"...cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
|
||||||
|
@ -74,7 +80,6 @@ match_test_suite = [
|
||||||
|
|
||||||
TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$",0,29}
|
TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$",0,29}
|
||||||
|
|
||||||
|
|
||||||
// negative
|
// negative
|
||||||
TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
|
TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
|
||||||
TestItem{"this is a good.",r"thes",-1,0},
|
TestItem{"this is a good.",r"thes",-1,0},
|
||||||
|
@ -88,6 +93,25 @@ match_test_suite = [
|
||||||
// check unicode
|
// check unicode
|
||||||
TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
|
TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
|
||||||
TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r"[Ⅰ-Ⅴ\s]+",3,23},
|
TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r"[Ⅰ-Ⅴ\s]+",3,23},
|
||||||
|
|
||||||
|
// new edge cases
|
||||||
|
TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",-1,0},
|
||||||
|
TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",0,8},
|
||||||
|
TestItem{"123456789", r"^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$",0,9}
|
||||||
|
TestItem{"12345678", r"^\d{8}$",0,8},
|
||||||
|
TestItem{"12345678", r"^\d{7}$",-1,0},
|
||||||
|
TestItem{"12345678", r"^\d{9}$",-1,0},
|
||||||
|
|
||||||
|
TestItem{"eth", r"(oth)|(eth)",0,3},
|
||||||
|
TestItem{"et", r"(oth)|(eth)",-1,0},
|
||||||
|
TestItem{"et", r".*(oth)|(eth)",-1,0},
|
||||||
|
TestItem{"peoth", r".*(ith)|(eth)",-1,0},
|
||||||
|
|
||||||
|
TestItem{"poth", r"(eth)|(oth)",1,4},
|
||||||
|
TestItem{"poth", r"(oth)|(eth)",1,4},
|
||||||
|
TestItem{"poth", r".(oth)|(eth)$",0,4},
|
||||||
|
TestItem{"poth", r"^.(oth)|(eth)$",0,4},
|
||||||
|
TestItem{"poth", r"^\w+$",0,4},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -150,30 +174,35 @@ const (
|
||||||
cgroups_test_suite = [
|
cgroups_test_suite = [
|
||||||
TestItemCGroup{
|
TestItemCGroup{
|
||||||
"http://www.ciao.mondo/hello/pippo12_/pera.html",
|
"http://www.ciao.mondo/hello/pippo12_/pera.html",
|
||||||
r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+",0,46,
|
r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+[\.|/])+",0,42,
|
||||||
[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
|
[7, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42],
|
||||||
{'format':int(0),'token':1}
|
{'format':int(0),'token':1}
|
||||||
},
|
},
|
||||||
TestItemCGroup{
|
TestItemCGroup{
|
||||||
"http://www.ciao.mondo/hello/pippo12_/pera.html",
|
"http://www.ciao.mondo/hello/pippo12_/pera.html",
|
||||||
r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
|
r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
|
||||||
[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
|
[2, 0, 0, 4, 1, 7, 10],
|
||||||
{'format':int(0),'token':1}
|
{'format':int(0),'token':1}
|
||||||
},
|
},
|
||||||
TestItemCGroup{
|
TestItemCGroup{
|
||||||
"http://www.ciao.mondo/hello/pippo12_/pera.html",
|
"http://www.ciao.mondo/hello/pippo12_/pera.html",
|
||||||
r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+.)+",0,46,
|
r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+\.)+",0,16,
|
||||||
[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
|
[3, 0, 0, 4, 1, 7, 11, 1, 11, 16],
|
||||||
{'format':int(0)}
|
{'format':int(0)}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
debug = false // true for debug println
|
||||||
|
)
|
||||||
|
|
||||||
fn test_regex(){
|
fn test_regex(){
|
||||||
|
|
||||||
// check capturing groups
|
// check capturing groups
|
||||||
for c,to in cgroups_test_suite {
|
for c,to in cgroups_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
//println("#$c [$to.src] q[$to.q] ($to.s, $to.e)")
|
if debug { println("#$c [$to.src] q[$to.q] ($to.s, $to.e)") }
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
|
@ -191,16 +220,16 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
//println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||||
println("ERROR!")
|
println("ERROR!")
|
||||||
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// check cgroups
|
// check cgroups
|
||||||
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
||||||
println("Capturing group len error!")
|
println("Capturing group len error! ${re.group_csave[0]}")
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -225,9 +254,9 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
|
|
||||||
// check find_all
|
// check find_all
|
||||||
for _,to in match_test_suite_fa{
|
for c,to in match_test_suite_fa{
|
||||||
// debug print
|
// debug print
|
||||||
//println("#$c [$to.src] q[$to.q] $to.r")
|
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
|
@ -253,9 +282,9 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
|
|
||||||
// check replace
|
// check replace
|
||||||
for _,to in match_test_suite_re{
|
for c,to in match_test_suite_re{
|
||||||
// debug print
|
// debug print
|
||||||
//println("#$c [$to.src] q[$to.q] $to.r")
|
if debug { println("#$c [$to.src] q[$to.q] $to.r") }
|
||||||
|
|
||||||
mut re := regex.regex_opt(to.q) or {
|
mut re := regex.regex_opt(to.q) or {
|
||||||
eprintln('err: $err')
|
eprintln('err: $err')
|
||||||
|
@ -274,7 +303,7 @@ fn test_regex(){
|
||||||
// check match and find
|
// check match and find
|
||||||
for c,to in match_test_suite {
|
for c,to in match_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
println("#$c [$to.src] q[$to.q] $to.s $to.e")
|
if debug { println("#$c [$to.src] q[$to.q] $to.s $to.e") }
|
||||||
|
|
||||||
// test the find
|
// test the find
|
||||||
if to.s > 0 {
|
if to.s > 0 {
|
||||||
|
@ -289,7 +318,7 @@ fn test_regex(){
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
err_str := re.get_parse_error_string(start)
|
err_str := re.get_parse_error_string(start)
|
||||||
println("ERROR : $err_str")
|
println("ERROR : $err_str start: ${start} end: ${end}")
|
||||||
assert false
|
assert false
|
||||||
} else {
|
} else {
|
||||||
//tmp_str := text[start..end]
|
//tmp_str := text[start..end]
|
||||||
|
@ -334,4 +363,7 @@ fn test_regex(){
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
if debug { println("DONE!") }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
import regex
|
|
||||||
|
|
||||||
const (
|
|
||||||
a_or_b = regex.regex_opt('a|b') ?
|
|
||||||
)
|
|
||||||
|
|
||||||
fn f(s string) bool {
|
|
||||||
mut re := a_or_b
|
|
||||||
start, _ := re.match_string(s)
|
|
||||||
return start != -1
|
|
||||||
}
|
|
||||||
|
|
||||||
fn test_const_regex_works() {
|
|
||||||
assert f('a') == true
|
|
||||||
assert f('b') == true
|
|
||||||
assert f('c') == false
|
|
||||||
}
|
|
Loading…
Reference in New Issue