parent
6bd70bc88a
commit
4324be9bd8
|
@ -256,6 +256,9 @@ mut:
|
||||||
|
|
||||||
// OR flag for the token
|
// OR flag for the token
|
||||||
next_is_or bool // true if the next token is an OR
|
next_is_or bool // true if the next token is an OR
|
||||||
|
|
||||||
|
// last_dot flag
|
||||||
|
last_dot bool
|
||||||
}
|
}
|
||||||
|
|
||||||
[inline]
|
[inline]
|
||||||
|
@ -1192,16 +1195,44 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
// Post processing
|
// Post processing
|
||||||
//******************************************
|
//******************************************
|
||||||
|
|
||||||
|
//
|
||||||
|
// manage ist_dot_char
|
||||||
|
//
|
||||||
// count ist_dot_char to set the size of the state stack
|
// count ist_dot_char to set the size of the state stack
|
||||||
mut pc1 := 0
|
mut pc1 := 0
|
||||||
mut tmp_count := 0
|
mut tmp_count := 0
|
||||||
|
mut last_dot_pc := -1
|
||||||
for pc1 < pc {
|
for pc1 < pc {
|
||||||
if re.prog[pc1].ist == ist_dot_char {
|
if re.prog[pc1].ist == ist_dot_char {
|
||||||
tmp_count++
|
tmp_count++
|
||||||
|
last_dot_pc = pc1
|
||||||
|
//println("Found dot_char pc:[${last_dot_pc}]")
|
||||||
}
|
}
|
||||||
pc1++
|
pc1++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if a dot_char token exists, flag the last one so that the final .* can be managed
|
||||||
|
if last_dot_pc >= 0 {
|
||||||
|
re.prog[last_dot_pc].last_dot = true
|
||||||
|
|
||||||
|
mut last_dot_flag := true
|
||||||
|
mut tmp_pc := last_dot_pc + 1
|
||||||
|
for tmp_pc < pc {
|
||||||
|
if re.prog[tmp_pc].ist !in [rune(ist_prog_end),ist_group_end] {
|
||||||
|
last_dot_flag = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
tmp_pc++
|
||||||
|
}
|
||||||
|
re.prog[last_dot_pc].last_dot = last_dot_flag
|
||||||
|
//println("Our last dot flag pc: ${last_dot_pc} flag: ${last_dot_flag}")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//******************************************
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// init the state stack
|
// init the state stack
|
||||||
re.state_stack = []StateDotObj{len: tmp_count+1, init: StateDotObj{}}
|
re.state_stack = []StateDotObj{len: tmp_count+1, init: StateDotObj{}}
|
||||||
|
|
||||||
|
@ -1325,6 +1356,12 @@ pub fn (re RE) get_code() string {
|
||||||
res.write("?")
|
res.write("?")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// last dot char flag
|
||||||
|
if tk.last_dot == true {
|
||||||
|
res.write(" Last dot_char!")
|
||||||
|
}
|
||||||
|
|
||||||
res.write("\n")
|
res.write("\n")
|
||||||
if stop_flag {
|
if stop_flag {
|
||||||
break
|
break
|
||||||
|
@ -1502,7 +1539,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
mut pc := -1 // program counter
|
mut pc := -1 // program counter
|
||||||
mut state := StateObj{} // actual state
|
mut state := StateObj{} // actual state
|
||||||
mut ist := rune(0) // actual instruction
|
mut ist := rune(0) // actual instruction
|
||||||
mut l_ist :=rune(0) // last matched instruction
|
mut l_ist := rune(0) // last matched instruction
|
||||||
|
|
||||||
//mut group_stack := [-1].repeat(re.group_max)
|
//mut group_stack := [-1].repeat(re.group_max)
|
||||||
//mut group_data := [-1].repeat(re.group_max)
|
//mut group_data := [-1].repeat(re.group_max)
|
||||||
|
@ -1581,6 +1618,9 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
buf2.write("CHAR_CLASS_NEG[${re.get_char_class(pc)}]")
|
buf2.write("CHAR_CLASS_NEG[${re.get_char_class(pc)}]")
|
||||||
} else if ist == ist_dot_char {
|
} else if ist == ist_dot_char {
|
||||||
buf2.write("DOT_CHAR")
|
buf2.write("DOT_CHAR")
|
||||||
|
if re.prog[pc].last_dot == true {
|
||||||
|
buf2.write(" Last dot_char!")
|
||||||
|
}
|
||||||
} else if ist == ist_group_start {
|
} else if ist == ist_group_start {
|
||||||
tmp_gi :=re.prog[pc].group_id
|
tmp_gi :=re.prog[pc].group_id
|
||||||
tmp_gr := re.prog[re.prog[pc].goto_pc].group_rep
|
tmp_gr := re.prog[re.prog[pc].goto_pc].group_rep
|
||||||
|
@ -1638,6 +1678,11 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
re.groups[g_index+1] = i
|
re.groups[g_index+1] = i
|
||||||
|
|
||||||
|
// manage last dot_char
|
||||||
|
if l_ist == ist_dot_char && re.prog[pc].last_dot == true {
|
||||||
|
re.groups[g_index+1]--
|
||||||
|
}
|
||||||
|
|
||||||
// continuous save, save until we have space
|
// continuous save, save until we have space
|
||||||
if re.group_csave_index > 0 {
|
if re.group_csave_index > 0 {
|
||||||
// check if we have space to save the record
|
// check if we have space to save the record
|
||||||
|
@ -1676,6 +1721,11 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
//println("Ok .* rep match!")
|
//println("Ok .* rep match!")
|
||||||
return first_match,i
|
return first_match,i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// manage last dot_char
|
||||||
|
if first_match >= 0 && l_ist == ist_dot_char && re.prog[pc].last_dot == true {
|
||||||
|
return first_match,i
|
||||||
|
}
|
||||||
|
|
||||||
//m_state = .end
|
//m_state = .end
|
||||||
//break
|
//break
|
||||||
|
@ -1798,6 +1848,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
// check GROUP end
|
// check GROUP end
|
||||||
else if ist == ist_group_end {
|
else if ist == ist_group_end {
|
||||||
// we are in matching streak
|
// we are in matching streak
|
||||||
|
//println("Group END!! last ist: ${l_ist:08x}")
|
||||||
if state.match_index >= 0 {
|
if state.match_index >= 0 {
|
||||||
// restore txt index stack and save the group data
|
// restore txt index stack and save the group data
|
||||||
|
|
||||||
|
@ -1816,9 +1867,14 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
re.groups[g_index+1] = i
|
re.groups[g_index+1] = i
|
||||||
|
|
||||||
// if a group end with a dot, manage the not increased char index
|
// if a group end with a dot, manage the not increased char index
|
||||||
|
/*
|
||||||
if i == re.groups[g_index] {
|
if i == re.groups[g_index] {
|
||||||
re.groups[g_index+1] = i+1
|
re.groups[g_index+1] = i+1
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
if l_ist == ist_dot_char {
|
||||||
|
re.groups[g_index+1] = i+1
|
||||||
|
}
|
||||||
|
|
||||||
//println("GROUP ${re.prog[pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}]")
|
//println("GROUP ${re.prog[pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}]")
|
||||||
|
|
||||||
|
@ -1873,8 +1929,8 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
state.match_index = i
|
state.match_index = i
|
||||||
re.prog[pc].rep++ // increase repetitions
|
re.prog[pc].rep++ // increase repetitions
|
||||||
|
|
||||||
//if re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max {
|
if re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max {
|
||||||
if re.prog[pc].rep >= 0 && re.prog[pc].rep <= re.prog[pc].rep_max {
|
//if re.prog[pc].rep >= 0 && re.prog[pc].rep <= re.prog[pc].rep_max {
|
||||||
//println("DOT CHAR save state : ${re.state_stack_index}")
|
//println("DOT CHAR save state : ${re.state_stack_index}")
|
||||||
// save the state
|
// save the state
|
||||||
|
|
||||||
|
@ -1893,18 +1949,21 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if re.prog[pc].rep >= 1 && re.state_stack_index >= 0 {
|
if re.prog[pc].rep >= 1 && re.state_stack_index >= 0 {
|
||||||
|
//println("Save state char index.")
|
||||||
re.state_stack[re.state_stack_index].i = i + char_len
|
re.state_stack[re.state_stack_index].i = i + char_len
|
||||||
}
|
}
|
||||||
|
|
||||||
//i += char_len // next char
|
// manage last dot char
|
||||||
/*
|
if re.prog[pc].last_dot == true
|
||||||
// manage * and {0,} quantifier
|
&& re.prog[pc].rep >= re.prog[pc].rep_min
|
||||||
if re.prog[pc].rep_max == max_quantifier {
|
&& re.prog[pc].rep <= re.prog[pc].rep_max
|
||||||
//println("manage .*")
|
{
|
||||||
|
//println("We are the last dot_char in the query")
|
||||||
|
i += char_len
|
||||||
m_state = .ist_load
|
m_state = .ist_load
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
m_state = .ist_next
|
m_state = .ist_next
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -2078,7 +2137,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
|
|
||||||
}
|
}
|
||||||
// ist_quant_pg => quantifier positive test on group
|
// ist_quant_pg => quantifier positive test on group
|
||||||
else if m_state == .ist_quant_pg {
|
else if m_state == .ist_quant_pg {
|
||||||
//println(".ist_quant_pg")
|
//println(".ist_quant_pg")
|
||||||
mut tmp_pc := pc
|
mut tmp_pc := pc
|
||||||
if group_index >= 0 {
|
if group_index >= 0 {
|
||||||
|
|
|
@ -112,6 +112,13 @@ match_test_suite = [
|
||||||
TestItem{"poth", r".(oth)|(eth)$",0,4},
|
TestItem{"poth", r".(oth)|(eth)$",0,4},
|
||||||
TestItem{"poth", r"^.(oth)|(eth)$",0,4},
|
TestItem{"poth", r"^.(oth)|(eth)$",0,4},
|
||||||
TestItem{"poth", r"^\w+$",0,4},
|
TestItem{"poth", r"^\w+$",0,4},
|
||||||
|
|
||||||
|
// test dot_char
|
||||||
|
TestItem{"8-11 l: qllllqllklhlvtl", r"^(\d+)-(\d+) ([a-z]): (.*)$",0,23},
|
||||||
|
TestItem{"accccb deer", r"^a(.*)b d(.+)r",0,11},
|
||||||
|
TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11},
|
||||||
|
TestItem{"accccb deer", r"^(.*)$",0,11},
|
||||||
|
TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,0},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -167,7 +174,7 @@ struct TestItemCGroup {
|
||||||
q string
|
q string
|
||||||
s int
|
s int
|
||||||
e int
|
e int
|
||||||
cg []int
|
cg []int // [number of items (3 values follow per item), id_group_0, start_0, end_0, id_group_1, start_1, end_1, ... ]
|
||||||
cgn map[string]int
|
cgn map[string]int
|
||||||
}
|
}
|
||||||
const (
|
const (
|
||||||
|
@ -181,7 +188,7 @@ cgroups_test_suite = [
|
||||||
TestItemCGroup{
|
TestItemCGroup{
|
||||||
"http://www.ciao.mondo/hello/pippo12_/pera.html",
|
"http://www.ciao.mondo/hello/pippo12_/pera.html",
|
||||||
r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
|
r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
|
||||||
[2, 0, 0, 4, 1, 7, 10],
|
[8, 0, 0, 4, 1, 7, 12, 1, 11, 17, 1, 16, 23, 1, 22, 29, 1, 28, 38, 1, 37, 43, 1, 42, 46],
|
||||||
{'format':int(0),'token':1}
|
{'format':int(0),'token':1}
|
||||||
},
|
},
|
||||||
TestItemCGroup{
|
TestItemCGroup{
|
||||||
|
@ -247,7 +254,7 @@ fn test_regex(){
|
||||||
// check cgroups
|
// check cgroups
|
||||||
if to.cgn.len > 0 {
|
if to.cgn.len > 0 {
|
||||||
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
||||||
println("Capturing group len error! ${re.group_csave[0]}")
|
println("Capturing group len error! found: ${re.group_csave[0]} true ground: ${to.cg[0]}")
|
||||||
assert false
|
assert false
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -256,6 +263,7 @@ fn test_regex(){
|
||||||
mut ln := re.group_csave[0]*3
|
mut ln := re.group_csave[0]*3
|
||||||
for ln > 0 {
|
for ln > 0 {
|
||||||
if re.group_csave[ln] != to.cg[ln] {
|
if re.group_csave[ln] != to.cg[ln] {
|
||||||
|
println("Capturing group failed on $ln item!")
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
ln--
|
ln--
|
||||||
|
|
Loading…
Reference in New Issue