regix: fixes and optimizations
parent
23dd6e2294
commit
de832375b7
|
@ -1,4 +1,4 @@
|
||||||
# V RegEx (Regular expression) 0.9c
|
# V RegEx (Regular expression) 0.9d
|
||||||
|
|
||||||
[TOC]
|
[TOC]
|
||||||
|
|
||||||
|
|
|
@ -246,7 +246,7 @@ mut:
|
||||||
cc_index int = -1
|
cc_index int = -1
|
||||||
|
|
||||||
// counters for quantifier check (repetitions)
|
// counters for quantifier check (repetitions)
|
||||||
rep int = 0
|
rep int = 0
|
||||||
|
|
||||||
// validator function pointer
|
// validator function pointer
|
||||||
validator fn (byte) bool
|
validator fn (byte) bool
|
||||||
|
@ -257,9 +257,10 @@ mut:
|
||||||
goto_pc int = -1 // jump to this PC if is needed
|
goto_pc int = -1 // jump to this PC if is needed
|
||||||
|
|
||||||
// OR flag for the token
|
// OR flag for the token
|
||||||
next_is_or bool = false // true if the next token is an OR
|
next_is_or bool = false // true if the next token is an OR
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[inline]
|
||||||
fn (tok mut Token) reset() {
|
fn (tok mut Token) reset() {
|
||||||
tok.rep = 0
|
tok.rep = 0
|
||||||
}
|
}
|
||||||
|
@ -279,6 +280,7 @@ pub const (
|
||||||
|
|
||||||
// behaviour modifier flags
|
// behaviour modifier flags
|
||||||
//F_OR = 0x00010000 // the OR work with concatenation like PCRE
|
//F_OR = 0x00010000 // the OR work with concatenation like PCRE
|
||||||
|
F_SRC = 0x00020000 // search mode enabled
|
||||||
)
|
)
|
||||||
|
|
||||||
struct StateDotObj{
|
struct StateDotObj{
|
||||||
|
@ -324,8 +326,8 @@ pub mut:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset RE object
|
// Reset RE object
|
||||||
|
//[inline]
|
||||||
fn (re mut RE) reset(){
|
fn (re mut RE) reset(){
|
||||||
//re.group_count = 0
|
|
||||||
re.cc_index = 0
|
re.cc_index = 0
|
||||||
|
|
||||||
mut i := 0
|
mut i := 0
|
||||||
|
@ -345,6 +347,18 @@ fn (re mut RE) reset(){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// reset for search mode fail
|
||||||
|
// gcc bug, dont use [inline] or go 5 time slower
|
||||||
|
fn (re mut RE) reset_src(){
|
||||||
|
mut i := 0
|
||||||
|
for i < re.prog.len {
|
||||||
|
re.prog[i].group_rep = 0 // clear repetition of the group
|
||||||
|
re.prog[i].rep = 0 // clear repetition of the token
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
re.state_stack_index = -1
|
||||||
|
}
|
||||||
|
|
||||||
pub fn (re RE) get_group(group_name string) (int, int) {
|
pub fn (re RE) get_group(group_name string) (int, int) {
|
||||||
if group_name in re.group_map {
|
if group_name in re.group_map {
|
||||||
tmp_index := re.group_map[group_name]-1
|
tmp_index := re.group_map[group_name]-1
|
||||||
|
@ -1654,6 +1668,18 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
|
|
||||||
// check if stop
|
// check if stop
|
||||||
if m_state == .stop {
|
if m_state == .stop {
|
||||||
|
|
||||||
|
// we are in search mode, don't exit until the end
|
||||||
|
if re.flag & F_SRC != 0 && ist != IST_PROG_END {
|
||||||
|
pc = -1
|
||||||
|
i += char_len
|
||||||
|
m_state = .ist_next
|
||||||
|
re.reset_src()
|
||||||
|
state.match_index = -1
|
||||||
|
first_match = -1
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// if we are in restore state ,do it and restart
|
// if we are in restore state ,do it and restart
|
||||||
//C.printf("re.state_stack_index %d\n",re.state_stack_index )
|
//C.printf("re.state_stack_index %d\n",re.state_stack_index )
|
||||||
if re.state_stack_index >=0 && re.state_stack[re.state_stack_index].pc >= 0 {
|
if re.state_stack_index >=0 && re.state_stack[re.state_stack_index].pc >= 0 {
|
||||||
|
@ -2187,42 +2213,12 @@ pub fn (re mut RE) match_string(in_txt string) (int,int) {
|
||||||
|
|
||||||
// find try to find the first match in the input string
|
// find try to find the first match in the input string
|
||||||
pub fn (re mut RE) find(in_txt string) (int,int) {
|
pub fn (re mut RE) find(in_txt string) (int,int) {
|
||||||
mut i := 0
|
|
||||||
mut start := -1
|
|
||||||
mut end := -1
|
|
||||||
old_flag := re.flag
|
old_flag := re.flag
|
||||||
|
re.flag |= F_SRC // enable search mode
|
||||||
for i < in_txt.len {
|
start, end := re.match_base(in_txt.str, in_txt.len)
|
||||||
|
re.flag = old_flag
|
||||||
// test only the first part of the query string
|
if start >= 0 && end > start {
|
||||||
re.flag |= F_EFM // set to exit on the first token match
|
return start,end
|
||||||
mut tmp_end := i+re.query.len
|
|
||||||
if tmp_end > in_txt.len { tmp_end = in_txt.len }
|
|
||||||
tmp_txt := string{ str: in_txt.str+i, len: tmp_end-i }
|
|
||||||
start, end = re.match_base(tmp_txt.str, tmp_txt.len)
|
|
||||||
|
|
||||||
if start >= 0 && end > start {
|
|
||||||
// test a complete match
|
|
||||||
re.flag = old_flag
|
|
||||||
tmp_txt1 := string{ str: in_txt.str+i , len: in_txt.len-i }
|
|
||||||
start, end = re.match_base(tmp_txt1.str, tmp_txt1.len)
|
|
||||||
|
|
||||||
if start >= 0 && end > start {
|
|
||||||
if (re.flag & F_MS) != 0 && (i+start) > 0 {
|
|
||||||
return NO_MATCH_FOUND, 0
|
|
||||||
}
|
|
||||||
if (re.flag & F_ME) != 0 && (i+end) < in_txt.len {
|
|
||||||
return NO_MATCH_FOUND, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
return i+start, i+end
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
i++
|
|
||||||
if re.flag == F_MS && i>0 {
|
|
||||||
return NO_MATCH_FOUND, 0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return NO_MATCH_FOUND, 0
|
return NO_MATCH_FOUND, 0
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue