regex: small fixes, '-' minus to char classes, remove all C.printf

pull/4921/head
penguindark 2020-05-16 17:11:13 +02:00 committed by GitHub
parent 57dd26650c
commit 48659f4145
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 85 additions and 76 deletions

View File

@ -49,6 +49,8 @@ A cc can contain meta-chars like: `[a-z\d]` that matches all the lowercase latin
It is possible to mix all the properties of the char class together. It is possible to mix all the properties of the char class together.
**Note:** In order to match the `-` (minus) char, it must be located at the first position in the cc, for example `[-_\d\a]` will match `-` minus, `_`underscore, `\d` numeric chars, `\a` lower case chars.
### Meta-chars ### Meta-chars
A meta-char is specified by a backslash before a char like `\w` in this case the meta-char is `w`. A meta-char is specified by a backslash before a char like `\w` in this case the meta-char is `w`.

View File

@ -1,6 +1,6 @@
/********************************************************************** /**********************************************************************
* *
* regex 0.9d * regex 0.9e
* *
* Copyright (c) 2019-2020 Dario Deledda. All rights reserved. * Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
* Use of this source code is governed by an MIT license * Use of this source code is governed by an MIT license
@ -19,7 +19,7 @@ module regex
import strings import strings
pub const( pub const(
V_REGEX_VERSION = "0.9d" // regex module version V_REGEX_VERSION = "0.9e" // regex module version
MAX_CODE_LEN = 256 // default small base code len for the regex programs MAX_CODE_LEN = 256 // default small base code len for the regex programs
MAX_QUANTIFIER = 1073741824 // default max repetitions allowed for the quantifiers = 2^30 MAX_QUANTIFIER = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
@ -315,17 +315,17 @@ pub mut:
group_max int = 8 // max allowed number of different groups group_max int = 8 // max allowed number of different groups
group_csave []int = []int{} // groups continuous save array group_csave []int = []int{} // groups continuous save array
group_csave_index int= -1 // groups continuous save index group_csave_index int= -1 // groups continuous save index
group_map map[string]int // groups names map group_map map[string]int // groups names map
// flags // flags
flag int = 0 // flag for optional parameters flag int = 0 // flag for optional parameters
// Debug/log // Debug/log
debug int = 0 // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE debug int = 0 // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE
log_func FnLog = simple_log // log function, can be customized by the user log_func FnLog = simple_log // log function, can be customized by the user
query string = "" // query string query string = "" // query string
} }
// Reset RE object // Reset RE object
@ -362,6 +362,7 @@ fn (re mut RE) reset_src(){
re.state_stack_index = -1 re.state_stack_index = -1
} }
// get_group get a group boundaries by its name
pub fn (re RE) get_group(group_name string) (int, int) { pub fn (re RE) get_group(group_name string) (int, int) {
if group_name in re.group_map { if group_name in re.group_map {
tmp_index := re.group_map[group_name]-1 tmp_index := re.group_map[group_name]-1
@ -379,7 +380,7 @@ pub fn (re RE) get_group(group_name string) (int, int) {
******************************************************************************/ ******************************************************************************/
struct BslsStruct { struct BslsStruct {
ch u32 // meta char ch u32 // meta char
validator FnValidator // validator function pointer validator FnValidator // validator function pointer
} }
const( const(
@ -395,7 +396,7 @@ const(
] ]
// these chars are escape if preceded by a \ // these chars are escape if preceded by a \
BSLS_ESCAPE_LIST = [ `\\`,`|`,`.`,`*`,`+`,`{`,`}`,`[`,`]` ] BSLS_ESCAPE_LIST = [ `\\`,`|`,`.`,`*`,`+`,`-`,`{`,`}`,`[`,`]` ]
) )
enum BSLS_parse_state { enum BSLS_parse_state {
@ -563,7 +564,7 @@ fn (re mut RE) parse_char_class(in_txt string, in_i int) (int, int, u32) {
char_tmp,char_len := re.get_char(in_txt,i) char_tmp,char_len := re.get_char(in_txt,i)
ch := byte(char_tmp) ch := byte(char_tmp)
//C.printf("CC #%3d ch: %c\n",i,ch) //println("CC #${i:3d} ch: ${ch:c}")
// negation // negation
if status == .start && ch == `^` { if status == .start && ch == `^` {
@ -572,19 +573,29 @@ fn (re mut RE) parse_char_class(in_txt string, in_i int) (int, int, u32) {
continue continue
} }
// minus symbol
if status == .start && ch == `-` {
re.cc[tmp_index].cc_type = CC_CHAR
re.cc[tmp_index].ch0 = char_tmp
re.cc[tmp_index].ch1 = char_tmp
i += char_len
tmp_index++
continue
}
// bsls // bsls
if (status == .start || status == .in_char) && ch == `\\` { if (status == .start || status == .in_char) && ch == `\\` {
//C.printf("CC bsls.\n") //println("CC bsls.")
status = .in_bsls status = .in_bsls
i += char_len i += char_len
continue continue
} }
if status == .in_bsls { if status == .in_bsls {
//C.printf("CC bsls validation.\n") //println("CC bsls validation.")
for c,x in BSLS_VALIDATOR_ARRAY { for c,x in BSLS_VALIDATOR_ARRAY {
if x.ch == ch { if x.ch == ch {
//C.printf("CC bsls found \\%c.\n",ch) //println("CC bsls found [${ch:c}]")
re.cc[tmp_index].cc_type = CC_BSLS re.cc[tmp_index].cc_type = CC_BSLS
re.cc[tmp_index].ch0 = BSLS_VALIDATOR_ARRAY[c].ch re.cc[tmp_index].ch0 = BSLS_VALIDATOR_ARRAY[c].ch
re.cc[tmp_index].ch1 = BSLS_VALIDATOR_ARRAY[c].ch re.cc[tmp_index].ch1 = BSLS_VALIDATOR_ARRAY[c].ch
@ -596,7 +607,7 @@ fn (re mut RE) parse_char_class(in_txt string, in_i int) (int, int, u32) {
} }
} }
if status == .in_bsls { if status == .in_bsls {
//C.printf("CC bsls not found \\%c.\n",ch) println("CC bsls not found [${ch:c}]")
status = .in_char status = .in_char
}else { }else {
continue continue
@ -680,7 +691,7 @@ fn (re RE) parse_quantifier(in_txt string, in_i int) (int, int, int, bool) {
for i < in_txt.len { for i < in_txt.len {
ch = in_txt.str[i] ch = in_txt.str[i]
//C.printf("%c status: %d\n",ch,status) //println("${ch:c} status: $status")
// exit on no compatible char with {} quantifier // exit on no compatible char with {} quantifier
if utf8util_char_len(ch) != 1 { if utf8util_char_len(ch) != 1 {
@ -904,7 +915,7 @@ pub fn (re mut RE) compile(in_txt string) (int,int) {
tmp_code = u32(0) tmp_code = u32(0)
mut char_tmp := u32(0) mut char_tmp := u32(0)
mut char_len := 0 mut char_len := 0
//C.printf("i: %3d ch: %c\n", i, in_txt.str[i]) //println("i: ${i:3d} ch: ${in_txt.str[i]:c}")
char_tmp,char_len = re.get_char(in_txt,i) char_tmp,char_len = re.get_char(in_txt,i)
@ -1035,19 +1046,19 @@ pub fn (re mut RE) compile(in_txt string) (int,int) {
mut quant_flag := true mut quant_flag := true
match byte(char_tmp) { match byte(char_tmp) {
`?` { `?` {
//C.printf("q: %c\n",char_tmp) //println("q: ${char_tmp:c}")
re.prog[pc-1].rep_min = 0 re.prog[pc-1].rep_min = 0
re.prog[pc-1].rep_max = 1 re.prog[pc-1].rep_max = 1
} }
`+` { `+` {
//C.printf("q: %c\n",char_tmp) //println("q: ${char_tmp:c}")
re.prog[pc-1].rep_min = 1 re.prog[pc-1].rep_min = 1
re.prog[pc-1].rep_max = MAX_QUANTIFIER re.prog[pc-1].rep_max = MAX_QUANTIFIER
} }
`*` { `*` {
//C.printf("q: %c\n",char_tmp) //println("q: ${char_tmp:c}")
re.prog[pc-1].rep_min = 0 re.prog[pc-1].rep_min = 0
re.prog[pc-1].rep_max = MAX_QUANTIFIER re.prog[pc-1].rep_max = MAX_QUANTIFIER
} }
@ -1056,7 +1067,7 @@ pub fn (re mut RE) compile(in_txt string) (int,int) {
min, max, tmp, greedy := re.parse_quantifier(in_txt, i+1) min, max, tmp, greedy := re.parse_quantifier(in_txt, i+1)
// it is a quantifier // it is a quantifier
if min >= 0 { if min >= 0 {
//C.printf("{%d,%d}\n str:[%s] greedy: %d\n", min, max, in_txt[i..i+tmp], greedy) //println("{$min,$max}\n str:[${in_txt[i..i+tmp]}] greedy:$greedy")
i = i + tmp i = i + tmp
re.prog[pc-1].rep_min = min re.prog[pc-1].rep_min = min
re.prog[pc-1].rep_max = max re.prog[pc-1].rep_max = max
@ -1090,7 +1101,7 @@ pub fn (re mut RE) compile(in_txt string) (int,int) {
if byte(char_tmp) == `[` { if byte(char_tmp) == `[` {
cc_index,tmp,cc_type := re.parse_char_class(in_txt, i+1) cc_index,tmp,cc_type := re.parse_char_class(in_txt, i+1)
if cc_index >= 0 { if cc_index >= 0 {
//C.printf("index: %d str:%s\n",cc_index,in_txt[i..i+tmp]) //println("index: $cc_index str:${in_txt[i..i+tmp]}")
i = i + tmp i = i + tmp
re.prog[pc].ist = u32(0) | cc_type re.prog[pc].ist = u32(0) | cc_type
re.prog[pc].cc_index = cc_index re.prog[pc].cc_index = cc_index
@ -1111,7 +1122,7 @@ pub fn (re mut RE) compile(in_txt string) (int,int) {
if char_len==1 && pc >= 0{ if char_len==1 && pc >= 0{
if byte(char_tmp) == `\\` { if byte(char_tmp) == `\\` {
bsls_index,tmp := re.parse_bsls(in_txt,i) bsls_index,tmp := re.parse_bsls(in_txt,i)
//C.printf("index: %d str:%s\n",bsls_index,in_txt[i..i+tmp]) //println("index: $bsls_index str:${in_txt[i..i+tmp]}")
if bsls_index >= 0 { if bsls_index >= 0 {
i = i + tmp i = i + tmp
re.prog[pc].ist = u32(0) | IST_BSLS_CHAR re.prog[pc].ist = u32(0) | IST_BSLS_CHAR
@ -1141,7 +1152,7 @@ pub fn (re mut RE) compile(in_txt string) (int,int) {
re.prog[pc].ch_len = char_len re.prog[pc].ch_len = char_len
re.prog[pc].rep_min = 1 re.prog[pc].rep_min = 1
re.prog[pc].rep_max = 1 re.prog[pc].rep_max = 1
//C.printf("char: %c\n",char_tmp) //println("char: ${char_tmp:c}")
pc = pc +1 pc = pc +1
i+=char_len i+=char_len
@ -1215,7 +1226,7 @@ pub fn (re mut RE) compile(in_txt string) (int,int) {
} }
pc2++ pc2++
} }
//C.printf("Compile OR postproc. [%d,OR %d,%d]\n",pc1,pc1+1,pc2) //println("Compile OR postproc. [$pc1,OR ${pc1+1},$pc2]")
pc1 = pc2 pc1 = pc2
continue continue
} }
@ -1496,7 +1507,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
if pc >= 0 && pc < re.prog.len { if pc >= 0 && pc < re.prog.len {
ist = re.prog[pc].ist ist = re.prog[pc].ist
}else if pc >= re.prog.len { }else if pc >= re.prog.len {
//C.printf("ERROR!! PC overflow!!\n") //println("ERROR!! PC overflow!!")
return ERR_INTERNAL_ERROR, i return ERR_INTERNAL_ERROR, i
} }
@ -1578,19 +1589,13 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
// manage groups // manage groups
if group_index >= 0 && state.match_index >= 0 { if group_index >= 0 && state.match_index >= 0 {
//C.printf("End text with open groups!\n") //println("End text with open groups!")
// close the groups // close the groups
for group_index >= 0 { for group_index >= 0 {
tmp_pc := group_data[group_index] tmp_pc := group_data[group_index]
re.prog[tmp_pc].group_rep++ re.prog[tmp_pc].group_rep++
/* //println("Closing group $group_index {${re.prog[tmp_pc].rep_min},${re.prog[tmp_pc].rep_max}}:${re.prog[tmp_pc].group_rep}")
C.printf("Closing group %d {%d,%d}:%d\n",
group_index,
re.prog[tmp_pc].rep_min,
re.prog[tmp_pc].rep_max,
re.prog[tmp_pc].group_rep
)
*/
if re.prog[tmp_pc].group_rep >= re.prog[tmp_pc].rep_min && re.prog[tmp_pc].group_id >= 0{ if re.prog[tmp_pc].group_rep >= re.prog[tmp_pc].rep_min && re.prog[tmp_pc].group_id >= 0{
start_i := group_stack[group_index] start_i := group_stack[group_index]
group_stack[group_index]=-1 group_stack[group_index]=-1
@ -1644,7 +1649,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
re.prog[pc].reset() re.prog[pc].reset()
// check if we are in the program bounds // check if we are in the program bounds
if pc < 0 || pc > re.prog.len { if pc < 0 || pc > re.prog.len {
//C.printf("ERROR!! PC overflow!!\n") //println("ERROR!! PC overflow!!")
return ERR_INTERNAL_ERROR, i return ERR_INTERNAL_ERROR, i
} }
m_state = .ist_load m_state = .ist_load
@ -1656,7 +1661,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
pc = pc + 1 pc = pc + 1
// check if we are in the program bounds // check if we are in the program bounds
if pc < 0 || pc > re.prog.len { if pc < 0 || pc > re.prog.len {
//C.printf("ERROR!! PC overflow!!\n") //println("ERROR!! PC overflow!!")
return ERR_INTERNAL_ERROR, i return ERR_INTERNAL_ERROR, i
} }
m_state = .ist_load m_state = .ist_load
@ -1687,7 +1692,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
} }
// if we are in restore state ,do it and restart // if we are in restore state ,do it and restart
//C.printf("re.state_stack_index %d\n",re.state_stack_index ) //println("re.state_stack_index ${re.state_stack_index}")
if re.state_stack_index >=0 && re.state_stack[re.state_stack_index].pc >= 0 { if re.state_stack_index >=0 && re.state_stack[re.state_stack_index].pc >= 0 {
i = re.state_stack[re.state_stack_index].i i = re.state_stack[re.state_stack_index].i
pc = re.state_stack[re.state_stack_index].pc pc = re.state_stack[re.state_stack_index].pc
@ -1718,7 +1723,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
} }
// we have a DOT MATCH on going // we have a DOT MATCH on going
//C.printf("IST_PROG_END l_ist: %08x\n", l_ist) //println("IST_PROG_END l_ist: ${l_ist:08x}", l_ist)
if re.state_stack_index>=0 && l_ist == IST_DOT_CHAR { if re.state_stack_index>=0 && l_ist == IST_DOT_CHAR {
m_state = .stop m_state = .stop
continue continue
@ -1735,7 +1740,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
group_index++ group_index++
group_data[group_index] = re.prog[pc].goto_pc // save where is IST_GROUP_END, we will use it for escape group_data[group_index] = re.prog[pc].goto_pc // save where is IST_GROUP_END, we will use it for escape
group_stack[group_index]=i // index where we start to manage group_stack[group_index]=i // index where we start to manage
//C.printf("group_index %d rep %d\n", group_index, re.prog[re.prog[pc].goto_pc].group_rep) //println("group_index $group_index rep ${re.prog[re.prog[pc].goto_pc].group_rep}")
m_state = .ist_next m_state = .ist_next
continue continue
@ -1747,7 +1752,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
if state.match_index >= 0 { if state.match_index >= 0 {
// restore txt index stack and save the group data // restore txt index stack and save the group data
//C.printf("g.id: %d group_index: %d\n", re.prog[pc].group_id, group_index) //println("g.id: ${re.prog[pc].group_id} group_index: ${group_index}")
if group_index >= 0 && re.prog[pc].group_id >= 0 { if group_index >= 0 && re.prog[pc].group_id >= 0 {
start_i := group_stack[group_index] start_i := group_stack[group_index]
//group_stack[group_index]=-1 //group_stack[group_index]=-1
@ -1760,7 +1765,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
re.groups[g_index] = 0 re.groups[g_index] = 0
} }
re.groups[g_index+1] = i re.groups[g_index+1] = i
//C.printf("GROUP %d END [%d, %d]\n", re.prog[pc].group_id, re.groups[g_index], re.groups[g_index+1]) //println("GROUP ${re.prog[pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}]")
// continuous save, save until we have space // continuous save, save until we have space
if re.group_csave_index > 0 { if re.group_csave_index > 0 {
@ -1777,7 +1782,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
} }
re.prog[pc].group_rep++ // increase repetitions re.prog[pc].group_rep++ // increase repetitions
//C.printf("GROUP %d END %d\n", group_index, re.prog[pc].group_rep) //println("GROUP $group_index END ${re.prog[pc].group_rep}")
m_state = .ist_quant_pg m_state = .ist_quant_pg
continue continue
@ -1791,10 +1796,10 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
else if ist == IST_OR_BRANCH { else if ist == IST_OR_BRANCH {
if state.match_index >= 0 { if state.match_index >= 0 {
pc = re.prog[pc].rep_max pc = re.prog[pc].rep_max
//C.printf("IST_OR_BRANCH True pc: %d\n", pc) //println("IST_OR_BRANCH True pc: $pc")
}else{ }else{
pc = re.prog[pc].rep_min pc = re.prog[pc].rep_min
//C.printf("IST_OR_BRANCH False pc: %d\n", pc) //println("IST_OR_BRANCH False pc: $pc")
} }
re.prog[pc].reset() re.prog[pc].reset()
m_state == .ist_load m_state == .ist_load
@ -1803,7 +1808,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
// check IST_DOT_CHAR // check IST_DOT_CHAR
else if ist == IST_DOT_CHAR { else if ist == IST_DOT_CHAR {
//C.printf("IST_DOT_CHAR rep: %d\n", re.prog[pc].rep) //println("IST_DOT_CHAR rep: ${re.prog[pc].rep}")
state.match_flag = true state.match_flag = true
l_ist = u32(IST_DOT_CHAR) l_ist = u32(IST_DOT_CHAR)
@ -1815,7 +1820,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
//if re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max { //if re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max {
if re.prog[pc].rep >= 0 && re.prog[pc].rep <= re.prog[pc].rep_max { if re.prog[pc].rep >= 0 && re.prog[pc].rep <= re.prog[pc].rep_max {
//C.printf("DOT CHAR save state : %d\n", re.state_stack_index) //println("DOT CHAR save state : ${re.state_stack_index}")
// save the state // save the state
// manage first dot char // manage first dot char
@ -1884,7 +1889,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
else if ist == IST_BSLS_CHAR { else if ist == IST_BSLS_CHAR {
state.match_flag = false state.match_flag = false
tmp_res := re.prog[pc].validator(byte(ch)) tmp_res := re.prog[pc].validator(byte(ch))
//C.printf("BSLS in_ch: %c res: %d\n", ch, tmp_res) //println("BSLS in_ch: ${ch:c} res: $tmp_res")
if tmp_res { if tmp_res {
state.match_flag = true state.match_flag = true
l_ist = u32(IST_BSLS_CHAR) l_ist = u32(IST_BSLS_CHAR)
@ -1906,7 +1911,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
// simple char IST // simple char IST
else if ist == IST_SIMPLE_CHAR { else if ist == IST_SIMPLE_CHAR {
//C.printf("IST_SIMPLE_CHAR\n") //println("IST_SIMPLE_CHAR")
state.match_flag = false state.match_flag = false
if re.prog[pc].ch == ch if re.prog[pc].ch == ch
@ -1917,7 +1922,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
if first_match < 0 { if first_match < 0 {
first_match = i first_match = i
} }
//C.printf("state.match_index: %d\n", state.match_index) //println("state.match_index: ${state.match_index}")
state.match_index = i state.match_index = i
re.prog[pc].rep++ // increase repetitions re.prog[pc].rep++ // increase repetitions
@ -1929,7 +1934,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
continue continue
} }
/* UNREACHABLE */ /* UNREACHABLE */
//C.printf("PANIC2!! state: %d\n", m_state) //println("PANIC2!! state: $m_state")
return ERR_INTERNAL_ERROR, i return ERR_INTERNAL_ERROR, i
} }
@ -1942,7 +1947,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
// we are finished here // we are finished here
if group_index < 0 { if group_index < 0 {
//C.printf("Early stop!\n") //println("Early stop!")
result = NO_MATCH_FOUND result = NO_MATCH_FOUND
m_state = .stop m_state = .stop
continue continue
@ -1952,10 +1957,10 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
rep := re.prog[tmp_pc].group_rep // use a temp variable rep := re.prog[tmp_pc].group_rep // use a temp variable
re.prog[tmp_pc].group_rep = 0 // clear the repetitions re.prog[tmp_pc].group_rep = 0 // clear the repetitions
//C.printf(".ist_quant_ng group_pc_end: %d rep: %d\n", tmp_pc,rep) //println(".ist_quant_ng group_pc_end: $tmp_pc rep: $rep")
if rep >= re.prog[tmp_pc].rep_min { if rep >= re.prog[tmp_pc].rep_min {
//C.printf("ist_quant_ng GROUP CLOSED OK group_index: %d\n", group_index) //println("ist_quant_ng GROUP CLOSED OK group_index: $group_index")
i = group_stack[group_index] i = group_stack[group_index]
pc = tmp_pc pc = tmp_pc
@ -1964,7 +1969,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
continue continue
} }
else if re.prog[tmp_pc].next_is_or { else if re.prog[tmp_pc].next_is_or {
//C.printf("ist_quant_ng OR Negative branch\n") //println("ist_quant_ng OR Negative branch")
i = group_stack[group_index] i = group_stack[group_index]
pc = re.prog[tmp_pc+1].rep_min -1 pc = re.prog[tmp_pc+1].rep_min -1
@ -1973,7 +1978,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
continue continue
} }
else if rep>0 && rep < re.prog[tmp_pc].rep_min { else if rep>0 && rep < re.prog[tmp_pc].rep_min {
//C.printf("ist_quant_ng UNDER THE MINIMUM g.i: %d\n", group_index) //println("ist_quant_ng UNDER THE MINIMUM g.i: $group_index")
// check if we are inside a group, if yes exit from the nested groups // check if we are inside a group, if yes exit from the nested groups
if group_index > 0{ if group_index > 0{
@ -1995,7 +2000,7 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
continue continue
} }
else if rep==0 && rep < re.prog[tmp_pc].rep_min { else if rep==0 && rep < re.prog[tmp_pc].rep_min {
//C.printf("ist_quant_ng ZERO UNDER THE MINIMUM g.i: %d\n", group_index) //println("ist_quant_ng ZERO UNDER THE MINIMUM g.i: $group_index")
if group_index > 0{ if group_index > 0{
group_index-- group_index--
@ -2009,14 +2014,14 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
continue continue
} }
//C.printf("DO NOT STAY HERE!! {%d,%d}:%d\n", re.prog[tmp_pc].rep_min, re.prog[tmp_pc].rep_max, rep) //println("DO NOT STAY HERE!! {${re.prog[tmp_pc].rep_min},${re.prog[tmp_pc].rep_max}}:$rep")
/* UNREACHABLE */ /* UNREACHABLE */
return ERR_INTERNAL_ERROR, i return ERR_INTERNAL_ERROR, i
} }
// ist_quant_pg // ist_quant_pg
else if m_state == .ist_quant_pg { else if m_state == .ist_quant_pg {
//C.printf(".ist_quant_pg\n") //println(".ist_quant_pg")
mut tmp_pc := pc mut tmp_pc := pc
if group_index >= 0 { if group_index >= 0 {
tmp_pc = group_data[group_index] tmp_pc = group_data[group_index]
@ -2025,20 +2030,20 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
rep := re.prog[tmp_pc].group_rep rep := re.prog[tmp_pc].group_rep
if rep < re.prog[tmp_pc].rep_min { if rep < re.prog[tmp_pc].rep_min {
//C.printf("ist_quant_pg UNDER RANGE\n") //println("ist_quant_pg UNDER RANGE")
pc = re.prog[tmp_pc].goto_pc pc = re.prog[tmp_pc].goto_pc
m_state = .ist_next m_state = .ist_next
continue continue
} }
else if rep == re.prog[tmp_pc].rep_max { else if rep == re.prog[tmp_pc].rep_max {
//C.printf("ist_quant_pg MAX RANGE\n") //println("ist_quant_pg MAX RANGE")
re.prog[tmp_pc].group_rep = 0 // clear the repetitions re.prog[tmp_pc].group_rep = 0 // clear the repetitions
group_index-- group_index--
m_state = .ist_next m_state = .ist_next
continue continue
} }
else if rep >= re.prog[tmp_pc].rep_min { else if rep >= re.prog[tmp_pc].rep_min {
//C.printf("ist_quant_pg IN RANGE group_index:%d\n", group_index) //println("ist_quant_pg IN RANGE group_index:$group_index")
// check greedy flag, if true exit on minimum // check greedy flag, if true exit on minimum
if re.prog[tmp_pc].greedy == true { if re.prog[tmp_pc].greedy == true {
@ -2055,31 +2060,31 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
} }
/* UNREACHABLE */ /* UNREACHABLE */
//C.printf("PANIC3!! state: %d\n", m_state) //println("PANIC3!! state: $m_state")
return ERR_INTERNAL_ERROR, i return ERR_INTERNAL_ERROR, i
} }
// ist_quant_n // ist_quant_n
else if m_state == .ist_quant_n { else if m_state == .ist_quant_n {
rep := re.prog[pc].rep rep := re.prog[pc].rep
//C.printf("Here!! PC %d is_next_or: %d \n", pc, re.prog[pc].next_is_or) //println("Here!! PC $pc is_next_or: ${re.prog[pc].next_is_or}")
// zero quantifier * or ? // zero quantifier * or ?
if rep == 0 && re.prog[pc].rep_min == 0 { if rep == 0 && re.prog[pc].rep_min == 0 {
//C.printf("ist_quant_n ZERO RANGE MIN\n") //println("ist_quant_n ZERO RANGE MIN")
m_state = .ist_next // go to next ist m_state = .ist_next // go to next ist
continue continue
} }
// match + or * // match + or *
else if rep >= re.prog[pc].rep_min { else if rep >= re.prog[pc].rep_min {
//C.printf("ist_quant_n MATCH RANGE\n") //println("ist_quant_n MATCH RANGE")
m_state = .ist_next m_state = .ist_next
continue continue
} }
// check the OR if present // check the OR if present
if re.prog[pc].next_is_or { if re.prog[pc].next_is_or {
//C.printf("OR present on failing\n") //println("OR present on failing")
state.match_index = -1 state.match_index = -1
m_state = .ist_next m_state = .ist_next
continue continue
@ -2087,13 +2092,13 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
// we are in a group manage no match from here // we are in a group manage no match from here
if group_index >= 0 { if group_index >= 0 {
//C.printf("ist_quant_n FAILED insied a GROUP group_index:%d\n", group_index) //println("ist_quant_n FAILED insied a GROUP group_index:$group_index")
m_state = .ist_quant_ng m_state = .ist_quant_ng
continue continue
} }
// no other options // no other options
//C.printf("ist_quant_n NO_MATCH_FOUND\n") //println("ist_quant_n NO_MATCH_FOUND")
result = NO_MATCH_FOUND result = NO_MATCH_FOUND
m_state = .stop m_state = .stop
continue continue
@ -2111,14 +2116,14 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
// under range // under range
if rep > 0 && rep < re.prog[pc].rep_min { if rep > 0 && rep < re.prog[pc].rep_min {
//C.printf("ist_quant_p UNDER RANGE\n") //println("ist_quant_p UNDER RANGE")
m_state = .ist_load // continue the loop m_state = .ist_load // continue the loop
continue continue
} }
// range ok, continue loop // range ok, continue loop
else if rep >= re.prog[pc].rep_min && rep < re.prog[pc].rep_max { else if rep >= re.prog[pc].rep_min && rep < re.prog[pc].rep_max {
//C.printf("ist_quant_p IN RANGE\n") //println("ist_quant_p IN RANGE")
// check greedy flag, if true exit on minimum // check greedy flag, if true exit on minimum
if re.prog[pc].greedy == true { if re.prog[pc].greedy == true {
@ -2131,28 +2136,28 @@ pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
// max reached // max reached
else if rep == re.prog[pc].rep_max { else if rep == re.prog[pc].rep_max {
//C.printf("ist_quant_p MAX RANGE\n") //println("ist_quant_p MAX RANGE")
m_state = .ist_next m_state = .ist_next
continue continue
} }
} }
/* UNREACHABLE */ /* UNREACHABLE */
//C.printf("PANIC4!! state: %d\n", m_state) //println("PANIC4!! state: $m_state")
return ERR_INTERNAL_ERROR, i return ERR_INTERNAL_ERROR, i
} }
// Check the results // Check the results
if state.match_index >= 0 { if state.match_index >= 0 {
if group_index < 0 { if group_index < 0 {
//C.printf("OK match,natural end [%d,%d]\n", first_match, i) //println("OK match,natural end [$first_match,$i]")
return first_match, i return first_match, i
} else { } else {
//C.printf("Skip last group\n") //println("Skip last group")
return first_match,group_stack[group_index--] return first_match,group_stack[group_index--]
} }
} }
//C.printf("NO_MATCH_FOUND, natural end\n") //println("NO_MATCH_FOUND, natural end")
return NO_MATCH_FOUND, 0 return NO_MATCH_FOUND, 0
} }

View File

@ -72,6 +72,8 @@ match_test_suite = [
TestItem{" pippo pera",r"\s(.*)pe(.*)",0,11}, TestItem{" pippo pera",r"\s(.*)pe(.*)",0,11},
TestItem{" abb",r"\s(.*)",0,4}, TestItem{" abb",r"\s(.*)",0,4},
TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$",0,29}
// negative // negative
TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0}, TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
@ -81,6 +83,7 @@ match_test_suite = [
TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0}, TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0}, TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0}, TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},
TestItem{"/home/us_er/pippo/info-01.jpeg", r"(/?[-\w_]+)*\.txt$",-1,0}
// check unicode // check unicode
TestItem{"this is a test",r".*a [-Ⅵ ]+",0,34}, TestItem{"this is a test",r".*a [-Ⅵ ]+",0,34},
@ -94,7 +97,6 @@ struct TestItemFa {
r []int r []int
} }
const ( const (
match_test_suite_fa = [ match_test_suite_fa = [
// find_all tests // find_all tests