regex: fix compilation issues with gcc under ubuntu (#7112)
							parent
							
								
									793f9ae9e3
								
							
						
					
					
						commit
						15ffce1317
					
				| 
						 | 
					@ -27,7 +27,7 @@ pub const(
 | 
				
			||||||
	// spaces chars (here only westerns!!) TODO: manage all the spaces from unicode
 | 
						// spaces chars (here only westerns!!) TODO: manage all the spaces from unicode
 | 
				
			||||||
	spaces = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
 | 
						spaces = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
 | 
				
			||||||
	// new line chars for now only '\n'
 | 
						// new line chars for now only '\n'
 | 
				
			||||||
	new_line_list = [`\n`,`\r`]
 | 
						new_line_list = [`\n`, `\r`]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Results
 | 
						// Results
 | 
				
			||||||
	no_match_found          = -1
 | 
						no_match_found          = -1
 | 
				
			||||||
| 
						 | 
					@ -49,7 +49,7 @@ const(
 | 
				
			||||||
	//*************************************
 | 
						//*************************************
 | 
				
			||||||
	// regex program instructions
 | 
						// regex program instructions
 | 
				
			||||||
	//*************************************
 | 
						//*************************************
 | 
				
			||||||
	ist_simple_char  = u32(0x7FFFFFFF)   // single char instruction, 31 bit available to char
 | 
						ist_simple_char  = u32(0x7FFFFFFF)  // single char instruction, 31 bit available to char
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// char class 11 0100 AA xxxxxxxx
 | 
						// char class 11 0100 AA xxxxxxxx
 | 
				
			||||||
	// AA = 00  regular class
 | 
						// AA = 00  regular class
 | 
				
			||||||
| 
						 | 
					@ -92,9 +92,7 @@ fn utf8util_char_len(b byte) int {
 | 
				
			||||||
fn (re RE) get_char(in_txt string, i int) (u32,int) {
 | 
					fn (re RE) get_char(in_txt string, i int) (u32,int) {
 | 
				
			||||||
	ini := unsafe {in_txt.str[i]}
 | 
						ini := unsafe {in_txt.str[i]}
 | 
				
			||||||
	// ascii 8 bit
 | 
						// ascii 8 bit
 | 
				
			||||||
	if (re.flag & f_bin) !=0 ||
 | 
						if (re.flag & f_bin) !=0 ||	ini & 0x80 == 0 {
 | 
				
			||||||
		ini & 0x80 == 0
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		return u32(ini), 1
 | 
							return u32(ini), 1
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	// unicode char
 | 
						// unicode char
 | 
				
			||||||
| 
						 | 
					@ -102,7 +100,7 @@ fn (re RE) get_char(in_txt string, i int) (u32,int) {
 | 
				
			||||||
	mut tmp := 0
 | 
						mut tmp := 0
 | 
				
			||||||
	mut ch := u32(0)
 | 
						mut ch := u32(0)
 | 
				
			||||||
	for tmp < char_len {
 | 
						for tmp < char_len {
 | 
				
			||||||
		ch = (ch << 8) | unsafe {in_txt.str[i+tmp]}
 | 
							ch = (ch << 8) | unsafe {in_txt.str[i + tmp]}
 | 
				
			||||||
		tmp++
 | 
							tmp++
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return ch,char_len
 | 
						return ch,char_len
 | 
				
			||||||
| 
						 | 
					@ -112,9 +110,7 @@ fn (re RE) get_char(in_txt string, i int) (u32,int) {
 | 
				
			||||||
[inline]
 | 
					[inline]
 | 
				
			||||||
fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
 | 
					fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
 | 
				
			||||||
	// ascii 8 bit
 | 
						// ascii 8 bit
 | 
				
			||||||
	if (re.flag & f_bin) !=0 ||
 | 
						if (re.flag & f_bin) !=0 ||	unsafe {in_txt[i]} & 0x80 == 0 {
 | 
				
			||||||
		unsafe {in_txt[i]} & 0x80 == 0
 | 
					 | 
				
			||||||
	{
 | 
					 | 
				
			||||||
		return u32(unsafe {in_txt[i]}), 1
 | 
							return u32(unsafe {in_txt[i]}), 1
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	// unicode char
 | 
						// unicode char
 | 
				
			||||||
| 
						 | 
					@ -122,7 +118,7 @@ fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
 | 
				
			||||||
	mut tmp := 0
 | 
						mut tmp := 0
 | 
				
			||||||
	mut ch := u32(0)
 | 
						mut ch := u32(0)
 | 
				
			||||||
	for tmp < char_len {
 | 
						for tmp < char_len {
 | 
				
			||||||
		ch = (ch << 8) | unsafe {in_txt[i+tmp]}
 | 
							ch = (ch << 8) | unsafe {in_txt[i + tmp]}
 | 
				
			||||||
		tmp++
 | 
							tmp++
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return ch,char_len
 | 
						return ch,char_len
 | 
				
			||||||
| 
						 | 
					@ -131,11 +127,11 @@ fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
 | 
				
			||||||
[inline]
 | 
					[inline]
 | 
				
			||||||
fn is_alnum(in_char byte) bool {
 | 
					fn is_alnum(in_char byte) bool {
 | 
				
			||||||
	mut tmp := in_char - `A`
 | 
						mut tmp := in_char - `A`
 | 
				
			||||||
	if tmp >= 0x00 && tmp <= 25 { return true }
 | 
						if tmp <= 25 { return true }
 | 
				
			||||||
	tmp = in_char - `a`
 | 
						tmp = in_char - `a`
 | 
				
			||||||
	if tmp >= 0x00 && tmp <= 25 { return true }
 | 
						if tmp <= 25 { return true }
 | 
				
			||||||
	tmp = in_char - `0`
 | 
						tmp = in_char - `0`
 | 
				
			||||||
	if tmp >= 0x00 && tmp <= 9  { return true }
 | 
						if tmp <= 9  { return true }
 | 
				
			||||||
	if tmp == `_` { return true }
 | 
						if tmp == `_` { return true }
 | 
				
			||||||
	return false
 | 
						return false
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -158,7 +154,7 @@ fn is_not_space(in_char byte) bool {
 | 
				
			||||||
[inline]
 | 
					[inline]
 | 
				
			||||||
fn is_digit(in_char byte) bool {
 | 
					fn is_digit(in_char byte) bool {
 | 
				
			||||||
	tmp := in_char - `0`
 | 
						tmp := in_char - `0`
 | 
				
			||||||
	return tmp <= 0x09 && tmp >= 0
 | 
						return tmp <= 0x09
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[inline]
 | 
					[inline]
 | 
				
			||||||
| 
						 | 
					@ -179,13 +175,13 @@ fn is_not_wordchar(in_char byte) bool {
 | 
				
			||||||
[inline]
 | 
					[inline]
 | 
				
			||||||
fn is_lower(in_char byte) bool {
 | 
					fn is_lower(in_char byte) bool {
 | 
				
			||||||
	tmp := in_char - `a`
 | 
						tmp := in_char - `a`
 | 
				
			||||||
	return  tmp >= 0x00 && tmp <= 25
 | 
						return tmp <= 25
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[inline]
 | 
					[inline]
 | 
				
			||||||
fn is_upper(in_char byte) bool {
 | 
					fn is_upper(in_char byte) bool {
 | 
				
			||||||
	tmp := in_char - `A`
 | 
						tmp := in_char - `A`
 | 
				
			||||||
	return  tmp >= 0x00 && tmp <= 25
 | 
						return tmp <= 25
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn (re RE) get_parse_error_string(err int) string {
 | 
					pub fn (re RE) get_parse_error_string(err int) string {
 | 
				
			||||||
| 
						 | 
					@ -211,7 +207,7 @@ fn utf8_str(ch rune) string {
 | 
				
			||||||
	mut i := 4
 | 
						mut i := 4
 | 
				
			||||||
	mut res := ""
 | 
						mut res := ""
 | 
				
			||||||
	for i > 0 {
 | 
						for i > 0 {
 | 
				
			||||||
		v := byte((ch >> ((i-1)*8)) & 0xFF)
 | 
							v := byte((ch >> ((i - 1) * 8)) & 0xFF)
 | 
				
			||||||
		if v != 0{
 | 
							if v != 0{
 | 
				
			||||||
			res += "${v:1c}"
 | 
								res += "${v:1c}"
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -236,30 +232,30 @@ mut:
 | 
				
			||||||
	ist rune
 | 
						ist rune
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// char
 | 
						// char
 | 
				
			||||||
	ch rune   // char of the token if any
 | 
						ch rune                     // char of the token if any
 | 
				
			||||||
	ch_len byte             // char len
 | 
						ch_len byte                 // char len
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Quantifiers / branch
 | 
						// Quantifiers / branch
 | 
				
			||||||
	rep_min         int         // used also for jump next in the OR branch [no match] pc jump
 | 
						rep_min         int         // used also for jump next in the OR branch [no match] pc jump
 | 
				
			||||||
	rep_max         int         // used also for jump next in the OR branch [   match] pc jump
 | 
						rep_max         int         // used also for jump next in the OR branch [   match] pc jump
 | 
				
			||||||
	greedy          bool    // greedy quantifier flag
 | 
						greedy          bool        // greedy quantifier flag
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Char class
 | 
						// Char class
 | 
				
			||||||
	cc_index        int    = -1
 | 
						cc_index        int = -1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// counters for quantifier check (repetitions)
 | 
						// counters for quantifier check (repetitions)
 | 
				
			||||||
	rep             int
 | 
						rep             int
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// validator function pointer
 | 
						// validator function pointer
 | 
				
			||||||
	validator FnValidator
 | 
						validator       FnValidator
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// groups variables
 | 
						// groups variables
 | 
				
			||||||
	group_rep          int      // repetition of the group
 | 
						group_rep       int        // repetition of the group
 | 
				
			||||||
	group_id           int = -1    // id of the group
 | 
						group_id        int = -1   // id of the group
 | 
				
			||||||
	goto_pc            int = -1    // jump to this PC if is needed
 | 
						goto_pc         int = -1   // jump to this PC if is needed
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// OR flag for the token
 | 
						// OR flag for the token
 | 
				
			||||||
	next_is_or bool        // true if the next token is an OR
 | 
						next_is_or      bool       // true if the next token is an OR
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[inline]
 | 
					[inline]
 | 
				
			||||||
| 
						 | 
					@ -310,34 +306,34 @@ pub mut:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// groups
 | 
						// groups
 | 
				
			||||||
	group_count int        // number of groups in this regex struct
 | 
						group_count       int              // number of groups in this regex struct
 | 
				
			||||||
	groups []int               // groups index results
 | 
						groups            []int            // groups index results
 | 
				
			||||||
	group_max_nested int = 3   // max nested group
 | 
						group_max_nested  int  = 3         // max nested group
 | 
				
			||||||
	group_max int        = 8   // max allowed number of different groups
 | 
						group_max         int  = 8         // max allowed number of different groups
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	group_csave []int    = []int{}  // groups continuous save array
 | 
						group_csave       []int = []int{}  // groups continuous save array
 | 
				
			||||||
	group_csave_index int= -1       // groups continuous save index
 | 
						group_csave_index int = -1         // groups continuous save index
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	group_map map[string]int   // groups names map
 | 
						group_map         map[string]int   // groups names map
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// flags
 | 
						// flags
 | 
				
			||||||
	flag int                   // flag for optional parameters
 | 
						flag              int              // flag for optional parameters
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Debug/log
 | 
						// Debug/log
 | 
				
			||||||
	debug int                          // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE
 | 
						debug             int             // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE
 | 
				
			||||||
	log_func FnLog       = simple_log  // log function, can be customized by the user
 | 
						log_func          FnLog = simple_log  // log function, can be customized by the user
 | 
				
			||||||
	query string                   // query string
 | 
						query             string          // query string
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Reset RE object
 | 
					// Reset RE object
 | 
				
			||||||
//[inline]
 | 
					//[inline]
 | 
				
			||||||
fn (mut re RE) reset(){
 | 
					fn (mut re RE) reset(){
 | 
				
			||||||
	re.cc_index         = 0
 | 
						re.cc_index = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mut i := 0
 | 
						mut i := 0
 | 
				
			||||||
	for i < re.prog.len {
 | 
						for i < re.prog.len {
 | 
				
			||||||
		re.prog[i].group_rep          = 0 // clear repetition of the group
 | 
							re.prog[i].group_rep = 0 // clear repetition of the group
 | 
				
			||||||
		re.prog[i].rep                = 0 // clear repetition of the token
 | 
							re.prog[i].rep       = 0 // clear repetition of the token
 | 
				
			||||||
		i++
 | 
							i++
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	re.groups = [-1].repeat(re.group_count*2)
 | 
						re.groups = [-1].repeat(re.group_count*2)
 | 
				
			||||||
| 
						 | 
					@ -347,7 +343,7 @@ fn (mut re RE) reset(){
 | 
				
			||||||
	// reset group_csave
 | 
						// reset group_csave
 | 
				
			||||||
	if re.group_csave.len > 0 {
 | 
						if re.group_csave.len > 0 {
 | 
				
			||||||
		re.group_csave_index = 1
 | 
							re.group_csave_index = 1
 | 
				
			||||||
		re.group_csave[0] = 0     // reset the capture count
 | 
							re.group_csave[0]    = 0 // reset the capture count
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -356,8 +352,8 @@ fn (mut re RE) reset(){
 | 
				
			||||||
fn (mut re RE) reset_src(){
 | 
					fn (mut re RE) reset_src(){
 | 
				
			||||||
	mut i := 0
 | 
						mut i := 0
 | 
				
			||||||
	for i < re.prog.len {
 | 
						for i < re.prog.len {
 | 
				
			||||||
		re.prog[i].group_rep          = 0 // clear repetition of the group
 | 
							re.prog[i].group_rep = 0 // clear repetition of the group
 | 
				
			||||||
		re.prog[i].rep                = 0 // clear repetition of the token
 | 
							re.prog[i].rep       = 0 // clear repetition of the token
 | 
				
			||||||
		i++
 | 
							i++
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	re.state_stack_index = -1
 | 
						re.state_stack_index = -1
 | 
				
			||||||
| 
						 | 
					@ -367,8 +363,8 @@ fn (mut re RE) reset_src(){
 | 
				
			||||||
pub fn (re RE) get_group(group_name string) (int, int) {
 | 
					pub fn (re RE) get_group(group_name string) (int, int) {
 | 
				
			||||||
	if group_name in re.group_map {
 | 
						if group_name in re.group_map {
 | 
				
			||||||
		tmp_index := re.group_map[group_name]-1
 | 
							tmp_index := re.group_map[group_name]-1
 | 
				
			||||||
		start := re.groups[tmp_index*2]
 | 
							start     := re.groups[tmp_index * 2]
 | 
				
			||||||
		end := re.groups[tmp_index*2+1]
 | 
							end       := re.groups[tmp_index * 2 + 1]
 | 
				
			||||||
		return start,end
 | 
							return start,end
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return -1, -1
 | 
						return -1, -1
 | 
				
			||||||
| 
						 | 
					@ -397,7 +393,7 @@ const(
 | 
				
			||||||
	]
 | 
						]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// these chars are escape if preceded by a \
 | 
						// these chars are escape if preceded by a \
 | 
				
			||||||
	bsls_escape_list = [ `\\`,`|`,`.`,`*`,`+`,`-`,`{`,`}`,`[`,`]` ]
 | 
						bsls_escape_list = [`\\`, `|`, `.`, `*`, `+`, `-`, `{`, `}`, `[`, `]`]
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum BSLS_parse_state {
 | 
					enum BSLS_parse_state {
 | 
				
			||||||
| 
						 | 
					@ -414,7 +410,7 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for i < in_txt.len {
 | 
						for i < in_txt.len {
 | 
				
			||||||
		// get our char
 | 
							// get our char
 | 
				
			||||||
		char_tmp,char_len := re.get_char(in_txt,i)
 | 
							char_tmp, char_len := re.get_char(in_txt, i)
 | 
				
			||||||
		ch := byte(char_tmp)
 | 
							ch := byte(char_tmp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if status == .start && ch == `\\` {
 | 
							if status == .start && ch == `\\` {
 | 
				
			||||||
| 
						 | 
					@ -427,7 +423,7 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
 | 
				
			||||||
		if status == .bsls_found {
 | 
							if status == .bsls_found {
 | 
				
			||||||
			for c,x in bsls_validator_array {
 | 
								for c,x in bsls_validator_array {
 | 
				
			||||||
				if x.ch == ch {
 | 
									if x.ch == ch {
 | 
				
			||||||
					return c,i-in_i+1
 | 
										return c, i-in_i+1
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			status = .normal_char
 | 
								status = .normal_char
 | 
				
			||||||
| 
						 | 
					@ -437,9 +433,9 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
 | 
				
			||||||
		// no BSLS validator, manage as normal escape char char
 | 
							// no BSLS validator, manage as normal escape char char
 | 
				
			||||||
		if status == .normal_char {
 | 
							if status == .normal_char {
 | 
				
			||||||
			if ch in bsls_escape_list {
 | 
								if ch in bsls_escape_list {
 | 
				
			||||||
				return no_match_found,i-in_i+1
 | 
									return no_match_found, i-in_i+1
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			return err_syntax_error,i-in_i+1
 | 
								return err_syntax_error, i-in_i+1
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// at the present time we manage only one char after the \
 | 
							// at the present time we manage only one char after the \
 | 
				
			||||||
| 
						 | 
					@ -465,10 +461,10 @@ const(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct CharClass {
 | 
					struct CharClass {
 | 
				
			||||||
mut:
 | 
					mut:
 | 
				
			||||||
	cc_type int = cc_null      // type of cc token
 | 
						cc_type   int = cc_null // type of cc token
 | 
				
			||||||
	ch0 rune       // first char of the interval a-b  a in this case
 | 
						ch0       rune          // first char of the interval a-b  a in this case
 | 
				
			||||||
	ch1 rune	   // second char of the interval a-b b in this case
 | 
						ch1       rune	        // second char of the interval a-b b in this case
 | 
				
			||||||
	validator FnValidator      // validator function pointer
 | 
						validator FnValidator   // validator function pointer
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum CharClass_parse_state {
 | 
					enum CharClass_parse_state {
 | 
				
			||||||
| 
						 | 
					@ -562,7 +558,7 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
 | 
				
			||||||
	mut i := in_i
 | 
						mut i := in_i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mut tmp_index := re.cc_index
 | 
						mut tmp_index := re.cc_index
 | 
				
			||||||
	res_index := re.cc_index
 | 
						res_index     := re.cc_index
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mut cc_type := u32(ist_char_class_pos)
 | 
						mut cc_type := u32(ist_char_class_pos)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -570,7 +566,7 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// check if we are out of memory for char classes
 | 
							// check if we are out of memory for char classes
 | 
				
			||||||
		if tmp_index >= re.cc.len {
 | 
							if tmp_index >= re.cc.len {
 | 
				
			||||||
			return err_cc_alloc_overflow,0,u32(0)
 | 
								return err_cc_alloc_overflow, 0, u32(0)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// get our char
 | 
							// get our char
 | 
				
			||||||
| 
						 | 
					@ -710,7 +706,7 @@ fn (re RE) parse_quantifier(in_txt string, in_i int) (int, int, int, bool) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// exit on no compatible char with {} quantifier
 | 
							// exit on no compatible char with {} quantifier
 | 
				
			||||||
		if utf8util_char_len(ch) != 1 {
 | 
							if utf8util_char_len(ch) != 1 {
 | 
				
			||||||
			return err_syntax_error,i,0,false
 | 
								return err_syntax_error, i, 0, false
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// min parsing skip if comma present
 | 
							// min parsing skip if comma present
 | 
				
			||||||
| 
						 | 
					@ -913,14 +909,13 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) {
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
// compile return (return code, index) where index is the index of the error in the query string if return code is an error code
 | 
					// compile return (return code, index) where index is the index of the error in the query string if return code is an error code
 | 
				
			||||||
[deprecated]
 | 
					[deprecated]
 | 
				
			||||||
pub fn (mut re RE) compile(in_txt string) (int,int) {
 | 
					pub fn (mut re RE) compile(in_txt string) (int, int) {
 | 
				
			||||||
	return re.impl_compile(in_txt)
 | 
						return re.impl_compile(in_txt)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
					fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
				
			||||||
	mut i        := 0      // input string index
 | 
						mut i        := 0      // input string index
 | 
				
			||||||
	mut pc       := 0      // program counter
 | 
						mut pc       := 0      // program counter
 | 
				
			||||||
	mut tmp_code := u32(0)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// group management variables
 | 
						// group management variables
 | 
				
			||||||
	mut group_count           := -1
 | 
						mut group_count           := -1
 | 
				
			||||||
| 
						 | 
					@ -932,7 +927,6 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	i = 0
 | 
						i = 0
 | 
				
			||||||
	for i < in_txt.len {
 | 
						for i < in_txt.len {
 | 
				
			||||||
		tmp_code = u32(0)
 | 
					 | 
				
			||||||
		mut char_tmp := u32(0)
 | 
							mut char_tmp := u32(0)
 | 
				
			||||||
		mut char_len := 0
 | 
							mut char_len := 0
 | 
				
			||||||
		//println("i: ${i:3d} ch: ${in_txt.str[i]:c}")
 | 
							//println("i: ${i:3d} ch: ${in_txt.str[i]:c}")
 | 
				
			||||||
| 
						 | 
					@ -958,20 +952,20 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			//check max groups allowed
 | 
								//check max groups allowed
 | 
				
			||||||
			if group_count > re.group_max {
 | 
								if group_count > re.group_max {
 | 
				
			||||||
				return err_groups_overflow,i+1
 | 
									return err_groups_overflow, i+1
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			group_stack_index++
 | 
								group_stack_index++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			// check max nested groups allowed
 | 
								// check max nested groups allowed
 | 
				
			||||||
			if group_stack_index > re.group_max_nested {
 | 
								if group_stack_index > re.group_max_nested {
 | 
				
			||||||
				return err_groups_max_nested,i+1
 | 
									return err_groups_max_nested, i+1
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			tmp_res, cgroup_flag, cgroup_name, next_i := re.parse_groups(in_txt,i)
 | 
								tmp_res, cgroup_flag, cgroup_name, next_i := re.parse_groups(in_txt,i)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			// manage question mark format error
 | 
								// manage question mark format error
 | 
				
			||||||
			if tmp_res < -1 {
 | 
								if tmp_res < -1 {
 | 
				
			||||||
				return err_group_qm_notation,next_i
 | 
									return err_group_qm_notation, next_i
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			//println("Parse group: [$tmp_res, $cgroup_flag, ($i,$next_i), '${in_txt[i..next_i]}' ]")
 | 
								//println("Parse group: [$tmp_res, $cgroup_flag, ($i,$next_i), '${in_txt[i..next_i]}' ]")
 | 
				
			||||||
| 
						 | 
					@ -988,10 +982,10 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
				
			||||||
			if cgroup_name.len > 0 {
 | 
								if cgroup_name.len > 0 {
 | 
				
			||||||
				//println("GROUP NAME: ${cgroup_name}")
 | 
									//println("GROUP NAME: ${cgroup_name}")
 | 
				
			||||||
				if cgroup_name in re.group_map{
 | 
									if cgroup_name in re.group_map{
 | 
				
			||||||
					group_id = re.group_map[cgroup_name]-1
 | 
										group_id = re.group_map[cgroup_name] - 1
 | 
				
			||||||
					group_count--
 | 
										group_count--
 | 
				
			||||||
				} else {
 | 
									} else {
 | 
				
			||||||
					re.group_map[cgroup_name] = group_id+1
 | 
										re.group_map[cgroup_name] = group_id + 1
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1018,7 +1012,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
				
			||||||
		// ist_group_end
 | 
							// ist_group_end
 | 
				
			||||||
		if char_len==1 && pc > 0 && byte(char_tmp) == `)` {
 | 
							if char_len==1 && pc > 0 && byte(char_tmp) == `)` {
 | 
				
			||||||
			if group_stack_index < 0 {
 | 
								if group_stack_index < 0 {
 | 
				
			||||||
				return err_group_not_balanced,i+1
 | 
									return err_group_not_balanced, i+1
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			goto_pc := group_stack[group_stack_index]
 | 
								goto_pc := group_stack[group_stack_index]
 | 
				
			||||||
| 
						 | 
					@ -1161,7 +1155,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				// if not an escape or a bsls char then it is an error (at least for now!)
 | 
									// if not an escape or a bsls char then it is an error (at least for now!)
 | 
				
			||||||
				else {
 | 
									else {
 | 
				
			||||||
					return bsls_index,i+tmp
 | 
										return bsls_index, i+tmp
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -1192,7 +1186,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// store the number of groups in the query
 | 
						// store the number of groups in the query
 | 
				
			||||||
	re.group_count = group_count+1
 | 
						re.group_count = group_count + 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	//******************************************
 | 
						//******************************************
 | 
				
			||||||
	// Post processing
 | 
						// Post processing
 | 
				
			||||||
| 
						 | 
					@ -1482,33 +1476,35 @@ fn state_str(s Match_state) string {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct StateObj {
 | 
					struct StateObj {
 | 
				
			||||||
pub mut:
 | 
					pub mut:
 | 
				
			||||||
	match_flag bool
 | 
						match_flag  bool
 | 
				
			||||||
	match_index int = -1
 | 
						match_index int = -1
 | 
				
			||||||
	match_first int = -1
 | 
						match_first int = -1
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
					pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
				
			||||||
	// result status
 | 
						// result status
 | 
				
			||||||
	mut result := no_match_found     // function return
 | 
						mut result      := no_match_found // function return
 | 
				
			||||||
	mut first_match := -1             //index of the first match
 | 
						mut first_match := -1             //index of the first match
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mut i := 0                       // source string index
 | 
						mut i        := 0                 // source string index
 | 
				
			||||||
	mut ch := rune(0)                 // examinated char
 | 
						mut ch       := rune(0)           // examinated char
 | 
				
			||||||
	mut char_len := 0                // utf8 examinated char len
 | 
						mut char_len := 0                 // utf8 examinated char len
 | 
				
			||||||
	mut m_state := Match_state.start // start point for the matcher FSM
 | 
						mut m_state  := Match_state.start // start point for the matcher FSM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mut pc := -1                     // program counter
 | 
						mut pc    := -1                   // program counter
 | 
				
			||||||
	mut state := StateObj{}          // actual state
 | 
						mut state := StateObj{}           // actual state
 | 
				
			||||||
	mut ist := rune(0)                // actual instruction
 | 
						mut ist   := rune(0)              // actual instruction
 | 
				
			||||||
	mut l_ist :=rune(0)              // last matched instruction
 | 
						mut l_ist :=rune(0)               // last matched instruction
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mut group_stack      := [-1].repeat(re.group_max)
 | 
						//mut group_stack      := [-1].repeat(re.group_max)
 | 
				
			||||||
	mut group_data       := [-1].repeat(re.group_max)
 | 
						//mut group_data       := [-1].repeat(re.group_max)
 | 
				
			||||||
 | 
						mut group_stack := []int{len: re.group_max, init: -1}
 | 
				
			||||||
 | 
						mut group_data  := []int{len: re.group_max, init: -1}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mut group_index := -1            // group id used to know how many groups are open
 | 
						mut group_index := -1             // group id used to know how many groups are open
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mut step_count := 0              // stats for debug
 | 
						mut step_count  := 0              // stats for debug
 | 
				
			||||||
	mut dbg_line   := 0              // count debug line printed
 | 
						mut dbg_line    := 0              // count debug line printed
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	re.reset()
 | 
						re.reset()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1535,7 +1531,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
				
			||||||
		// DEBUG LOG
 | 
							// DEBUG LOG
 | 
				
			||||||
		//******************************************
 | 
							//******************************************
 | 
				
			||||||
		if re.debug>0 {
 | 
							if re.debug>0 {
 | 
				
			||||||
			mut buf2 := strings.new_builder(re.cc.len+128)
 | 
								mut buf2 := strings.new_builder(re.cc.len + 128)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			// print all the instructions
 | 
								// print all the instructions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1658,7 +1654,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
				
			||||||
		// starting and init
 | 
							// starting and init
 | 
				
			||||||
		if m_state == .start {
 | 
							if m_state == .start {
 | 
				
			||||||
			pc = -1
 | 
								pc = -1
 | 
				
			||||||
			i = 0
 | 
								i  = 0
 | 
				
			||||||
			m_state = .ist_next
 | 
								m_state = .ist_next
 | 
				
			||||||
			continue
 | 
								continue
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					@ -1962,7 +1958,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
				
			||||||
		/***********************************
 | 
							/***********************************
 | 
				
			||||||
		* Quantifier management
 | 
							* Quantifier management
 | 
				
			||||||
		***********************************/
 | 
							***********************************/
 | 
				
			||||||
		// ist_quant_ng
 | 
							// ist_quant_ng => quantifier negative test on group
 | 
				
			||||||
		if m_state == .ist_quant_ng {
 | 
							if m_state == .ist_quant_ng {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			// we are finished here
 | 
								// we are finished here
 | 
				
			||||||
| 
						 | 
					@ -2039,7 +2035,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
				
			||||||
			return err_internal_error, i
 | 
								return err_internal_error, i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		// ist_quant_pg
 | 
							// ist_quant_pg => quantifier positive test on group
 | 
				
			||||||
		else if m_state == .ist_quant_pg {
 | 
							else if m_state == .ist_quant_pg {
 | 
				
			||||||
			//println(".ist_quant_pg")
 | 
								//println(".ist_quant_pg")
 | 
				
			||||||
			mut tmp_pc := pc
 | 
								mut tmp_pc := pc
 | 
				
			||||||
| 
						 | 
					@ -2084,7 +2080,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
				
			||||||
			return err_internal_error, i
 | 
								return err_internal_error, i
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// ist_quant_n
 | 
							// ist_quant_n => quantifier negative test on token
 | 
				
			||||||
		else if m_state == .ist_quant_n {
 | 
							else if m_state == .ist_quant_n {
 | 
				
			||||||
			rep := re.prog[pc].rep
 | 
								rep := re.prog[pc].rep
 | 
				
			||||||
			//println("Here!! PC $pc is_next_or: ${re.prog[pc].next_is_or}")
 | 
								//println("Here!! PC $pc is_next_or: ${re.prog[pc].next_is_or}")
 | 
				
			||||||
| 
						 | 
					@ -2125,7 +2121,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
				
			||||||
			//return no_match_found, 0
 | 
								//return no_match_found, 0
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// ist_quant_p
 | 
							// ist_quant_p => quantifier positive test on token
 | 
				
			||||||
		else if m_state == .ist_quant_p {
 | 
							else if m_state == .ist_quant_p {
 | 
				
			||||||
			// exit on first match
 | 
								// exit on first match
 | 
				
			||||||
			if (re.flag & f_efm) != 0 {
 | 
								if (re.flag & f_efm) != 0 {
 | 
				
			||||||
| 
						 | 
					@ -2255,7 +2251,7 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
 | 
				
			||||||
	start, end := re.match_base(in_txt.str, in_txt.len)
 | 
						start, end := re.match_base(in_txt.str, in_txt.len)
 | 
				
			||||||
	re.flag = old_flag
 | 
						re.flag = old_flag
 | 
				
			||||||
	if start >= 0 && end > start {
 | 
						if start >= 0 && end > start {
 | 
				
			||||||
		return start,end
 | 
							return start, end
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return no_match_found, 0
 | 
						return no_match_found, 0
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue