2546 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			V
		
	
	
			
		
		
	
	
			2546 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			V
		
	
	
/*
 | 
						|
regex 1.0 alpha
 | 
						|
 | 
						|
Copyright (c) 2019-2022 Dario Deledda. All rights reserved.
 | 
						|
Use of this source code is governed by an MIT license
 | 
						|
that can be found in the LICENSE file.
 | 
						|
 | 
						|
This file contains regex module
 | 
						|
 | 
						|
Know limitation:
 | 
						|
- find is implemented in a trivial way
 | 
						|
- not full compliant PCRE
 | 
						|
- not compliant POSIX ERE
 | 
						|
*/
 | 
						|
module regex
 | 
						|
 | 
						|
import strings
 | 
						|
 | 
						|
pub const (
 | 
						|
	v_regex_version          = '1.0 alpha' // regex module version
 | 
						|
 | 
						|
	max_code_len             = 256 // default small base code len for the regex programs
 | 
						|
	max_quantifier           = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
 | 
						|
	// spaces chars (here only westerns!!) TODO: manage all the spaces from unicode
 | 
						|
	spaces                   = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
 | 
						|
	// new line chars for now only '\n'
 | 
						|
	new_line_list            = [`\n`, `\r`]
 | 
						|
 | 
						|
	// Results
 | 
						|
	no_match_found           = -1
 | 
						|
 | 
						|
	// Errors
 | 
						|
	compile_ok               = 0 // the regex string compiled, all ok
 | 
						|
	err_char_unknown         = -2 // the char used is unknow to the system
 | 
						|
	err_undefined            = -3 // the compiler symbol is undefined
 | 
						|
	err_internal_error       = -4 // Bug in the regex system!!
 | 
						|
	err_cc_alloc_overflow    = -5 // memory for char class full!!
 | 
						|
	err_syntax_error         = -6 // syntax error in regex compiling
 | 
						|
	err_groups_overflow      = -7 // max number of groups reached
 | 
						|
	err_groups_max_nested    = -8 // max number of nested group reached
 | 
						|
	err_group_not_balanced   = -9 // group not balanced
 | 
						|
	err_group_qm_notation    = -10 // group invalid notation
 | 
						|
	err_invalid_or_with_cc   = -11 // invalid or on two consecutive char class
 | 
						|
	err_neg_group_quantifier = -12 // negation groups can not have quantifier
 | 
						|
	err_consecutive_dots     = -13 // two consecutive dots is an error
 | 
						|
)
 | 
						|
 | 
						|
const (
 | 
						|
	//*************************************
 | 
						|
	// regex program instructions
 | 
						|
	//*************************************
 | 
						|
	ist_simple_char    = u32(0x7FFFFFFF) // single char instruction, 31 bit available to char
 | 
						|
	// char class 11 0100 AA xxxxxxxx
 | 
						|
	// AA = 00  regular class
 | 
						|
	// AA = 01  Negated class ^ char
 | 
						|
	ist_char_class     = 0xD1000000 // MASK
 | 
						|
	ist_char_class_pos = 0xD0000000 // char class normal [abc]
 | 
						|
	ist_char_class_neg = 0xD1000000 // char class negate [^abc]
 | 
						|
	// dot char        10 0110 xx xxxxxxxx
 | 
						|
	ist_dot_char       = 0x98000000 // match any char except \n
 | 
						|
	// backslash chars 10 0100 xx xxxxxxxx
 | 
						|
	ist_bsls_char      = 0x90000000 // backslash char
 | 
						|
	// OR |            10 010Y xx xxxxxxxx
 | 
						|
	ist_or_branch      = 0x91000000 // OR case
 | 
						|
	// groups          10 010Y xx xxxxxxxx
 | 
						|
	ist_group_start    = 0x92000000 // group start (
 | 
						|
	ist_group_end      = 0x94000000 // group end   )
 | 
						|
	// control instructions
 | 
						|
	ist_prog_end       = u32(0x88000000) // 10 0010 xx xxxxxxxx
 | 
						|
		//*************************************
 | 
						|
)
 | 
						|
 | 
						|
/*
 | 
						|
General Utilities
 | 
						|
*/
 | 
						|
// utf8util_char_len calculate the length in bytes of a utf8 char
 | 
						|
[inline]
 | 
						|
fn utf8util_char_len(b byte) int {
 | 
						|
	return ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3) + 1
 | 
						|
}
 | 
						|
 | 
						|
// get_char get a char from position i and return an u32 with the unicode code
 | 
						|
[direct_array_access; inline]
 | 
						|
fn (re RE) get_char(in_txt string, i int) (u32, int) {
 | 
						|
	ini := unsafe { in_txt.str[i] }
 | 
						|
	// ascii 8 bit
 | 
						|
	if (re.flag & regex.f_bin) != 0 || ini & 0x80 == 0 {
 | 
						|
		return u32(ini), 1
 | 
						|
	}
 | 
						|
	// unicode char
 | 
						|
	char_len := utf8util_char_len(ini)
 | 
						|
	mut tmp := 0
 | 
						|
	mut ch := u32(0)
 | 
						|
	for tmp < char_len {
 | 
						|
		ch = (ch << 8) | unsafe { in_txt.str[i + tmp] }
 | 
						|
		tmp++
 | 
						|
	}
 | 
						|
	return ch, char_len
 | 
						|
}
 | 
						|
 | 
						|
// get_charb get a char from position i and return an u32 with the unicode code
 | 
						|
[direct_array_access; inline]
 | 
						|
fn (re RE) get_charb(in_txt &byte, i int) (u32, int) {
 | 
						|
	// ascii 8 bit
 | 
						|
	if (re.flag & regex.f_bin) != 0 || unsafe { in_txt[i] } & 0x80 == 0 {
 | 
						|
		return u32(unsafe { in_txt[i] }), 1
 | 
						|
	}
 | 
						|
	// unicode char
 | 
						|
	char_len := utf8util_char_len(unsafe { in_txt[i] })
 | 
						|
	mut tmp := 0
 | 
						|
	mut ch := u32(0)
 | 
						|
	for tmp < char_len {
 | 
						|
		ch = (ch << 8) | unsafe { in_txt[i + tmp] }
 | 
						|
		tmp++
 | 
						|
	}
 | 
						|
	return ch, char_len
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_alnum(in_char byte) bool {
 | 
						|
	mut tmp := in_char - `A`
 | 
						|
	if tmp <= 25 {
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	tmp = in_char - `a`
 | 
						|
	if tmp <= 25 {
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	tmp = in_char - `0`
 | 
						|
	if tmp <= 9 {
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	if in_char == `_` {
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_not_alnum(in_char byte) bool {
 | 
						|
	return !is_alnum(in_char)
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_space(in_char byte) bool {
 | 
						|
	return in_char in regex.spaces
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_not_space(in_char byte) bool {
 | 
						|
	return !is_space(in_char)
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_digit(in_char byte) bool {
 | 
						|
	tmp := in_char - `0`
 | 
						|
	return tmp <= 0x09
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_not_digit(in_char byte) bool {
 | 
						|
	return !is_digit(in_char)
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
[inline]
 | 
						|
fn is_wordchar(in_char byte) bool {
 | 
						|
	return is_alnum(in_char) || in_char == `_`
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_not_wordchar(in_char byte) bool {
 | 
						|
	return !is_alnum(in_char)
 | 
						|
}
 | 
						|
*/
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_lower(in_char byte) bool {
 | 
						|
	tmp := in_char - `a`
 | 
						|
	return tmp <= 25
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn is_upper(in_char byte) bool {
 | 
						|
	tmp := in_char - `A`
 | 
						|
	return tmp <= 25
 | 
						|
}
 | 
						|
 | 
						|
pub fn (re RE) get_parse_error_string(err int) string {
 | 
						|
	match err {
 | 
						|
		regex.compile_ok { return 'compile_ok' }
 | 
						|
		regex.no_match_found { return 'no_match_found' }
 | 
						|
		regex.err_char_unknown { return 'err_char_unknown' }
 | 
						|
		regex.err_undefined { return 'err_undefined' }
 | 
						|
		regex.err_internal_error { return 'err_internal_error' }
 | 
						|
		regex.err_cc_alloc_overflow { return 'err_cc_alloc_overflow' }
 | 
						|
		regex.err_syntax_error { return 'err_syntax_error' }
 | 
						|
		regex.err_groups_overflow { return 'err_groups_overflow' }
 | 
						|
		regex.err_groups_max_nested { return 'err_groups_max_nested' }
 | 
						|
		regex.err_group_not_balanced { return 'err_group_not_balanced' }
 | 
						|
		regex.err_group_qm_notation { return 'err_group_qm_notation' }
 | 
						|
		regex.err_invalid_or_with_cc { return 'err_invalid_or_with_cc' }
 | 
						|
		regex.err_neg_group_quantifier { return 'err_neg_group_quantifier' }
 | 
						|
		regex.err_consecutive_dots { return 'err_consecutive_dots' }
 | 
						|
		else { return 'err_unknown' }
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// utf8_str convert and utf8 sequence to a printable string
 | 
						|
[inline]
 | 
						|
fn utf8_str(ch rune) string {
 | 
						|
	mut i := 4
 | 
						|
	mut res := ''
 | 
						|
	for i > 0 {
 | 
						|
		v := byte((ch >> ((i - 1) * 8)) & 0xFF)
 | 
						|
		if v != 0 {
 | 
						|
			res += '${v:1c}'
 | 
						|
		}
 | 
						|
		i--
 | 
						|
	}
 | 
						|
	return res
 | 
						|
}
 | 
						|
 | 
						|
// simple_log default log function
 | 
						|
fn simple_log(txt string) {
 | 
						|
	print(txt)
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************
 | 
						|
*
 | 
						|
* Token Structs
 | 
						|
*
 | 
						|
******************************************************************************/
 | 
						|
pub type FnValidator = fn (byte) bool
 | 
						|
 | 
						|
struct Token {
 | 
						|
mut:
 | 
						|
	ist rune
 | 
						|
	// char
 | 
						|
	ch     rune // char of the token if any
 | 
						|
	ch_len byte // char len
 | 
						|
	// Quantifiers / branch
 | 
						|
	rep_min int  // used also for jump next in the OR branch [no match] pc jump
 | 
						|
	rep_max int  // used also for jump next in the OR branch [   match] pc jump
 | 
						|
	greedy  bool // greedy quantifier flag
 | 
						|
	// Char class
 | 
						|
	cc_index int = -1
 | 
						|
	// counters for quantifier check (repetitions)
 | 
						|
	rep int
 | 
						|
	// validator function pointer
 | 
						|
	validator FnValidator
 | 
						|
	// groups variables
 | 
						|
	group_neg bool // negation flag for the group, 0 => no negation > 0 => negataion
 | 
						|
	group_rep int  // repetition of the group
 | 
						|
	group_id  int = -1 // id of the group
 | 
						|
	goto_pc   int = -1 // jump to this PC if is needed
 | 
						|
	// OR flag for the token
 | 
						|
	next_is_or bool // true if the next token is an OR
 | 
						|
	// dot_char token variables
 | 
						|
	dot_check_pc  int = -1 // pc of the next token to check for dots
 | 
						|
	bsls_check_pc int = -1 // pc of the next token to check for bsls
 | 
						|
	last_dot_flag bool // if true indicate that is the last dot_char in the regex
 | 
						|
	// debug fields
 | 
						|
	source_index int
 | 
						|
}
 | 
						|
 | 
						|
[inline]
 | 
						|
fn (mut tok Token) reset() {
 | 
						|
	tok.rep = 0
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************
 | 
						|
*
 | 
						|
* Regex struct
 | 
						|
*
 | 
						|
******************************************************************************/
 | 
						|
pub const (
 | 
						|
	f_nl  = 0x00000001 // end the match when find a new line symbol
 | 
						|
	f_ms  = 0x00000002 // match true only if the match is at the start of the string
 | 
						|
	f_me  = 0x00000004 // match true only if the match is at the end of the string
 | 
						|
 | 
						|
	f_efm = 0x00000100 // exit on first token matched, used by search
 | 
						|
	f_bin = 0x00000200 // work only on bytes, ignore utf-8
 | 
						|
	// behaviour modifier flags
 | 
						|
	f_src = 0x00020000 // search mode enabled
 | 
						|
)
 | 
						|
 | 
						|
// Log function prototype
 | 
						|
pub type FnLog = fn (string)
 | 
						|
 | 
						|
pub struct RE {
 | 
						|
pub mut:
 | 
						|
	prog     []Token
 | 
						|
	prog_len int // regex program len
 | 
						|
	// char classes storage
 | 
						|
	cc       []CharClass // char class list
 | 
						|
	cc_index int // index
 | 
						|
	// groups
 | 
						|
	group_count      int   // number of groups in this regex struct
 | 
						|
	groups           []int // groups index results
 | 
						|
	group_max_nested int = 3 // max nested group
 | 
						|
	group_max        int = 8 // max allowed number of different groups
 | 
						|
 | 
						|
	state_list []StateObj
 | 
						|
 | 
						|
	group_csave_flag bool  // flag to enable continuous saving
 | 
						|
	group_csave      []int //= []int{}  // groups continuous save list
 | 
						|
 | 
						|
	group_map map[string]int // groups names map
 | 
						|
 | 
						|
	group_stack []int
 | 
						|
	group_data  []int
 | 
						|
	// flags
 | 
						|
	flag int // flag for optional parameters
 | 
						|
	// Debug/log
 | 
						|
	debug    int    // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE
 | 
						|
	log_func FnLog = simple_log // log function, can be customized by the user
 | 
						|
	query    string // query string
 | 
						|
}
 | 
						|
 | 
						|
// Reset RE object
 | 
						|
[direct_array_access; inline]
 | 
						|
fn (mut re RE) reset() {
 | 
						|
	re.cc_index = 0
 | 
						|
 | 
						|
	mut i := 0
 | 
						|
	for i < re.prog_len {
 | 
						|
		re.prog[i].group_rep = 0 // clear repetition of the group
 | 
						|
		re.prog[i].rep = 0 // clear repetition of the token
 | 
						|
		i++
 | 
						|
	}
 | 
						|
 | 
						|
	// init groups array
 | 
						|
	if re.group_count > 0 {
 | 
						|
		if re.groups.len == 0 {
 | 
						|
			// first run alloc memory
 | 
						|
			re.groups = []int{len: re.group_count * 2, init: -1}
 | 
						|
		} else {
 | 
						|
			// subsequent executions, only clean up the memory
 | 
						|
			i = 0
 | 
						|
			for i < re.groups.len {
 | 
						|
				re.groups[i] = -1
 | 
						|
				i++
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// reset group_csave
 | 
						|
	if re.group_csave_flag == true {
 | 
						|
		re.group_csave.clear() // = []int{}
 | 
						|
	}
 | 
						|
 | 
						|
	// reset state list
 | 
						|
	re.state_list.clear()
 | 
						|
	re.group_stack.clear()
 | 
						|
}
 | 
						|
 | 
						|
// reset for search mode fail
 | 
						|
// gcc bug, dont use [inline] or go 5 time slower
 | 
						|
//[inline]
 | 
						|
[direct_array_access]
 | 
						|
fn (mut re RE) reset_src() {
 | 
						|
	mut i := 0
 | 
						|
	for i < re.prog_len {
 | 
						|
		re.prog[i].group_rep = 0 // clear repetition of the group
 | 
						|
		re.prog[i].rep = 0 // clear repetition of the token
 | 
						|
		i++
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************
 | 
						|
*
 | 
						|
* Backslashes chars
 | 
						|
*
 | 
						|
******************************************************************************/
 | 
						|
struct BslsStruct {
 | 
						|
	ch        rune        // meta char
 | 
						|
	validator FnValidator // validator function pointer
 | 
						|
}
 | 
						|
 | 
						|
const (
 | 
						|
	bsls_validator_array = [
 | 
						|
		BslsStruct{`w`, is_alnum},
 | 
						|
		BslsStruct{`W`, is_not_alnum},
 | 
						|
		BslsStruct{`s`, is_space},
 | 
						|
		BslsStruct{`S`, is_not_space},
 | 
						|
		BslsStruct{`d`, is_digit},
 | 
						|
		BslsStruct{`D`, is_not_digit},
 | 
						|
		BslsStruct{`a`, is_lower},
 | 
						|
		BslsStruct{`A`, is_upper},
 | 
						|
	]
 | 
						|
 | 
						|
	// these chars are escape if preceded by a \
 | 
						|
	bsls_escape_list     = [`\\`, `|`, `.`, `:`, `*`, `+`, `-`, `{`, `}`, `[`, `]`, `(`, `)`, `?`,
 | 
						|
		`^`, `!`]
 | 
						|
)
 | 
						|
 | 
						|
enum BSLS_parse_state {
 | 
						|
	start
 | 
						|
	bsls_found
 | 
						|
	bsls_char
 | 
						|
	normal_char
 | 
						|
}
 | 
						|
 | 
						|
// parse_bsls return (index, str_len) bsls_validator_array index, len of the backslash sequence if present
 | 
						|
fn (re RE) parse_bsls(in_txt string, in_i int) (int, int) {
 | 
						|
	mut status := BSLS_parse_state.start
 | 
						|
	mut i := in_i
 | 
						|
 | 
						|
	for i < in_txt.len {
 | 
						|
		// get our char
 | 
						|
		char_tmp, char_len := re.get_char(in_txt, i)
 | 
						|
		ch := byte(char_tmp)
 | 
						|
 | 
						|
		if status == .start && ch == `\\` {
 | 
						|
			status = .bsls_found
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// check if is our bsls char, for now only one length sequence
 | 
						|
		if status == .bsls_found {
 | 
						|
			for c, x in regex.bsls_validator_array {
 | 
						|
				if x.ch == ch {
 | 
						|
					return c, i - in_i + 1
 | 
						|
				}
 | 
						|
			}
 | 
						|
			status = .normal_char
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// no BSLS validator, manage as normal escape char char
 | 
						|
		if status == .normal_char {
 | 
						|
			if ch in regex.bsls_escape_list {
 | 
						|
				return regex.no_match_found, i - in_i + 1
 | 
						|
			}
 | 
						|
			return regex.err_syntax_error, i - in_i + 1
 | 
						|
		}
 | 
						|
 | 
						|
		// at the present time we manage only one char after the \
 | 
						|
		break
 | 
						|
	}
 | 
						|
	// not our bsls return KO
 | 
						|
	return regex.err_syntax_error, i
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************
 | 
						|
*
 | 
						|
* Char class
 | 
						|
*
 | 
						|
******************************************************************************/
 | 
						|
const (
 | 
						|
	cc_null = 0 // empty cc token
 | 
						|
	cc_char = 1 // simple char: a
 | 
						|
	cc_int  = 2 // char interval: a-z
 | 
						|
	cc_bsls = 3 // backslash char
 | 
						|
	cc_end  = 4 // cc sequence terminator
 | 
						|
)
 | 
						|
 | 
						|
struct CharClass {
 | 
						|
mut:
 | 
						|
	cc_type   int = regex.cc_null // type of cc token
 | 
						|
	ch0       rune        // first char of the interval a-b  a in this case
 | 
						|
	ch1       rune        // second char of the interval a-b b in this case
 | 
						|
	validator FnValidator // validator function pointer
 | 
						|
}
 | 
						|
 | 
						|
enum CharClass_parse_state {
 | 
						|
	start
 | 
						|
	in_char
 | 
						|
	in_bsls
 | 
						|
	separator
 | 
						|
	finish
 | 
						|
}
 | 
						|
 | 
						|
fn (re RE) get_char_class(pc int) string {
 | 
						|
	buf := []byte{len: (re.cc.len)}
 | 
						|
	mut buf_ptr := unsafe { &byte(&buf) }
 | 
						|
 | 
						|
	mut cc_i := re.prog[pc].cc_index
 | 
						|
	mut i := 0
 | 
						|
	mut tmp := 0
 | 
						|
	for cc_i >= 0 && cc_i < re.cc.len && re.cc[cc_i].cc_type != regex.cc_end {
 | 
						|
		if re.cc[cc_i].cc_type == regex.cc_bsls {
 | 
						|
			unsafe {
 | 
						|
				buf_ptr[i] = `\\`
 | 
						|
				i++
 | 
						|
				buf_ptr[i] = byte(re.cc[cc_i].ch0)
 | 
						|
				i++
 | 
						|
			}
 | 
						|
		} else if re.cc[cc_i].ch0 == re.cc[cc_i].ch1 {
 | 
						|
			tmp = 3
 | 
						|
			for tmp >= 0 {
 | 
						|
				x := byte((re.cc[cc_i].ch0 >> (tmp * 8)) & 0xFF)
 | 
						|
				if x != 0 {
 | 
						|
					unsafe {
 | 
						|
						buf_ptr[i] = x
 | 
						|
						i++
 | 
						|
					}
 | 
						|
				}
 | 
						|
				tmp--
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			tmp = 3
 | 
						|
			for tmp >= 0 {
 | 
						|
				x := byte((re.cc[cc_i].ch0 >> (tmp * 8)) & 0xFF)
 | 
						|
				if x != 0 {
 | 
						|
					unsafe {
 | 
						|
						buf_ptr[i] = x
 | 
						|
						i++
 | 
						|
					}
 | 
						|
				}
 | 
						|
				tmp--
 | 
						|
			}
 | 
						|
			unsafe {
 | 
						|
				buf_ptr[i] = `-`
 | 
						|
				i++
 | 
						|
			}
 | 
						|
			tmp = 3
 | 
						|
			for tmp >= 0 {
 | 
						|
				x := byte((re.cc[cc_i].ch1 >> (tmp * 8)) & 0xFF)
 | 
						|
				if x != 0 {
 | 
						|
					unsafe {
 | 
						|
						buf_ptr[i] = x
 | 
						|
						i++
 | 
						|
					}
 | 
						|
				}
 | 
						|
				tmp--
 | 
						|
			}
 | 
						|
		}
 | 
						|
		cc_i++
 | 
						|
	}
 | 
						|
	unsafe {
 | 
						|
		buf_ptr[i] = byte(0)
 | 
						|
	}
 | 
						|
	return unsafe { tos_clone(buf_ptr) }
 | 
						|
}
 | 
						|
 | 
						|
fn (re RE) check_char_class(pc int, ch rune) bool {
 | 
						|
	mut cc_i := re.prog[pc].cc_index
 | 
						|
	for cc_i >= 0 && cc_i < re.cc.len && re.cc[cc_i].cc_type != regex.cc_end {
 | 
						|
		if re.cc[cc_i].cc_type == regex.cc_bsls {
 | 
						|
			if re.cc[cc_i].validator(byte(ch)) {
 | 
						|
				return true
 | 
						|
			}
 | 
						|
		} else if ch >= re.cc[cc_i].ch0 && ch <= re.cc[cc_i].ch1 {
 | 
						|
			return true
 | 
						|
		}
 | 
						|
		cc_i++
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
// parse_char_class return (index, str_len, cc_type) of a char class [abcm-p], char class start after the [ char
 | 
						|
fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
 | 
						|
	mut status := CharClass_parse_state.start
 | 
						|
	mut i := in_i
 | 
						|
 | 
						|
	mut tmp_index := re.cc_index
 | 
						|
	res_index := re.cc_index
 | 
						|
 | 
						|
	mut cc_type := u32(regex.ist_char_class_pos)
 | 
						|
 | 
						|
	for i < in_txt.len {
 | 
						|
		// check if we are out of memory for char classes
 | 
						|
		if tmp_index >= re.cc.len {
 | 
						|
			return regex.err_cc_alloc_overflow, 0, u32(0)
 | 
						|
		}
 | 
						|
 | 
						|
		// get our char
 | 
						|
		char_tmp, char_len := re.get_char(in_txt, i)
 | 
						|
		ch := byte(char_tmp)
 | 
						|
 | 
						|
		// println("CC #${i:3d} ch: ${ch:c}")
 | 
						|
 | 
						|
		// negation
 | 
						|
		if status == .start && ch == `^` {
 | 
						|
			cc_type = u32(regex.ist_char_class_neg)
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// minus symbol
 | 
						|
		if status == .start && ch == `-` {
 | 
						|
			re.cc[tmp_index].cc_type = regex.cc_char
 | 
						|
			re.cc[tmp_index].ch0 = char_tmp
 | 
						|
			re.cc[tmp_index].ch1 = char_tmp
 | 
						|
			i += char_len
 | 
						|
			tmp_index++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// bsls
 | 
						|
		if (status == .start || status == .in_char) && ch == `\\` {
 | 
						|
			// println("CC bsls.")
 | 
						|
			status = .in_bsls
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		if status == .in_bsls {
 | 
						|
			// println("CC bsls validation.")
 | 
						|
			for c, x in regex.bsls_validator_array {
 | 
						|
				if x.ch == ch {
 | 
						|
					// println("CC bsls found [${ch:c}]")
 | 
						|
					re.cc[tmp_index].cc_type = regex.cc_bsls
 | 
						|
					re.cc[tmp_index].ch0 = regex.bsls_validator_array[c].ch
 | 
						|
					re.cc[tmp_index].ch1 = regex.bsls_validator_array[c].ch
 | 
						|
					re.cc[tmp_index].validator = regex.bsls_validator_array[c].validator
 | 
						|
					i += char_len
 | 
						|
					tmp_index++
 | 
						|
					status = .in_char
 | 
						|
					break
 | 
						|
				}
 | 
						|
			}
 | 
						|
			if status == .in_bsls {
 | 
						|
				// manage as a simple char
 | 
						|
				// println("CC bsls not found [${ch:c}]")
 | 
						|
				re.cc[tmp_index].cc_type = regex.cc_char
 | 
						|
				re.cc[tmp_index].ch0 = char_tmp
 | 
						|
				re.cc[tmp_index].ch1 = char_tmp
 | 
						|
				i += char_len
 | 
						|
				tmp_index++
 | 
						|
				status = .in_char
 | 
						|
				continue
 | 
						|
			} else {
 | 
						|
				continue
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// simple char
 | 
						|
		if (status == .start || status == .in_char) && ch != `-` && ch != `]` {
 | 
						|
			status = .in_char
 | 
						|
 | 
						|
			re.cc[tmp_index].cc_type = regex.cc_char
 | 
						|
			re.cc[tmp_index].ch0 = char_tmp
 | 
						|
			re.cc[tmp_index].ch1 = char_tmp
 | 
						|
 | 
						|
			i += char_len
 | 
						|
			tmp_index++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// check range separator
 | 
						|
		if status == .in_char && ch == `-` {
 | 
						|
			status = .separator
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// check range end
 | 
						|
		if status == .separator && ch != `]` && ch != `-` {
 | 
						|
			status = .in_char
 | 
						|
			re.cc[tmp_index - 1].cc_type = regex.cc_int
 | 
						|
			re.cc[tmp_index - 1].ch1 = char_tmp
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// char class end
 | 
						|
		if status == .in_char && ch == `]` {
 | 
						|
			re.cc[tmp_index].cc_type = regex.cc_end
 | 
						|
			re.cc[tmp_index].ch0 = 0
 | 
						|
			re.cc[tmp_index].ch1 = 0
 | 
						|
			re.cc_index = tmp_index + 1
 | 
						|
 | 
						|
			return res_index, i - in_i + 2, cc_type
 | 
						|
		}
 | 
						|
 | 
						|
		i++
 | 
						|
	}
 | 
						|
	return regex.err_syntax_error, 0, u32(0)
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************
 | 
						|
*
 | 
						|
* Re Compiler
 | 
						|
*
 | 
						|
******************************************************************************/
 | 
						|
//
 | 
						|
// Quantifier
 | 
						|
//
 | 
						|
enum Quant_parse_state {
 | 
						|
	start
 | 
						|
	min_parse
 | 
						|
	comma_checked
 | 
						|
	max_parse
 | 
						|
	greedy
 | 
						|
	gredy_parse
 | 
						|
	finish
 | 
						|
}
 | 
						|
 | 
						|
// parse_quantifier return (min, max, str_len, greedy_flag) of a {min,max}? quantifier starting after the { char
 | 
						|
fn (re RE) parse_quantifier(in_txt string, in_i int) (int, int, int, bool) {
 | 
						|
	mut status := Quant_parse_state.start
 | 
						|
	mut i := in_i
 | 
						|
 | 
						|
	mut q_min := 0 // default min in a {} quantifier is 1
 | 
						|
	mut q_max := 0 // deafult max in a {} quantifier is max_quantifier
 | 
						|
 | 
						|
	mut ch := byte(0)
 | 
						|
 | 
						|
	for i < in_txt.len {
 | 
						|
		unsafe {
 | 
						|
			ch = in_txt.str[i]
 | 
						|
		}
 | 
						|
		// println("${ch:c} status: $status")
 | 
						|
 | 
						|
		// exit on no compatible char with {} quantifier
 | 
						|
		if utf8util_char_len(ch) != 1 {
 | 
						|
			return regex.err_syntax_error, i, 0, false
 | 
						|
		}
 | 
						|
 | 
						|
		// min parsing skip if comma present
 | 
						|
		if status == .start && ch == `,` {
 | 
						|
			q_min = 0 // default min in a {} quantifier is 0
 | 
						|
			status = .comma_checked
 | 
						|
			i++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		if status == .start && is_digit(ch) {
 | 
						|
			status = .min_parse
 | 
						|
			q_min *= 10
 | 
						|
			q_min += int(ch - `0`)
 | 
						|
			i++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		if status == .min_parse && is_digit(ch) {
 | 
						|
			q_min *= 10
 | 
						|
			q_min += int(ch - `0`)
 | 
						|
			i++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// we have parsed the min, now check the max
 | 
						|
		if status == .min_parse && ch == `,` {
 | 
						|
			status = .comma_checked
 | 
						|
			i++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// single value {4}
 | 
						|
		if status == .min_parse && ch == `}` {
 | 
						|
			q_max = q_min
 | 
						|
			status = .greedy
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// end without max
 | 
						|
		if status == .comma_checked && ch == `}` {
 | 
						|
			q_max = regex.max_quantifier
 | 
						|
			status = .greedy
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// start max parsing
 | 
						|
		if status == .comma_checked && is_digit(ch) {
 | 
						|
			status = .max_parse
 | 
						|
			q_max *= 10
 | 
						|
			q_max += int(ch - `0`)
 | 
						|
			i++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// parse the max
 | 
						|
		if status == .max_parse && is_digit(ch) {
 | 
						|
			q_max *= 10
 | 
						|
			q_max += int(ch - `0`)
 | 
						|
			i++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// finished the quantifier
 | 
						|
		if status == .max_parse && ch == `}` {
 | 
						|
			status = .greedy
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// check if greedy flag char ? is present
 | 
						|
		if status == .greedy {
 | 
						|
			if i + 1 < in_txt.len {
 | 
						|
				i++
 | 
						|
				status = .gredy_parse
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			return q_min, q_max, i - in_i + 2, false
 | 
						|
		}
 | 
						|
 | 
						|
		// check the greedy flag
 | 
						|
		if status == .gredy_parse {
 | 
						|
			if ch == `?` {
 | 
						|
				return q_min, q_max, i - in_i + 2, true
 | 
						|
			} else {
 | 
						|
				i--
 | 
						|
				return q_min, q_max, i - in_i + 2, false
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// not  a {} quantifier, exit
 | 
						|
		return regex.err_syntax_error, i, 0, false
 | 
						|
	}
 | 
						|
 | 
						|
	// not a conform {} quantifier
 | 
						|
	return regex.err_syntax_error, i, 0, false
 | 
						|
}
 | 
						|
 | 
						|
//
 | 
						|
// Groups
 | 
						|
//
 | 
						|
enum Group_parse_state {
 | 
						|
	start
 | 
						|
	q_mark // (?
 | 
						|
	q_mark1 // (?:|P  checking
 | 
						|
	p_status // (?P
 | 
						|
	p_start // (?P<
 | 
						|
	p_end // (?P<...>
 | 
						|
	p_in_name // (?P<...
 | 
						|
	finish
 | 
						|
}
 | 
						|
 | 
						|
// parse_groups parse a group for ? (question mark) syntax, if found, return (error, capture_flag, negate_flag, name_of_the_group, next_index)
 | 
						|
fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, bool, string, int) {
 | 
						|
	mut status := Group_parse_state.start
 | 
						|
	mut i := in_i
 | 
						|
	mut name := ''
 | 
						|
 | 
						|
	for i < in_txt.len && status != .finish {
 | 
						|
		// get our char
 | 
						|
		char_tmp, char_len := re.get_char(in_txt, i)
 | 
						|
		ch := byte(char_tmp)
 | 
						|
 | 
						|
		// start
 | 
						|
		if status == .start && ch == `(` {
 | 
						|
			status = .q_mark
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// check for question marks
 | 
						|
		if status == .q_mark && ch == `?` {
 | 
						|
			status = .q_mark1
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// negate group
 | 
						|
		if status == .q_mark1 && ch == `!` {
 | 
						|
			i += char_len
 | 
						|
			return 0, false, true, name, i
 | 
						|
		}
 | 
						|
 | 
						|
		// non capturing group
 | 
						|
		if status == .q_mark1 && ch == `:` {
 | 
						|
			i += char_len
 | 
						|
			return 0, false, false, name, i
 | 
						|
		}
 | 
						|
 | 
						|
		// enter in P section
 | 
						|
		if status == .q_mark1 && ch == `P` {
 | 
						|
			status = .p_status
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// not a valid q mark found
 | 
						|
		if status == .q_mark1 {
 | 
						|
			// println("NO VALID Q MARK")
 | 
						|
			return -2, true, false, name, i
 | 
						|
		}
 | 
						|
 | 
						|
		if status == .p_status && ch == `<` {
 | 
						|
			status = .p_start
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		if status == .p_start && ch != `>` {
 | 
						|
			status = .p_in_name
 | 
						|
			name += '${ch:1c}' // TODO: manage utf8 chars
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// colect name
 | 
						|
		if status == .p_in_name && ch != `>` && is_alnum(ch) {
 | 
						|
			name += '${ch:1c}' // TODO: manage utf8 chars
 | 
						|
			i += char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// end name
 | 
						|
		if status == .p_in_name && ch == `>` {
 | 
						|
			i += char_len
 | 
						|
			return 0, true, false, name, i
 | 
						|
		}
 | 
						|
 | 
						|
		// error on name group
 | 
						|
		if status == .p_in_name {
 | 
						|
			return -2, true, false, name, i
 | 
						|
		}
 | 
						|
 | 
						|
		// normal group, nothig to do, exit
 | 
						|
		return 0, true, false, name, i
 | 
						|
	}
 | 
						|
	// UNREACHABLE
 | 
						|
	// println("ERROR!! NOT MEANT TO BE HERE!!1")
 | 
						|
	return -2, true, false, name, i
 | 
						|
}
 | 
						|
 | 
						|
const (
 | 
						|
	quntifier_chars = [rune(`+`), `*`, `?`, `{`]
 | 
						|
)
 | 
						|
 | 
						|
//
 | 
						|
// main compiler
 | 
						|
//
 | 
						|
// compile return (return code, index) where index is the index of the error in the query string if return code is an error code
 | 
						|
fn (mut re RE) impl_compile(in_txt string) (int, int) {
 | 
						|
	mut i := 0 // input string index
 | 
						|
	mut pc := 0 // program counter
 | 
						|
 | 
						|
	// group management variables
 | 
						|
	mut group_count := -1
 | 
						|
	mut group_stack := []int{len: re.group_max_nested, init: 0}
 | 
						|
	mut group_stack_txt_index := []int{len: re.group_max_nested, init: -1}
 | 
						|
	mut group_stack_index := -1
 | 
						|
 | 
						|
	re.query = in_txt // save the query string
 | 
						|
 | 
						|
	i = 0
 | 
						|
	for i < in_txt.len {
 | 
						|
		mut char_tmp := u32(0)
 | 
						|
		mut char_len := 0
 | 
						|
		// println("i: ${i:3d} ch: ${in_txt.str[i]:c}")
 | 
						|
 | 
						|
		char_tmp, char_len = re.get_char(in_txt, i)
 | 
						|
 | 
						|
		//
 | 
						|
		// check special cases: $ ^
 | 
						|
		//
 | 
						|
		if char_len == 1 && i == 0 && byte(char_tmp) == `^` {
 | 
						|
			re.flag = regex.f_ms
 | 
						|
			i = i + char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		if char_len == 1 && i == (in_txt.len - 1) && byte(char_tmp) == `$` {
 | 
						|
			re.flag = regex.f_me
 | 
						|
			i = i + char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// ist_group_start
 | 
						|
		if char_len == 1 && pc >= 0 && byte(char_tmp) == `(` {
 | 
						|
			// check max groups allowed
 | 
						|
			if group_count > re.group_max {
 | 
						|
				return regex.err_groups_overflow, i + 1
 | 
						|
			}
 | 
						|
			group_stack_index++
 | 
						|
 | 
						|
			// check max nested groups allowed
 | 
						|
			if group_stack_index > re.group_max_nested {
 | 
						|
				return regex.err_groups_max_nested, i + 1
 | 
						|
			}
 | 
						|
 | 
						|
			tmp_res, cgroup_flag, negate_flag, cgroup_name, next_i := re.parse_groups(in_txt,
 | 
						|
				i)
 | 
						|
 | 
						|
			// manage question mark format error
 | 
						|
			if tmp_res < -1 {
 | 
						|
				return regex.err_group_qm_notation, next_i
 | 
						|
			}
 | 
						|
 | 
						|
			// println("Parse group: [$tmp_res, $cgroup_flag, ($i,$next_i), '${in_txt[i..next_i]}' ]")
 | 
						|
			i = next_i
 | 
						|
 | 
						|
			if cgroup_flag == true {
 | 
						|
				group_count++
 | 
						|
			}
 | 
						|
 | 
						|
			// calculate the group id
 | 
						|
			// if it is a named group, recycle the group id
 | 
						|
			// NOTE: **** the group index is +1 because map return 0 when not found!! ****
 | 
						|
			mut group_id := group_count
 | 
						|
			if cgroup_name.len > 0 {
 | 
						|
				// println("GROUP NAME: ${cgroup_name}")
 | 
						|
				if cgroup_name in re.group_map {
 | 
						|
					group_id = re.group_map[cgroup_name] - 1
 | 
						|
					group_count--
 | 
						|
				} else {
 | 
						|
					re.group_map[cgroup_name] = group_id + 1
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			group_stack_txt_index[group_stack_index] = i
 | 
						|
			group_stack[group_stack_index] = pc
 | 
						|
 | 
						|
			re.prog[pc].ist = u32(0) | regex.ist_group_start
 | 
						|
			re.prog[pc].rep_min = 1
 | 
						|
			re.prog[pc].rep_max = 1
 | 
						|
 | 
						|
			// manage negation groups
 | 
						|
			if negate_flag == true {
 | 
						|
				re.prog[pc].group_neg = true
 | 
						|
				re.prog[pc].rep_min = 0 // may be not catched, but it is ok
 | 
						|
			}
 | 
						|
 | 
						|
			// set the group id
 | 
						|
			if cgroup_flag == false {
 | 
						|
				// println("NO CAPTURE GROUP")
 | 
						|
				re.prog[pc].group_id = -1
 | 
						|
			} else {
 | 
						|
				re.prog[pc].group_id = group_id
 | 
						|
			}
 | 
						|
 | 
						|
			pc = pc + 1
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// ist_group_end
 | 
						|
		if char_len == 1 && pc > 0 && byte(char_tmp) == `)` {
 | 
						|
			if group_stack_index < 0 {
 | 
						|
				return regex.err_group_not_balanced, i + 1
 | 
						|
			}
 | 
						|
 | 
						|
			goto_pc := group_stack[group_stack_index]
 | 
						|
			group_stack_index--
 | 
						|
 | 
						|
			re.prog[pc].ist = u32(0) | regex.ist_group_end
 | 
						|
			re.prog[pc].rep_min = 1
 | 
						|
			re.prog[pc].rep_max = 1
 | 
						|
 | 
						|
			re.prog[pc].goto_pc = goto_pc // PC where to jump if a group need
 | 
						|
			re.prog[pc].group_id = re.prog[goto_pc].group_id // id of this group, used for storing data
 | 
						|
 | 
						|
			re.prog[goto_pc].goto_pc = pc // start goto point to the end group pc
 | 
						|
			// re.prog[goto_pc].group_id = group_count         // id of this group, used for storing data
 | 
						|
 | 
						|
			// duplicate the negation group info and settings
 | 
						|
			if re.prog[goto_pc].group_neg == true {
 | 
						|
				re.prog[pc].group_neg = re.prog[goto_pc].group_neg
 | 
						|
				re.prog[pc].rep_min = re.prog[goto_pc].rep_min
 | 
						|
			}
 | 
						|
 | 
						|
			pc = pc + 1
 | 
						|
			i = i + char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// ist_dot_char match any char except the following token
 | 
						|
		if char_len == 1 && pc >= 0 && byte(char_tmp) == `.` {
 | 
						|
			// consecutive ist_dot_char is a syntax error
 | 
						|
			if pc > 0 && re.prog[pc - 1].ist == regex.ist_dot_char {
 | 
						|
				return regex.err_consecutive_dots, i
 | 
						|
			}
 | 
						|
 | 
						|
			re.prog[pc].ist = u32(0) | regex.ist_dot_char
 | 
						|
			re.prog[pc].rep_min = 1
 | 
						|
			re.prog[pc].rep_max = 1
 | 
						|
			pc = pc + 1
 | 
						|
			i = i + char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// OR branch
 | 
						|
		if char_len == 1 && pc > 0 && byte(char_tmp) == `|` {
 | 
						|
			if pc > 0 && re.prog[pc - 1].ist == regex.ist_or_branch {
 | 
						|
				return regex.err_syntax_error, i
 | 
						|
			}
 | 
						|
			re.prog[pc].ist = u32(0) | regex.ist_or_branch
 | 
						|
			re.prog[pc].source_index = i
 | 
						|
			pc = pc + 1
 | 
						|
			i = i + char_len
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// Quantifiers
 | 
						|
		if char_len == 1 && pc > 0 {
 | 
						|
			mut char_next := rune(0)
 | 
						|
			mut char_next_len := 0
 | 
						|
			if (char_len + i) < in_txt.len {
 | 
						|
				char_next, char_next_len = re.get_char(in_txt, i + char_len)
 | 
						|
			}
 | 
						|
			mut quant_flag := true
 | 
						|
 | 
						|
			// negation groups can not have quantifiers
 | 
						|
			if re.prog[pc - 1].group_neg == true && char_tmp in [`?`, `+`, `*`, `{`] {
 | 
						|
				return regex.err_neg_group_quantifier, i
 | 
						|
			}
 | 
						|
 | 
						|
			match byte(char_tmp) {
 | 
						|
				`?` {
 | 
						|
					// println("q: ${char_tmp:c}")
 | 
						|
					// check illegal quantifier sequences
 | 
						|
					if char_next_len == 1 && char_next in regex.quntifier_chars {
 | 
						|
						return regex.err_syntax_error, i
 | 
						|
					}
 | 
						|
					re.prog[pc - 1].rep_min = 0
 | 
						|
					re.prog[pc - 1].rep_max = 1
 | 
						|
				}
 | 
						|
				`+` {
 | 
						|
					// println("q: ${char_tmp:c}")
 | 
						|
					// check illegal quantifier sequences
 | 
						|
					if char_next_len == 1 && char_next in regex.quntifier_chars {
 | 
						|
						return regex.err_syntax_error, i
 | 
						|
					}
 | 
						|
					re.prog[pc - 1].rep_min = 1
 | 
						|
					re.prog[pc - 1].rep_max = regex.max_quantifier
 | 
						|
				}
 | 
						|
				`*` {
 | 
						|
					// println("q: ${char_tmp:c}")
 | 
						|
					// check illegal quantifier sequences
 | 
						|
					if char_next_len == 1 && char_next in regex.quntifier_chars {
 | 
						|
						return regex.err_syntax_error, i
 | 
						|
					}
 | 
						|
					re.prog[pc - 1].rep_min = 0
 | 
						|
					re.prog[pc - 1].rep_max = regex.max_quantifier
 | 
						|
				}
 | 
						|
				`{` {
 | 
						|
					min, max, tmp, greedy := re.parse_quantifier(in_txt, i + 1)
 | 
						|
					// it is a quantifier
 | 
						|
					if min >= 0 {
 | 
						|
						// println("{$min,$max}\n str:[${in_txt[i..i+tmp]}] greedy:$greedy")
 | 
						|
						i = i + tmp
 | 
						|
						re.prog[pc - 1].rep_min = min
 | 
						|
						re.prog[pc - 1].rep_max = max
 | 
						|
						re.prog[pc - 1].greedy = greedy
 | 
						|
						// check illegal quantifier sequences
 | 
						|
						if i <= in_txt.len {
 | 
						|
							char_next, char_next_len = re.get_char(in_txt, i)
 | 
						|
							if char_next_len == 1 && char_next in regex.quntifier_chars {
 | 
						|
								return regex.err_syntax_error, i
 | 
						|
							}
 | 
						|
						}
 | 
						|
						continue
 | 
						|
					} else {
 | 
						|
						return min, i
 | 
						|
					}
 | 
						|
 | 
						|
					// TODO: decide if the open bracket can be conform without the close bracket
 | 
						|
					/*
 | 
						|
					// no conform, parse as normal char
 | 
						|
					else {
 | 
						|
						quant_flag = false
 | 
						|
					}
 | 
						|
					*/
 | 
						|
				}
 | 
						|
				else {
 | 
						|
					quant_flag = false
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			if quant_flag {
 | 
						|
				i = i + char_len
 | 
						|
				continue
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// IST_CHAR_CLASS_*
 | 
						|
		if char_len == 1 && pc >= 0 {
 | 
						|
			if byte(char_tmp) == `[` {
 | 
						|
				cc_index, tmp, cc_type := re.parse_char_class(in_txt, i + 1)
 | 
						|
				if cc_index >= 0 {
 | 
						|
					// println("index: $cc_index str:${in_txt[i..i+tmp]}")
 | 
						|
					i = i + tmp
 | 
						|
					re.prog[pc].ist = u32(0) | cc_type
 | 
						|
					re.prog[pc].cc_index = cc_index
 | 
						|
					re.prog[pc].rep_min = 1
 | 
						|
					re.prog[pc].rep_max = 1
 | 
						|
					pc = pc + 1
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				// cc_class vector memory full
 | 
						|
				else if cc_index < 0 {
 | 
						|
					return cc_index, i
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// ist_bsls_char
 | 
						|
		if char_len == 1 && pc >= 0 {
 | 
						|
			if byte(char_tmp) == `\\` {
 | 
						|
				bsls_index, tmp := re.parse_bsls(in_txt, i)
 | 
						|
				// println("index: $bsls_index str:${in_txt[i..i+tmp]}")
 | 
						|
				if bsls_index >= 0 {
 | 
						|
					i = i + tmp
 | 
						|
					re.prog[pc].ist = u32(0) | regex.ist_bsls_char
 | 
						|
					re.prog[pc].rep_min = 1
 | 
						|
					re.prog[pc].rep_max = 1
 | 
						|
					re.prog[pc].validator = regex.bsls_validator_array[bsls_index].validator
 | 
						|
					re.prog[pc].ch = regex.bsls_validator_array[bsls_index].ch
 | 
						|
					pc = pc + 1
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				// this is an escape char, skip the bsls and continue as a normal char
 | 
						|
				else if bsls_index == regex.no_match_found {
 | 
						|
					i += char_len
 | 
						|
					char_tmp, char_len = re.get_char(in_txt, i)
 | 
						|
					// continue as simple char
 | 
						|
				}
 | 
						|
				// if not an escape or a bsls char then it is an error (at least for now!)
 | 
						|
				else {
 | 
						|
					return bsls_index, i + tmp
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// ist_simple_char
 | 
						|
		re.prog[pc].ist = regex.ist_simple_char
 | 
						|
		re.prog[pc].ch = char_tmp
 | 
						|
		re.prog[pc].ch_len = byte(char_len)
 | 
						|
		re.prog[pc].rep_min = 1
 | 
						|
		re.prog[pc].rep_max = 1
 | 
						|
		// println("char: ${char_tmp:c}")
 | 
						|
		pc = pc + 1
 | 
						|
 | 
						|
		i += char_len
 | 
						|
	}
 | 
						|
 | 
						|
	// add end of the program
 | 
						|
	re.prog[pc].ist = regex.ist_prog_end
 | 
						|
	re.prog_len = pc
 | 
						|
 | 
						|
	// check for unbalanced groups
 | 
						|
	if group_stack_index != -1 {
 | 
						|
		return regex.err_group_not_balanced, group_stack_txt_index[group_stack_index] + 1
 | 
						|
	}
 | 
						|
 | 
						|
	// check for OR at the end of the program
 | 
						|
	if pc > 0 && re.prog[pc - 1].ist == regex.ist_or_branch {
 | 
						|
		return regex.err_syntax_error, in_txt.len - 1
 | 
						|
	}
 | 
						|
 | 
						|
	// store the number of groups in the query
 | 
						|
	re.group_count = group_count + 1
 | 
						|
 | 
						|
	//******************************************
 | 
						|
	// Post processing
 | 
						|
	//******************************************
 | 
						|
 | 
						|
	//
 | 
						|
	// manage ist_dot_char
 | 
						|
	//
 | 
						|
 | 
						|
	// find the checks for dot chars, if any...
 | 
						|
	mut pc1 := 0
 | 
						|
	mut dot_char_count := 0
 | 
						|
	mut last_dot_char_pc := -1
 | 
						|
	for pc1 < pc {
 | 
						|
		if re.prog[pc1].ist == regex.ist_dot_char {
 | 
						|
			// println("Dot_char pc: $pc1")
 | 
						|
			last_dot_char_pc = pc1
 | 
						|
			dot_char_count++
 | 
						|
			mut pc2 := pc1 + 1
 | 
						|
			for pc2 < pc {
 | 
						|
				// consecutive dot chars is an error
 | 
						|
				if re.prog[pc2].ist == regex.ist_dot_char {
 | 
						|
					return regex.err_syntax_error, 0
 | 
						|
				}
 | 
						|
				if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
 | 
						|
					regex.ist_group_start] {
 | 
						|
					// println("Next dot char check is PC: ${pc2}")
 | 
						|
					re.prog[pc1].dot_check_pc = pc2
 | 
						|
					break
 | 
						|
				}
 | 
						|
				pc2++
 | 
						|
			}
 | 
						|
		}
 | 
						|
		pc1++
 | 
						|
	}
 | 
						|
 | 
						|
	// println("last_dot_char_pc: $last_dot_char_pc")
 | 
						|
	if last_dot_char_pc >= 0 {
 | 
						|
		pc1 = last_dot_char_pc + 1
 | 
						|
		mut is_last_dot := true
 | 
						|
		for pc1 < pc {
 | 
						|
			if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
 | 
						|
				is_last_dot = false
 | 
						|
				break
 | 
						|
			}
 | 
						|
			pc1++
 | 
						|
		}
 | 
						|
		if is_last_dot {
 | 
						|
			re.prog[last_dot_char_pc].last_dot_flag = true
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	//
 | 
						|
	// manage bsls_char
 | 
						|
	//
 | 
						|
 | 
						|
	// find the checks for bsls, if any...
 | 
						|
	pc1 = 0
 | 
						|
	mut bsls_char_count := 0
 | 
						|
	mut last_bsls_char_pc := -1
 | 
						|
	for pc1 < pc {
 | 
						|
		if re.prog[pc1].ist == regex.ist_bsls_char {
 | 
						|
			// println("bsls_char pc: $pc1")
 | 
						|
			last_bsls_char_pc = pc1
 | 
						|
			bsls_char_count++
 | 
						|
			mut pc2 := pc1 + 1
 | 
						|
			for pc2 < pc {
 | 
						|
				if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
 | 
						|
					regex.ist_group_start] {
 | 
						|
					// println("Next bsls check is PC: ${pc2}")
 | 
						|
					re.prog[pc1].bsls_check_pc = pc2
 | 
						|
					break
 | 
						|
				}
 | 
						|
				pc2++
 | 
						|
			}
 | 
						|
		}
 | 
						|
		pc1++
 | 
						|
	}
 | 
						|
 | 
						|
	// println("last_bsls_char_pc: $last_bsls_char_pc")
 | 
						|
	if last_bsls_char_pc >= 0 {
 | 
						|
		pc1 = last_bsls_char_pc + 1
 | 
						|
		mut is_last_bsls := true
 | 
						|
		for pc1 < pc {
 | 
						|
			if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
 | 
						|
				is_last_bsls = false
 | 
						|
				break
 | 
						|
			}
 | 
						|
			pc1++
 | 
						|
		}
 | 
						|
		if is_last_bsls {
 | 
						|
			re.prog[last_bsls_char_pc].last_dot_flag = true
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	//******************************************
 | 
						|
 | 
						|
	// OR branch
 | 
						|
	// a|b|cd
 | 
						|
	// d exit point
 | 
						|
	// a,b,c branches
 | 
						|
	// set the jump in the right places
 | 
						|
	pc1 = 0
 | 
						|
	for pc1 < pc - 2 {
 | 
						|
		// println("Here $pc1 ${pc-2}")
 | 
						|
		// println("source index: ${pc1 + 1} => ${re.prog[pc1+1].source_index}")
 | 
						|
		if re.prog[pc1 + 1].ist == regex.ist_or_branch {
 | 
						|
			// two consecutive OR are a syntax error
 | 
						|
			if re.prog[pc1 + 2].ist == regex.ist_or_branch {
 | 
						|
				return regex.err_syntax_error, i
 | 
						|
			}
 | 
						|
 | 
						|
			// check for []|[] errors
 | 
						|
			if re.prog[pc1].ist == regex.ist_char_class_pos
 | 
						|
				&& re.prog[pc1 + 2].ist == regex.ist_char_class_pos {
 | 
						|
				return regex.err_invalid_or_with_cc, re.prog[pc1 + 1].source_index
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// manange a|b chains like a|(b)|c|d...
 | 
						|
		// standard solution
 | 
						|
		if re.prog[pc1].ist != regex.ist_or_branch && re.prog[pc1 + 1].ist == regex.ist_or_branch
 | 
						|
			&& re.prog[pc1 + 2].ist != regex.ist_or_branch {
 | 
						|
			re.prog[pc1].next_is_or = true // set that the next token is an  OR
 | 
						|
			re.prog[pc1 + 1].rep_min = pc1 + 2 // failed match jump
 | 
						|
 | 
						|
			// match jump, if an OR chain the next token will be an OR token
 | 
						|
			mut pc2 := pc1 + 2
 | 
						|
			for pc2 < pc - 1 {
 | 
						|
				ist := re.prog[pc2].ist
 | 
						|
				if ist == regex.ist_group_start {
 | 
						|
					re.prog[pc1 + 1].rep_max = re.prog[pc2].goto_pc + 1
 | 
						|
					break
 | 
						|
				}
 | 
						|
				if ist != regex.ist_or_branch {
 | 
						|
					re.prog[pc1 + 1].rep_max = pc2 + 1
 | 
						|
					break
 | 
						|
				}
 | 
						|
 | 
						|
				pc2++
 | 
						|
			}
 | 
						|
			// special case query of few chars, the true can't go on the first instruction
 | 
						|
			if re.prog[pc1 + 1].rep_max == pc1 {
 | 
						|
				re.prog[pc1 + 1].rep_max = 3
 | 
						|
			}
 | 
						|
			// println("Compile OR postproc. [$pc1,OR ${pc1+1},$pc2]")
 | 
						|
			pc1 = pc2
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		pc1++
 | 
						|
	}
 | 
						|
 | 
						|
	//******************************************
 | 
						|
	// DEBUG PRINT REGEX GENERATED CODE
 | 
						|
	//******************************************
 | 
						|
	if re.debug > 0 {
 | 
						|
		gc := re.get_code()
 | 
						|
		re.log_func(gc)
 | 
						|
	}
 | 
						|
	//******************************************
 | 
						|
 | 
						|
	return regex.compile_ok, 0
 | 
						|
}
 | 
						|
 | 
						|
// get_code return the compiled code as regex string, note: may be different from the source!
 | 
						|
pub fn (re RE) get_code() string {
 | 
						|
	mut pc1 := 0
 | 
						|
	mut res := strings.new_builder(re.cc.len * 2 * re.prog.len)
 | 
						|
	res.write_string('========================================\nv RegEx compiler v $regex.v_regex_version output:\n')
 | 
						|
 | 
						|
	mut stop_flag := false
 | 
						|
 | 
						|
	for pc1 <= re.prog.len {
 | 
						|
		tk := re.prog[pc1]
 | 
						|
		res.write_string('PC:${pc1:3d}')
 | 
						|
 | 
						|
		res.write_string(' ist: ')
 | 
						|
		res.write_string('${tk.ist:8x}'.replace(' ', '0'))
 | 
						|
		res.write_string(' ')
 | 
						|
		ist := tk.ist
 | 
						|
		if ist == regex.ist_bsls_char {
 | 
						|
			res.write_string('[\\${tk.ch:1c}]     BSLS')
 | 
						|
		} else if ist == regex.ist_prog_end {
 | 
						|
			res.write_string('PROG_END')
 | 
						|
			stop_flag = true
 | 
						|
		} else if ist == regex.ist_or_branch {
 | 
						|
			res.write_string('OR      ')
 | 
						|
		} else if ist == regex.ist_char_class_pos {
 | 
						|
			res.write_string('[${re.get_char_class(pc1)}]     CHAR_CLASS_POS')
 | 
						|
		} else if ist == regex.ist_char_class_neg {
 | 
						|
			res.write_string('[^${re.get_char_class(pc1)}]    CHAR_CLASS_NEG')
 | 
						|
		} else if ist == regex.ist_dot_char {
 | 
						|
			res.write_string('.        DOT_CHAR nx chk: $tk.dot_check_pc')
 | 
						|
			if tk.last_dot_flag == true {
 | 
						|
				res.write_string(' last!')
 | 
						|
			}
 | 
						|
		} else if ist == regex.ist_group_start {
 | 
						|
			res.write_string('(        GROUP_START #:$tk.group_id')
 | 
						|
			if tk.group_id == -1 {
 | 
						|
				res.write_string(' ?:')
 | 
						|
			} else {
 | 
						|
				for x in re.group_map.keys() {
 | 
						|
					if re.group_map[x] == (tk.group_id + 1) {
 | 
						|
						res.write_string(' ?P<$x>')
 | 
						|
						break
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		} else if ist == regex.ist_group_end {
 | 
						|
			res.write_string(')        GROUP_END   #:$tk.group_id')
 | 
						|
		} else if ist == regex.ist_simple_char {
 | 
						|
			res.write_string('[${tk.ch:1c}]      query_ch')
 | 
						|
		}
 | 
						|
 | 
						|
		if tk.rep_max == regex.max_quantifier {
 | 
						|
			res.write_string(' {${tk.rep_min:3d},MAX}')
 | 
						|
		} else {
 | 
						|
			if ist == regex.ist_or_branch {
 | 
						|
				res.write_string(' if false go: ${tk.rep_min:3d} if true go: ${tk.rep_max:3d}')
 | 
						|
			} else {
 | 
						|
				res.write_string(' {${tk.rep_min:3d},${tk.rep_max:3d}}')
 | 
						|
			}
 | 
						|
			if tk.greedy == true {
 | 
						|
				res.write_string('?')
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		res.write_string('\n')
 | 
						|
		if stop_flag {
 | 
						|
			break
 | 
						|
		}
 | 
						|
		pc1++
 | 
						|
	}
 | 
						|
 | 
						|
	res.write_string('========================================\n')
 | 
						|
	return res.str()
 | 
						|
}
 | 
						|
 | 
						|
// get_query return a string with a reconstruction of the query starting from the regex program code
 | 
						|
pub fn (re RE) get_query() string {
 | 
						|
	mut res := strings.new_builder(re.query.len * 2)
 | 
						|
 | 
						|
	if (re.flag & regex.f_ms) != 0 {
 | 
						|
		res.write_string('^')
 | 
						|
	}
 | 
						|
 | 
						|
	mut i := 0
 | 
						|
	for i < re.prog.len && re.prog[i].ist != regex.ist_prog_end && re.prog[i].ist != 0 {
 | 
						|
		tk := unsafe { &re.prog[i] }
 | 
						|
		ch := tk.ist
 | 
						|
 | 
						|
		// GROUP start
 | 
						|
		if ch == regex.ist_group_start {
 | 
						|
			if re.debug > 0 {
 | 
						|
				res.write_string('#$tk.group_id')
 | 
						|
			}
 | 
						|
			res.write_string('(')
 | 
						|
 | 
						|
			if tk.group_neg == true {
 | 
						|
				res.write_string('?!') // negation group
 | 
						|
			} else if tk.group_id == -1 {
 | 
						|
				res.write_string('?:') // non capturing group
 | 
						|
			}
 | 
						|
 | 
						|
			for x in re.group_map.keys() {
 | 
						|
				if re.group_map[x] == (tk.group_id + 1) {
 | 
						|
					res.write_string('?P<$x>')
 | 
						|
					break
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			i++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// GROUP end
 | 
						|
		if ch == regex.ist_group_end {
 | 
						|
			res.write_string(')')
 | 
						|
		}
 | 
						|
 | 
						|
		// OR branch
 | 
						|
		if ch == regex.ist_or_branch {
 | 
						|
			res.write_string('|')
 | 
						|
			if re.debug > 0 {
 | 
						|
				res.write_string('{$tk.rep_min,$tk.rep_max}')
 | 
						|
			}
 | 
						|
			i++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// char class
 | 
						|
		if ch == regex.ist_char_class_neg || ch == regex.ist_char_class_pos {
 | 
						|
			res.write_string('[')
 | 
						|
			if ch == regex.ist_char_class_neg {
 | 
						|
				res.write_string('^')
 | 
						|
			}
 | 
						|
			res.write_string('${re.get_char_class(i)}')
 | 
						|
			res.write_string(']')
 | 
						|
		}
 | 
						|
 | 
						|
		// bsls char
 | 
						|
		if ch == regex.ist_bsls_char {
 | 
						|
			res.write_string('\\${tk.ch:1c}')
 | 
						|
		}
 | 
						|
 | 
						|
		// ist_dot_char
 | 
						|
		if ch == regex.ist_dot_char {
 | 
						|
			res.write_string('.')
 | 
						|
		}
 | 
						|
 | 
						|
		// char alone
 | 
						|
		if ch == regex.ist_simple_char {
 | 
						|
			if byte(ch) in regex.bsls_escape_list {
 | 
						|
				res.write_string('\\')
 | 
						|
			}
 | 
						|
			res.write_string('${tk.ch:c}')
 | 
						|
		}
 | 
						|
 | 
						|
		// quantifier
 | 
						|
		if !(tk.rep_min == 1 && tk.rep_max == 1) && tk.group_neg == false {
 | 
						|
			if tk.rep_min == 0 && tk.rep_max == 1 {
 | 
						|
				res.write_string('?')
 | 
						|
			} else if tk.rep_min == 1 && tk.rep_max == regex.max_quantifier {
 | 
						|
				res.write_string('+')
 | 
						|
			} else if tk.rep_min == 0 && tk.rep_max == regex.max_quantifier {
 | 
						|
				res.write_string('*')
 | 
						|
			} else {
 | 
						|
				if tk.rep_max == regex.max_quantifier {
 | 
						|
					res.write_string('{$tk.rep_min,MAX}')
 | 
						|
				} else {
 | 
						|
					res.write_string('{$tk.rep_min,$tk.rep_max}')
 | 
						|
				}
 | 
						|
				if tk.greedy == true {
 | 
						|
					res.write_string('?')
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
		i++
 | 
						|
	}
 | 
						|
	if (re.flag & regex.f_me) != 0 {
 | 
						|
		res.write_string('$')
 | 
						|
	}
 | 
						|
 | 
						|
	return res.str()
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************
 | 
						|
*
 | 
						|
* Groups saving utilities
 | 
						|
*
 | 
						|
******************************************************************************/
 | 
						|
[direct_array_access]
 | 
						|
fn (mut re RE) group_continuous_save(g_index int) {
 | 
						|
	if re.group_csave_flag == true {
 | 
						|
		// continuous save, save until we have space
 | 
						|
 | 
						|
		// init the first element as counter
 | 
						|
		if re.group_csave.len == 0 {
 | 
						|
			re.group_csave << 0
 | 
						|
		}
 | 
						|
 | 
						|
		gi := g_index >> 1
 | 
						|
		start := re.groups[g_index]
 | 
						|
		end := re.groups[g_index + 1]
 | 
						|
 | 
						|
		// check if we are simply increasing the size ot the found group
 | 
						|
		if re.group_csave.len >= 4 && gi == re.group_csave[re.group_csave.len - 3]
 | 
						|
			&& start == re.group_csave[re.group_csave.len - 2] {
 | 
						|
			re.group_csave[re.group_csave.len - 1] = end
 | 
						|
			return
 | 
						|
		}
 | 
						|
 | 
						|
		// otherwise append a new group to the list
 | 
						|
 | 
						|
		// increment counter
 | 
						|
		re.group_csave[0]++
 | 
						|
		// save the record
 | 
						|
		re.group_csave << (g_index >> 1) // group id
 | 
						|
		re.group_csave << re.groups[g_index] // start
 | 
						|
		re.group_csave << re.groups[g_index + 1] // end
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************************
 | 
						|
*
 | 
						|
* Matching
 | 
						|
*
 | 
						|
******************************************************************************/
 | 
						|
enum Match_state {
 | 
						|
	start = 0
 | 
						|
	stop
 | 
						|
	end
 | 
						|
	new_line
 | 
						|
	ist_load // load and execute instruction
 | 
						|
	ist_next // go to next instruction
 | 
						|
	ist_next_ks // go to next instruction without clenaning the state
 | 
						|
	ist_quant_p // match positive ,quantifier check
 | 
						|
	ist_quant_n // match negative, quantifier check
 | 
						|
	ist_quant_pg // match positive ,group quantifier check
 | 
						|
	ist_quant_ng // match negative ,group quantifier check
 | 
						|
}
 | 
						|
 | 
						|
fn state_str(s Match_state) string {
 | 
						|
	match s {
 | 
						|
		.start { return 'start' }
 | 
						|
		.stop { return 'stop' }
 | 
						|
		.end { return 'end' }
 | 
						|
		.new_line { return 'new line' }
 | 
						|
		.ist_load { return 'ist_load' }
 | 
						|
		.ist_next { return 'ist_next' }
 | 
						|
		.ist_next_ks { return 'ist_next_ks' }
 | 
						|
		.ist_quant_p { return 'ist_quant_p' }
 | 
						|
		.ist_quant_n { return 'ist_quant_n' }
 | 
						|
		.ist_quant_pg { return 'ist_quant_pg' }
 | 
						|
		.ist_quant_ng { return 'ist_quant_ng' }
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
struct StateObj {
 | 
						|
pub mut:
 | 
						|
	group_index int = -1 // group id used to know how many groups are open
 | 
						|
	match_flag  bool // indicate if we are in a match condition
 | 
						|
	match_index int = -1 // index of the last match
 | 
						|
	first_match int = -1 // index of the first match
 | 
						|
	pc          int = -1 // program counter
 | 
						|
	i           int = -1 // source string index
 | 
						|
	char_len    int  // last char legth
 | 
						|
	last_dot_pc int = -1 // last dot chat pc
 | 
						|
}
 | 
						|
 | 
						|
[direct_array_access]
 | 
						|
pub fn (mut re RE) match_base(in_txt &byte, in_txt_len int) (int, int) {
 | 
						|
	// result status
 | 
						|
	mut result := regex.no_match_found // function return
 | 
						|
 | 
						|
	mut ch := rune(0) // examinated char
 | 
						|
	mut char_len := 0 // utf8 examinated char len
 | 
						|
	mut m_state := Match_state.start // start point for the matcher FSM
 | 
						|
	mut src_end := false
 | 
						|
	mut last_fnd_pc := -1
 | 
						|
 | 
						|
	mut state := StateObj{} // actual state
 | 
						|
	mut ist := rune(0) // actual instruction
 | 
						|
	mut l_ist := rune(0) // last matched instruction
 | 
						|
 | 
						|
	mut step_count := 0 // stats for debug
 | 
						|
	mut dbg_line := 0 // count debug line printed
 | 
						|
 | 
						|
	re.reset()
 | 
						|
 | 
						|
	if re.debug > 0 {
 | 
						|
		// print header
 | 
						|
		mut h_buf := strings.new_builder(32)
 | 
						|
		h_buf.write_string('flags: ')
 | 
						|
		h_buf.write_string('${re.flag:8x}'.replace(' ', '0'))
 | 
						|
		h_buf.write_string('\n')
 | 
						|
		sss := h_buf.str()
 | 
						|
		re.log_func(sss)
 | 
						|
	}
 | 
						|
 | 
						|
	for m_state != .end {
 | 
						|
		if state.pc >= 0 && state.pc < re.prog.len {
 | 
						|
			ist = re.prog[state.pc].ist
 | 
						|
		} else if state.pc >= re.prog.len {
 | 
						|
			// println("ERROR!! PC overflow!!")
 | 
						|
			return regex.err_internal_error, state.i
 | 
						|
		}
 | 
						|
 | 
						|
		//******************************************
 | 
						|
		// DEBUG LOG
 | 
						|
		//******************************************
 | 
						|
		if re.debug > 0 {
 | 
						|
			mut buf2 := strings.new_builder(re.cc.len + 128)
 | 
						|
 | 
						|
			// print all the instructions
 | 
						|
 | 
						|
			// end of the input text
 | 
						|
			if state.i >= in_txt_len {
 | 
						|
				buf2.write_string('# ${step_count:3d} END OF INPUT TEXT\n')
 | 
						|
				sss := buf2.str()
 | 
						|
				re.log_func(sss)
 | 
						|
			} else {
 | 
						|
				// print only the exe instruction
 | 
						|
				if (re.debug == 1 && m_state == .ist_load) || re.debug == 2 {
 | 
						|
					if ist == regex.ist_prog_end {
 | 
						|
						buf2.write_string('# ${step_count:3d} PROG_END\n')
 | 
						|
					} else if ist == 0 || m_state in [.start, .ist_next, .stop] {
 | 
						|
						buf2.write_string('# ${step_count:3d} s: ${state_str(m_state):12s} PC: NA\n')
 | 
						|
					} else {
 | 
						|
						ch, char_len = re.get_charb(in_txt, state.i)
 | 
						|
 | 
						|
						buf2.write_string('# ${step_count:3d} s: ${state_str(m_state):12s} PC: ${state.pc:3d}=>')
 | 
						|
						buf2.write_string('${ist:8x}'.replace(' ', '0'))
 | 
						|
						buf2.write_string(" i,ch,len:[${state.i:3d},'${utf8_str(ch)}',$char_len] f.m:[${state.first_match:3d},${state.match_index:3d}] ")
 | 
						|
 | 
						|
						if ist == regex.ist_simple_char {
 | 
						|
							buf2.write_string('query_ch: [${re.prog[state.pc].ch:1c}]')
 | 
						|
						} else {
 | 
						|
							if ist == regex.ist_bsls_char {
 | 
						|
								buf2.write_string('BSLS [\\${re.prog[state.pc].ch:1c}]')
 | 
						|
							} else if ist == regex.ist_prog_end {
 | 
						|
								buf2.write_string('PROG_END')
 | 
						|
							} else if ist == regex.ist_or_branch {
 | 
						|
								buf2.write_string('OR')
 | 
						|
							} else if ist == regex.ist_char_class_pos {
 | 
						|
								buf2.write_string('CHAR_CLASS_POS[${re.get_char_class(state.pc)}]')
 | 
						|
							} else if ist == regex.ist_char_class_neg {
 | 
						|
								buf2.write_string('CHAR_CLASS_NEG[${re.get_char_class(state.pc)}]')
 | 
						|
							} else if ist == regex.ist_dot_char {
 | 
						|
								buf2.write_string('DOT_CHAR')
 | 
						|
							} else if ist == regex.ist_group_start {
 | 
						|
								tmp_gi := re.prog[state.pc].group_id
 | 
						|
								tmp_gr := re.prog[re.prog[state.pc].goto_pc].group_rep
 | 
						|
								buf2.write_string('GROUP_START #:$tmp_gi rep:$tmp_gr ')
 | 
						|
							} else if ist == regex.ist_group_end {
 | 
						|
								buf2.write_string('GROUP_END   #:${re.prog[state.pc].group_id} deep:$state.group_index')
 | 
						|
							}
 | 
						|
						}
 | 
						|
						if re.prog[state.pc].rep_max == regex.max_quantifier {
 | 
						|
							buf2.write_string('{${re.prog[state.pc].rep_min},MAX}:${re.prog[state.pc].rep}')
 | 
						|
						} else {
 | 
						|
							buf2.write_string('{${re.prog[state.pc].rep_min},${re.prog[state.pc].rep_max}}:${re.prog[state.pc].rep}')
 | 
						|
						}
 | 
						|
						if re.prog[state.pc].greedy == true {
 | 
						|
							buf2.write_string('?')
 | 
						|
						}
 | 
						|
						buf2.write_string(' (#$state.group_index)')
 | 
						|
 | 
						|
						if ist == regex.ist_dot_char {
 | 
						|
							buf2.write_string(' last!')
 | 
						|
						}
 | 
						|
 | 
						|
						buf2.write_string('\n')
 | 
						|
					}
 | 
						|
					sss2 := buf2.str()
 | 
						|
					re.log_func(sss2)
 | 
						|
				}
 | 
						|
			}
 | 
						|
			step_count++
 | 
						|
			dbg_line++
 | 
						|
		}
 | 
						|
		//******************************************
 | 
						|
 | 
						|
		if ist == regex.ist_prog_end {
 | 
						|
			// println("HERE we end!")
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		// we're out of text, manage it
 | 
						|
		if state.i >= in_txt_len || m_state == .new_line {
 | 
						|
			// println("Finished text!!")
 | 
						|
			src_end = true
 | 
						|
 | 
						|
			// manage groups
 | 
						|
			if state.group_index >= 0 && state.match_index >= 0 {
 | 
						|
				// println("End text with open groups!")
 | 
						|
				// close the groups
 | 
						|
				for state.group_index >= 0 {
 | 
						|
					tmp_pc := re.group_data[state.group_index]
 | 
						|
					re.prog[tmp_pc].group_rep++
 | 
						|
					// println("Closing group $state.group_index {${re.prog[tmp_pc].rep_min},${re.prog[tmp_pc].rep_max}}:${re.prog[tmp_pc].group_rep}")
 | 
						|
 | 
						|
					if re.prog[tmp_pc].group_rep >= re.prog[tmp_pc].rep_min
 | 
						|
						&& re.prog[tmp_pc].group_id >= 0 {
 | 
						|
						start_i := re.group_stack[state.group_index]
 | 
						|
						re.group_stack[state.group_index] = -1
 | 
						|
 | 
						|
						// save group results
 | 
						|
						g_index := re.prog[tmp_pc].group_id * 2
 | 
						|
						if start_i >= 0 {
 | 
						|
							re.groups[g_index] = start_i
 | 
						|
						} else {
 | 
						|
							re.groups[g_index] = 0
 | 
						|
						}
 | 
						|
						// we have fished the text, we must manage out pf bound indexes
 | 
						|
						if state.i >= in_txt_len {
 | 
						|
							state.i = in_txt_len - 1
 | 
						|
						}
 | 
						|
						re.groups[g_index + 1] = state.i
 | 
						|
 | 
						|
						if re.groups[g_index + 1] >= in_txt_len {
 | 
						|
							// println("clamp group on stop!")
 | 
						|
							re.groups[g_index + 1] = in_txt_len - 1
 | 
						|
						}
 | 
						|
 | 
						|
						// continuous save, save until we have space
 | 
						|
						re.group_continuous_save(g_index)
 | 
						|
					}
 | 
						|
					state.group_index--
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			// the text is finished and the groups closed and we are the last group, ok exit
 | 
						|
			if ist == regex.ist_group_end && re.prog[state.pc + 1].ist == regex.ist_prog_end {
 | 
						|
				// println("Last group end")
 | 
						|
				return state.first_match, state.i
 | 
						|
			}
 | 
						|
 | 
						|
			if state.pc == -1 {
 | 
						|
				state.pc = last_fnd_pc
 | 
						|
			}
 | 
						|
 | 
						|
			// println("Finished text!!")
 | 
						|
			// println("Instruction: ${ist:08x} pc: $state.pc")
 | 
						|
			// println("min_rep: ${re.prog[state.pc].rep_min} max_rep: ${re.prog[state.pc].rep_max} rep: ${re.prog[state.pc].rep}")
 | 
						|
 | 
						|
			// program end
 | 
						|
			if ist == regex.ist_prog_end {
 | 
						|
				// println("Program end on end of text!")
 | 
						|
				return state.first_match, state.i
 | 
						|
			}
 | 
						|
 | 
						|
			// we are in a last dot_ char case
 | 
						|
			if l_ist == regex.ist_dot_char {
 | 
						|
				// println("***** We have a last dot_char")
 | 
						|
				// println("PC: ${state.pc} last_dot_flag:${re.prog[state.pc].last_dot_flag}")
 | 
						|
				// println("rep: ${re.prog[state.pc].group_rep} min: ${re.prog[state.pc].rep_min} max: ${re.prog[state.pc].rep_max}")
 | 
						|
				// println("first match: ${state.first_match}")
 | 
						|
				if re.prog[state.pc].last_dot_flag == true
 | 
						|
					&& re.prog[state.pc].rep >= re.prog[state.pc].rep_min
 | 
						|
					&& re.prog[state.pc].rep <= re.prog[state.pc].rep_max {
 | 
						|
					return state.first_match, state.i
 | 
						|
				}
 | 
						|
				// println("Not fitted!!")
 | 
						|
			}
 | 
						|
 | 
						|
			// m_state = .end
 | 
						|
			// break
 | 
						|
 | 
						|
			// no groups open, check the last token quantifier
 | 
						|
			if ist != regex.ist_group_end && re.prog[state.pc + 1].ist == regex.ist_prog_end {
 | 
						|
				if re.prog[state.pc].rep >= re.prog[state.pc].rep_min
 | 
						|
					&& re.prog[state.pc].rep <= re.prog[state.pc].rep_max {
 | 
						|
					// println("We are in good repetition")
 | 
						|
					return state.first_match, state.i
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			// print("No good exit!!")
 | 
						|
			return regex.no_match_found, state.i
 | 
						|
		}
 | 
						|
 | 
						|
		// starting and init
 | 
						|
		if m_state == .start {
 | 
						|
			state.pc = -1
 | 
						|
			state.i = 0
 | 
						|
			m_state = .ist_next
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		// ist_next, next instruction reseting its state
 | 
						|
		else if m_state == .ist_next {
 | 
						|
			state.pc = state.pc + 1
 | 
						|
			re.prog[state.pc].reset()
 | 
						|
			// check if we are in the program bounds
 | 
						|
			if state.pc < 0 || state.pc > re.prog.len {
 | 
						|
				// println("ERROR!! PC overflow!!")
 | 
						|
				return regex.err_internal_error, state.i
 | 
						|
			}
 | 
						|
			m_state = .ist_load
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		// ist_next_ks, next instruction keeping its state
 | 
						|
		else if m_state == .ist_next_ks {
 | 
						|
			state.pc = state.pc + 1
 | 
						|
			// check if we are in the program bounds
 | 
						|
			if state.pc < 0 || state.pc > re.prog.len {
 | 
						|
				// println("ERROR!! PC overflow!!")
 | 
						|
				return regex.err_internal_error, state.i
 | 
						|
			}
 | 
						|
			m_state = .ist_load
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// load the char
 | 
						|
		ch, char_len = re.get_charb(in_txt, state.i)
 | 
						|
 | 
						|
		// check new line if flag f_nl enabled
 | 
						|
		if (re.flag & regex.f_nl) != 0 && char_len == 1 && byte(ch) in regex.new_line_list {
 | 
						|
			m_state = .new_line
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		// check if stop
 | 
						|
		else if m_state == .stop {
 | 
						|
			// we are in search mode, don't exit until the end
 | 
						|
			if ((re.flag & regex.f_src) != 0) && (ist != regex.ist_prog_end) {
 | 
						|
				last_fnd_pc = state.pc
 | 
						|
				state.pc = -1
 | 
						|
				state.i += char_len
 | 
						|
 | 
						|
				m_state = .ist_next
 | 
						|
				re.reset_src()
 | 
						|
				state.match_index = -1
 | 
						|
				state.first_match = -1
 | 
						|
 | 
						|
				// reset state list
 | 
						|
				re.reset()
 | 
						|
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			if ist == regex.ist_prog_end {
 | 
						|
				return state.first_match, state.i
 | 
						|
			}
 | 
						|
 | 
						|
			// manage here dot char
 | 
						|
 | 
						|
			if re.state_list.len > 0 {
 | 
						|
				// println("Here we are, with stop: state buffer: [${re.state_list.len}]")
 | 
						|
				state = re.state_list.pop()
 | 
						|
 | 
						|
				state.match_flag = true
 | 
						|
				l_ist = u32(regex.ist_dot_char)
 | 
						|
 | 
						|
				if state.first_match < 0 {
 | 
						|
					state.first_match = state.i
 | 
						|
				}
 | 
						|
				state.match_index = state.i
 | 
						|
				re.prog[state.pc].rep++ // increase repetitions
 | 
						|
 | 
						|
				state.i += char_len
 | 
						|
				m_state = .ist_quant_p
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			// exit on no match
 | 
						|
			return result, state.i
 | 
						|
		}
 | 
						|
		// ist_load
 | 
						|
		else if m_state == .ist_load {
 | 
						|
			// program end
 | 
						|
			if ist == regex.ist_prog_end {
 | 
						|
				// if we are in match exit well
 | 
						|
 | 
						|
				if state.group_index >= 0 && state.match_index >= 0 {
 | 
						|
					state.group_index = -1
 | 
						|
				}
 | 
						|
 | 
						|
				m_state = .stop
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// check GROUP start, no quantifier is checkd for this token!!
 | 
						|
			else if ist == regex.ist_group_start {
 | 
						|
				state.group_index++
 | 
						|
				re.group_data[state.group_index] = re.prog[state.pc].goto_pc // save where is ist_group_end, we will use it for escape
 | 
						|
				re.group_stack[state.group_index] = state.i // index where we start to manage
 | 
						|
				// println("group_index $state.group_index rep ${re.prog[re.prog[state.pc].goto_pc].group_rep}")
 | 
						|
 | 
						|
				m_state = .ist_next
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// check GROUP end
 | 
						|
			else if ist == regex.ist_group_end {
 | 
						|
				// we are in matching streak
 | 
						|
				// println("Group END!! last ist: ${l_ist:08x}")
 | 
						|
				if state.match_index >= 0 {
 | 
						|
					// restore txt index stack and save the group data
 | 
						|
 | 
						|
					// println("g.id: ${re.prog[state.pc].group_id} group_index: ${state.group_index}")
 | 
						|
					if state.group_index >= 0 && re.prog[state.pc].group_id >= 0 {
 | 
						|
						start_i := re.group_stack[state.group_index]
 | 
						|
 | 
						|
						// save group results
 | 
						|
						g_index := re.prog[state.pc].group_id * 2
 | 
						|
 | 
						|
						if start_i >= 0 {
 | 
						|
							re.groups[g_index] = start_i
 | 
						|
						} else {
 | 
						|
							re.groups[g_index] = 0
 | 
						|
						}
 | 
						|
 | 
						|
						re.groups[g_index + 1] = state.i
 | 
						|
 | 
						|
						if g_index > 0 && re.groups[g_index] <= re.groups[g_index - 1] {
 | 
						|
							re.groups[g_index] = re.groups[g_index - 1]
 | 
						|
						}
 | 
						|
 | 
						|
						if re.groups[g_index + 1] >= in_txt_len {
 | 
						|
							// println("clamp group!")
 | 
						|
							re.groups[g_index + 1] = in_txt_len - 1
 | 
						|
						}
 | 
						|
 | 
						|
						// println("GROUP ${re.prog[state.pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}] i: $state.i in_txt_len: $in_txt_len")
 | 
						|
 | 
						|
						// continuous save, save until we have space
 | 
						|
						re.group_continuous_save(g_index)
 | 
						|
					}
 | 
						|
 | 
						|
					re.prog[state.pc].group_rep++ // increase repetitions
 | 
						|
					// println("GROUP $group_index END ${re.prog[state.pc].group_rep}")
 | 
						|
					if re.prog[state.pc].group_rep > in_txt_len - 1 {
 | 
						|
						m_state = .ist_quant_ng
 | 
						|
						continue
 | 
						|
					}
 | 
						|
 | 
						|
					m_state = .ist_quant_pg
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				m_state = .ist_quant_ng
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// check OR
 | 
						|
			else if ist == regex.ist_or_branch {
 | 
						|
				if state.match_index >= 0 {
 | 
						|
					state.pc = re.prog[state.pc].rep_max
 | 
						|
					// println("ist_or_branch True pc: $state.pc")
 | 
						|
				} else {
 | 
						|
					state.pc = re.prog[state.pc].rep_min
 | 
						|
					// println("ist_or_branch False pc: $state.pc")
 | 
						|
				}
 | 
						|
				re.prog[state.pc].reset()
 | 
						|
				m_state = .ist_load
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// check ist_dot_char
 | 
						|
			else if ist == regex.ist_dot_char {
 | 
						|
				// println("ist_dot_char rep: ${re.prog[state.pc].rep}")
 | 
						|
 | 
						|
				// check next token to be false
 | 
						|
				mut next_check_flag := false
 | 
						|
 | 
						|
				// if we are done with max go on dot char are dedicated case!!
 | 
						|
				if re.prog[state.pc].rep >= re.prog[state.pc].rep_max {
 | 
						|
					re.state_list.pop()
 | 
						|
					m_state = .ist_next
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				if re.prog[state.pc].dot_check_pc >= 0
 | 
						|
					&& re.prog[state.pc].rep >= re.prog[state.pc].rep_min {
 | 
						|
					// load the char
 | 
						|
					// ch_t, _ := re.get_charb(in_txt, state.i+char_len)
 | 
						|
					ch_t := ch
 | 
						|
					chk_pc := re.prog[state.pc].dot_check_pc
 | 
						|
 | 
						|
					// simple char
 | 
						|
					if re.prog[chk_pc].ist == regex.ist_simple_char {
 | 
						|
						if re.prog[chk_pc].ch == ch_t {
 | 
						|
							next_check_flag = true
 | 
						|
						}
 | 
						|
						// println("Check [ist_simple_char] [${re.prog[chk_pc].ch}]==[${ch_t:c}] => $next_check_flag")
 | 
						|
					}
 | 
						|
					// char char_class
 | 
						|
					else if re.prog[chk_pc].ist == regex.ist_char_class_pos
 | 
						|
						|| re.prog[chk_pc].ist == regex.ist_char_class_neg {
 | 
						|
						mut cc_neg := false
 | 
						|
						if re.prog[chk_pc].ist == regex.ist_char_class_neg {
 | 
						|
							cc_neg = true
 | 
						|
						}
 | 
						|
						mut cc_res := re.check_char_class(chk_pc, ch_t)
 | 
						|
 | 
						|
						if cc_neg {
 | 
						|
							cc_res = !cc_res
 | 
						|
						}
 | 
						|
						next_check_flag = cc_res
 | 
						|
						// println("Check [ist_char_class] => $next_check_flag")
 | 
						|
					}
 | 
						|
					// check bsls
 | 
						|
					else if re.prog[chk_pc].ist == regex.ist_bsls_char {
 | 
						|
						next_check_flag = re.prog[chk_pc].validator(byte(ch_t))
 | 
						|
						// println("Check [ist_bsls_char] => $next_check_flag")
 | 
						|
					}
 | 
						|
				}
 | 
						|
 | 
						|
				// check if we must continue or pass to the next IST
 | 
						|
				if next_check_flag == true && re.prog[state.pc + 1].ist != regex.ist_prog_end {
 | 
						|
					// println("save the state!!")
 | 
						|
					mut dot_state := StateObj{
 | 
						|
						group_index: state.group_index
 | 
						|
						match_flag: state.match_flag
 | 
						|
						match_index: state.match_index
 | 
						|
						first_match: state.first_match
 | 
						|
						pc: state.pc
 | 
						|
						i: state.i + char_len
 | 
						|
						char_len: char_len
 | 
						|
						last_dot_pc: state.pc
 | 
						|
					}
 | 
						|
					// if we are mananging a .* stay on the same char on return
 | 
						|
					if re.prog[state.pc].rep_min == 0 {
 | 
						|
						dot_state.i -= char_len
 | 
						|
					}
 | 
						|
 | 
						|
					re.state_list << dot_state
 | 
						|
 | 
						|
					m_state = .ist_quant_n
 | 
						|
					// println("dot_char stack len: ${re.state_list.len}")
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				state.match_flag = true
 | 
						|
				l_ist = u32(regex.ist_dot_char)
 | 
						|
 | 
						|
				if state.first_match < 0 {
 | 
						|
					state.first_match = state.i
 | 
						|
				}
 | 
						|
				state.match_index = state.i
 | 
						|
				re.prog[state.pc].rep++ // increase repetitions
 | 
						|
 | 
						|
				state.i += char_len
 | 
						|
				m_state = .ist_quant_p
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// char class IST
 | 
						|
			else if ist == regex.ist_char_class_pos || ist == regex.ist_char_class_neg {
 | 
						|
				state.match_flag = false
 | 
						|
				mut cc_neg := false
 | 
						|
 | 
						|
				if ist == regex.ist_char_class_neg {
 | 
						|
					cc_neg = true
 | 
						|
				}
 | 
						|
				mut cc_res := re.check_char_class(state.pc, ch)
 | 
						|
 | 
						|
				if cc_neg {
 | 
						|
					cc_res = !cc_res
 | 
						|
				}
 | 
						|
 | 
						|
				if cc_res {
 | 
						|
					state.match_flag = true
 | 
						|
					l_ist = u32(regex.ist_char_class_pos)
 | 
						|
 | 
						|
					if state.first_match < 0 {
 | 
						|
						state.first_match = state.i
 | 
						|
					}
 | 
						|
 | 
						|
					state.match_index = state.i
 | 
						|
 | 
						|
					re.prog[state.pc].rep++ // increase repetitions
 | 
						|
					state.i += char_len // next char
 | 
						|
					m_state = .ist_quant_p
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				m_state = .ist_quant_n
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// check bsls
 | 
						|
			else if ist == regex.ist_bsls_char {
 | 
						|
				// println("ist_bsls_char rep: ${re.prog[state.pc].rep}")
 | 
						|
 | 
						|
				// check next token to be false
 | 
						|
				mut next_check_flag := false
 | 
						|
 | 
						|
				// if we are done with max go on dot char are dedicated case!!
 | 
						|
				if re.prog[state.pc].rep >= re.prog[state.pc].rep_max {
 | 
						|
					re.state_list.pop()
 | 
						|
					m_state = .ist_next
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				if re.prog[state.pc].bsls_check_pc >= 0
 | 
						|
					&& re.prog[state.pc].rep >= re.prog[state.pc].rep_min {
 | 
						|
					// load the char
 | 
						|
					// ch_t, _ := re.get_charb(in_txt, state.i+char_len)
 | 
						|
					ch_t := ch
 | 
						|
					chk_pc := re.prog[state.pc].bsls_check_pc
 | 
						|
 | 
						|
					// simple char
 | 
						|
					if re.prog[chk_pc].ist == regex.ist_simple_char {
 | 
						|
						if re.prog[chk_pc].ch == ch_t {
 | 
						|
							next_check_flag = true
 | 
						|
						}
 | 
						|
						// println("Check [ist_simple_char] [${re.prog[chk_pc].ch}]==[${ch_t:c}] => $next_check_flag")
 | 
						|
					}
 | 
						|
					// char char_class
 | 
						|
					else if re.prog[chk_pc].ist == regex.ist_char_class_pos
 | 
						|
						|| re.prog[chk_pc].ist == regex.ist_char_class_neg {
 | 
						|
						mut cc_neg := false
 | 
						|
						if re.prog[chk_pc].ist == regex.ist_char_class_neg {
 | 
						|
							cc_neg = true
 | 
						|
						}
 | 
						|
						mut cc_res := re.check_char_class(chk_pc, ch_t)
 | 
						|
 | 
						|
						if cc_neg {
 | 
						|
							cc_res = !cc_res
 | 
						|
						}
 | 
						|
						next_check_flag = cc_res
 | 
						|
						// println("Check [ist_char_class] => $next_check_flag")
 | 
						|
					}
 | 
						|
					// check bsls
 | 
						|
					else if re.prog[chk_pc].ist == regex.ist_bsls_char {
 | 
						|
						next_check_flag = re.prog[chk_pc].validator(byte(ch_t))
 | 
						|
						// println("Check [ist_bsls_char] => $next_check_flag")
 | 
						|
					}
 | 
						|
				}
 | 
						|
 | 
						|
				// check if we must continue or pass to the next IST
 | 
						|
				if next_check_flag == true && re.prog[state.pc + 1].ist != regex.ist_prog_end {
 | 
						|
					// println("save the state!!")
 | 
						|
					mut dot_state := StateObj{
 | 
						|
						group_index: state.group_index
 | 
						|
						match_flag: state.match_flag
 | 
						|
						match_index: state.match_index
 | 
						|
						first_match: state.first_match
 | 
						|
						pc: state.pc
 | 
						|
						i: state.i + char_len
 | 
						|
						char_len: char_len
 | 
						|
						last_dot_pc: state.pc
 | 
						|
					}
 | 
						|
					// if we are mananging a .* stay on the same char on return
 | 
						|
					if re.prog[state.pc].rep_min == 0 {
 | 
						|
						dot_state.i -= char_len
 | 
						|
					}
 | 
						|
 | 
						|
					re.state_list << dot_state
 | 
						|
 | 
						|
					m_state = .ist_quant_n
 | 
						|
					// println("dot_char stack len: ${re.state_list.len}")
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				tmp_res := re.prog[state.pc].validator(byte(ch))
 | 
						|
				if tmp_res == false {
 | 
						|
					m_state = .ist_quant_n
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				// println("${ch} => ${tmp_res}")
 | 
						|
 | 
						|
				state.match_flag = true
 | 
						|
				l_ist = u32(regex.ist_dot_char)
 | 
						|
 | 
						|
				if state.first_match < 0 {
 | 
						|
					state.first_match = state.i
 | 
						|
				}
 | 
						|
				state.match_index = state.i
 | 
						|
				re.prog[state.pc].rep++ // increase repetitions
 | 
						|
 | 
						|
				state.i += char_len
 | 
						|
				m_state = .ist_quant_p
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// simple char IST
 | 
						|
			else if ist == regex.ist_simple_char {
 | 
						|
				// println("ist_simple_char")
 | 
						|
				state.match_flag = false
 | 
						|
 | 
						|
				if re.prog[state.pc].ch == ch {
 | 
						|
					state.match_flag = true
 | 
						|
					l_ist = regex.ist_simple_char
 | 
						|
 | 
						|
					if state.first_match < 0 {
 | 
						|
						state.first_match = state.i
 | 
						|
					}
 | 
						|
					// println("state.match_index: ${state.match_index}")
 | 
						|
					state.match_index = state.i
 | 
						|
 | 
						|
					re.prog[state.pc].rep++ // increase repetitions
 | 
						|
					state.i += char_len // next char
 | 
						|
					m_state = .ist_quant_p
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				m_state = .ist_quant_n
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// UNREACHABLE
 | 
						|
			// println("PANIC2!! state: $m_state")
 | 
						|
			return regex.err_internal_error, state.i
 | 
						|
		}
 | 
						|
		/***********************************
 | 
						|
		* Quantifier management
 | 
						|
		***********************************/
 | 
						|
		// ist_quant_ng => quantifier negative test on group
 | 
						|
		else if m_state == .ist_quant_ng {
 | 
						|
			// we are finished here
 | 
						|
			if state.group_index < 0 {
 | 
						|
				// println("Early stop!")
 | 
						|
				result = regex.no_match_found
 | 
						|
				m_state = .stop
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			tmp_pc := re.group_data[state.group_index] // PC to the end of the group token
 | 
						|
			rep := re.prog[tmp_pc].group_rep // use a temp variable
 | 
						|
			re.prog[tmp_pc].group_rep = 0 // clear the repetitions
 | 
						|
 | 
						|
			// println(".ist_quant_ng group_pc_end: $tmp_pc rep: $rep")
 | 
						|
 | 
						|
			if rep >= re.prog[tmp_pc].rep_min {
 | 
						|
				// println("ist_quant_ng GROUP CLOSED OK group_index: $state.group_index")
 | 
						|
 | 
						|
				state.i = re.group_stack[state.group_index]
 | 
						|
				state.pc = tmp_pc
 | 
						|
				state.group_index--
 | 
						|
				m_state = .ist_next
 | 
						|
				continue
 | 
						|
			} else if re.prog[tmp_pc].next_is_or {
 | 
						|
				// println("ist_quant_ng OR Negative branch")
 | 
						|
 | 
						|
				state.i = re.group_stack[state.group_index]
 | 
						|
				state.pc = re.prog[tmp_pc + 1].rep_min - 1
 | 
						|
				state.group_index--
 | 
						|
				m_state = .ist_next
 | 
						|
				continue
 | 
						|
			} else if rep > 0 && rep < re.prog[tmp_pc].rep_min {
 | 
						|
				// println("ist_quant_ng UNDER THE MINIMUM g.i: $state.group_index")
 | 
						|
 | 
						|
				// check if we are inside a group, if yes exit from the nested groups
 | 
						|
				if state.group_index > 0 {
 | 
						|
					state.group_index--
 | 
						|
					state.pc = tmp_pc
 | 
						|
					m_state = .ist_quant_ng //.ist_next
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				if state.group_index == 0 {
 | 
						|
					state.group_index--
 | 
						|
					state.pc = tmp_pc // TEST
 | 
						|
					m_state = .ist_next
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				result = regex.no_match_found
 | 
						|
				m_state = .stop
 | 
						|
				continue
 | 
						|
			} else if rep == 0 && rep < re.prog[tmp_pc].rep_min {
 | 
						|
				// println("ist_quant_ng c_zero UNDER THE MINIMUM g.i: $state.group_index")
 | 
						|
 | 
						|
				if state.group_index > 0 {
 | 
						|
					state.group_index--
 | 
						|
					state.pc = tmp_pc
 | 
						|
					m_state = .ist_quant_ng //.ist_next
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				result = regex.no_match_found
 | 
						|
				m_state = .stop
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			// println("DO NOT STAY HERE!! {${re.prog[tmp_pc].rep_min},${re.prog[tmp_pc].rep_max}}:$rep")
 | 
						|
			// UNREACHABLE
 | 
						|
			return regex.err_internal_error, state.i
 | 
						|
		}
 | 
						|
		// ist_quant_pg => quantifier positive test on group
 | 
						|
		else if m_state == .ist_quant_pg {
 | 
						|
			// println(".ist_quant_pg")
 | 
						|
			mut tmp_pc := state.pc
 | 
						|
			if state.group_index >= 0 {
 | 
						|
				tmp_pc = re.group_data[state.group_index]
 | 
						|
			}
 | 
						|
 | 
						|
			if re.prog[tmp_pc].group_neg == true {
 | 
						|
				// println("***** Negation of the group")
 | 
						|
				result = regex.no_match_found
 | 
						|
				m_state = .stop
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			rep := re.prog[tmp_pc].group_rep
 | 
						|
 | 
						|
			if rep < re.prog[tmp_pc].rep_min {
 | 
						|
				// println("ist_quant_pg UNDER RANGE")
 | 
						|
				state.pc = re.prog[tmp_pc].goto_pc
 | 
						|
				m_state = .ist_next
 | 
						|
				continue
 | 
						|
			} else if rep == re.prog[tmp_pc].rep_max {
 | 
						|
				// println("ist_quant_pg MAX RANGE")
 | 
						|
				re.prog[tmp_pc].group_rep = 0 // clear the repetitions
 | 
						|
				state.group_index--
 | 
						|
				m_state = .ist_next
 | 
						|
 | 
						|
				continue
 | 
						|
			} else if rep >= re.prog[tmp_pc].rep_min {
 | 
						|
				// println("ist_quant_pg IN RANGE group_index:$state.group_index")
 | 
						|
 | 
						|
				// check greedy flag, if true exit on minimum
 | 
						|
				if re.prog[tmp_pc].greedy == true {
 | 
						|
					re.prog[tmp_pc].group_rep = 0 // clear the repetitions
 | 
						|
					state.group_index--
 | 
						|
					m_state = .ist_next
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				state.pc = re.prog[tmp_pc].goto_pc - 1
 | 
						|
				state.group_index--
 | 
						|
				m_state = .ist_next
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			// UNREACHABLE
 | 
						|
			// println("PANIC3!! state: $m_state")
 | 
						|
			return regex.err_internal_error, state.i
 | 
						|
		}
 | 
						|
		// ist_quant_n => quantifier negative test on token
 | 
						|
		else if m_state == .ist_quant_n {
 | 
						|
			rep := re.prog[state.pc].rep
 | 
						|
			// println("Here!! PC $state.pc is_next_or: ${re.prog[state.pc].next_is_or}")
 | 
						|
 | 
						|
			// zero quantifier * or ?
 | 
						|
			if rep == 0 && re.prog[state.pc].rep_min == 0 {
 | 
						|
				// println("ist_quant_n c_zero RANGE MIN")
 | 
						|
				m_state = .ist_next // go to next ist
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// match + or *
 | 
						|
			else if rep >= re.prog[state.pc].rep_min {
 | 
						|
				// println("ist_quant_n MATCH RANGE")
 | 
						|
				m_state = .ist_next
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			// check the OR if present
 | 
						|
			if re.prog[state.pc].next_is_or {
 | 
						|
				// println("OR present on failing")
 | 
						|
				state.match_index = -1
 | 
						|
				m_state = .ist_next
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			// we are in a group manage no match from here
 | 
						|
			if state.group_index >= 0 {
 | 
						|
				// println("ist_quant_n FAILED insied a GROUP group_index:$state.group_index")
 | 
						|
				m_state = .ist_quant_ng
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			// no other options
 | 
						|
			// println("ist_quant_n no_match_found")
 | 
						|
			result = regex.no_match_found
 | 
						|
			m_state = .stop
 | 
						|
			continue
 | 
						|
			// return no_match_found, 0
 | 
						|
		}
 | 
						|
		// ist_quant_p => quantifier positive test on token
 | 
						|
		else if m_state == .ist_quant_p {
 | 
						|
			// println("Here .ist_quant_p")
 | 
						|
			// exit on first match
 | 
						|
			if (re.flag & regex.f_efm) != 0 {
 | 
						|
				return state.i, state.i + 1
 | 
						|
			}
 | 
						|
 | 
						|
			rep := re.prog[state.pc].rep
 | 
						|
			// println(rep)
 | 
						|
 | 
						|
			// under range
 | 
						|
			if rep > 0 && rep < re.prog[state.pc].rep_min {
 | 
						|
				// println("ist_quant_p UNDER RANGE")
 | 
						|
				m_state = .ist_load // continue the loop
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// range ok, continue loop
 | 
						|
			else if rep >= re.prog[state.pc].rep_min && rep < re.prog[state.pc].rep_max {
 | 
						|
				// println("ist_quant_p IN RANGE")
 | 
						|
 | 
						|
				// check greedy flag, if true exit on minimum
 | 
						|
				if re.prog[state.pc].greedy == true {
 | 
						|
					m_state = .ist_next
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				m_state = .ist_load
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			// max reached
 | 
						|
			else if rep == re.prog[state.pc].rep_max {
 | 
						|
				// println("ist_quant_p MAX RANGE")
 | 
						|
				m_state = .ist_next
 | 
						|
				continue
 | 
						|
			}
 | 
						|
		}
 | 
						|
		// UNREACHABLE
 | 
						|
		// println("PANIC4!! state: $m_state")
 | 
						|
		return regex.err_internal_error, state.i
 | 
						|
	}
 | 
						|
 | 
						|
	// println("Check end of text!")
 | 
						|
	// Check the results
 | 
						|
	if state.match_index >= 0 {
 | 
						|
		if state.group_index < 0 {
 | 
						|
			if re.prog[state.pc].ist == regex.ist_prog_end {
 | 
						|
				// println("program ended!!")
 | 
						|
 | 
						|
				if (re.flag & regex.f_src) != 0 {
 | 
						|
					// println("find return")
 | 
						|
					return state.first_match, state.i
 | 
						|
				} else {
 | 
						|
					// println("Here!!")
 | 
						|
					return 0, state.i
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			// println("No Group here, natural end [$state.first_match,$state.i] state: ${state_str(m_state)} ist: $ist pgr_end: $re.prog.len")
 | 
						|
 | 
						|
			if re.prog[state.pc + 1].ist == regex.ist_prog_end
 | 
						|
				|| re.prog[state.pc].ist == regex.ist_prog_end {
 | 
						|
				rep := re.prog[state.pc].rep
 | 
						|
				// println("rep: $rep re.prog[state.pc].rep_min: ${re.prog[state.pc].rep_min} re.prog[state.pc].rep_max: ${re.prog[state.pc].rep_max}")
 | 
						|
				if rep >= re.prog[state.pc].rep_min && rep <= re.prog[state.pc].rep_max {
 | 
						|
					return state.first_match, state.i
 | 
						|
				}
 | 
						|
				// println("Program not finished! ")
 | 
						|
				return regex.no_match_found, state.i
 | 
						|
			}
 | 
						|
			if src_end {
 | 
						|
				// println("program end")
 | 
						|
				return state.first_match, state.i
 | 
						|
			}
 | 
						|
			// print("No match found!!")
 | 
						|
			return regex.no_match_found, state.i
 | 
						|
		} else {
 | 
						|
			// println("Group match! OK")
 | 
						|
			// println("first_match: $state.first_match, i: $state.i")
 | 
						|
 | 
						|
			// println("Skip last group")
 | 
						|
			return state.first_match, state.i
 | 
						|
			// return state.first_match,re.group_stack[state.group_index--]
 | 
						|
		}
 | 
						|
	}
 | 
						|
	// println("no_match_found, natural end")
 | 
						|
	return regex.no_match_found, state.i
 | 
						|
}
 |