regex: fix compilation issues with gcc under ubuntu (#7112)
parent
793f9ae9e3
commit
15ffce1317
|
@ -27,7 +27,7 @@ pub const(
|
||||||
// space chars (only the Western ones for now!) TODO: manage all the space chars from Unicode
|
// space chars (only the Western ones for now!) TODO: manage all the space chars from Unicode
|
||||||
spaces = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
|
spaces = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
|
||||||
// new line chars, for now only '\n' and '\r'
|
// new line chars, for now only '\n' and '\r'
|
||||||
new_line_list = [`\n`,`\r`]
|
new_line_list = [`\n`, `\r`]
|
||||||
|
|
||||||
// Results
|
// Results
|
||||||
no_match_found = -1
|
no_match_found = -1
|
||||||
|
@ -49,7 +49,7 @@ const(
|
||||||
//*************************************
|
//*************************************
|
||||||
// regex program instructions
|
// regex program instructions
|
||||||
//*************************************
|
//*************************************
|
||||||
ist_simple_char = u32(0x7FFFFFFF) // single char instruction, 31 bit available to char
|
ist_simple_char = u32(0x7FFFFFFF) // single char instruction, 31 bit available to char
|
||||||
|
|
||||||
// char class 11 0100 AA xxxxxxxx
|
// char class 11 0100 AA xxxxxxxx
|
||||||
// AA = 00 regular class
|
// AA = 00 regular class
|
||||||
|
@ -92,9 +92,7 @@ fn utf8util_char_len(b byte) int {
|
||||||
fn (re RE) get_char(in_txt string, i int) (u32,int) {
|
fn (re RE) get_char(in_txt string, i int) (u32,int) {
|
||||||
ini := unsafe {in_txt.str[i]}
|
ini := unsafe {in_txt.str[i]}
|
||||||
// ascii 8 bit
|
// ascii 8 bit
|
||||||
if (re.flag & f_bin) !=0 ||
|
if (re.flag & f_bin) !=0 || ini & 0x80 == 0 {
|
||||||
ini & 0x80 == 0
|
|
||||||
{
|
|
||||||
return u32(ini), 1
|
return u32(ini), 1
|
||||||
}
|
}
|
||||||
// unicode char
|
// unicode char
|
||||||
|
@ -102,7 +100,7 @@ fn (re RE) get_char(in_txt string, i int) (u32,int) {
|
||||||
mut tmp := 0
|
mut tmp := 0
|
||||||
mut ch := u32(0)
|
mut ch := u32(0)
|
||||||
for tmp < char_len {
|
for tmp < char_len {
|
||||||
ch = (ch << 8) | unsafe {in_txt.str[i+tmp]}
|
ch = (ch << 8) | unsafe {in_txt.str[i + tmp]}
|
||||||
tmp++
|
tmp++
|
||||||
}
|
}
|
||||||
return ch,char_len
|
return ch,char_len
|
||||||
|
@ -112,9 +110,7 @@ fn (re RE) get_char(in_txt string, i int) (u32,int) {
|
||||||
[inline]
|
[inline]
|
||||||
fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
|
fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
|
||||||
// ascii 8 bit
|
// ascii 8 bit
|
||||||
if (re.flag & f_bin) !=0 ||
|
if (re.flag & f_bin) !=0 || unsafe {in_txt[i]} & 0x80 == 0 {
|
||||||
unsafe {in_txt[i]} & 0x80 == 0
|
|
||||||
{
|
|
||||||
return u32(unsafe {in_txt[i]}), 1
|
return u32(unsafe {in_txt[i]}), 1
|
||||||
}
|
}
|
||||||
// unicode char
|
// unicode char
|
||||||
|
@ -122,7 +118,7 @@ fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
|
||||||
mut tmp := 0
|
mut tmp := 0
|
||||||
mut ch := u32(0)
|
mut ch := u32(0)
|
||||||
for tmp < char_len {
|
for tmp < char_len {
|
||||||
ch = (ch << 8) | unsafe {in_txt[i+tmp]}
|
ch = (ch << 8) | unsafe {in_txt[i + tmp]}
|
||||||
tmp++
|
tmp++
|
||||||
}
|
}
|
||||||
return ch,char_len
|
return ch,char_len
|
||||||
|
@ -131,11 +127,11 @@ fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
|
||||||
[inline]
|
[inline]
|
||||||
fn is_alnum(in_char byte) bool {
|
fn is_alnum(in_char byte) bool {
|
||||||
mut tmp := in_char - `A`
|
mut tmp := in_char - `A`
|
||||||
if tmp >= 0x00 && tmp <= 25 { return true }
|
if tmp <= 25 { return true }
|
||||||
tmp = in_char - `a`
|
tmp = in_char - `a`
|
||||||
if tmp >= 0x00 && tmp <= 25 { return true }
|
if tmp <= 25 { return true }
|
||||||
tmp = in_char - `0`
|
tmp = in_char - `0`
|
||||||
if tmp >= 0x00 && tmp <= 9 { return true }
|
if tmp <= 9 { return true }
|
||||||
if tmp == `_` { return true }
|
if tmp == `_` { return true }
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -158,7 +154,7 @@ fn is_not_space(in_char byte) bool {
|
||||||
[inline]
|
[inline]
|
||||||
fn is_digit(in_char byte) bool {
|
fn is_digit(in_char byte) bool {
|
||||||
tmp := in_char - `0`
|
tmp := in_char - `0`
|
||||||
return tmp <= 0x09 && tmp >= 0
|
return tmp <= 0x09
|
||||||
}
|
}
|
||||||
|
|
||||||
[inline]
|
[inline]
|
||||||
|
@ -179,13 +175,13 @@ fn is_not_wordchar(in_char byte) bool {
|
||||||
[inline]
|
[inline]
|
||||||
fn is_lower(in_char byte) bool {
|
fn is_lower(in_char byte) bool {
|
||||||
tmp := in_char - `a`
|
tmp := in_char - `a`
|
||||||
return tmp >= 0x00 && tmp <= 25
|
return tmp <= 25
|
||||||
}
|
}
|
||||||
|
|
||||||
[inline]
|
[inline]
|
||||||
fn is_upper(in_char byte) bool {
|
fn is_upper(in_char byte) bool {
|
||||||
tmp := in_char - `A`
|
tmp := in_char - `A`
|
||||||
return tmp >= 0x00 && tmp <= 25
|
return tmp <= 25
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn (re RE) get_parse_error_string(err int) string {
|
pub fn (re RE) get_parse_error_string(err int) string {
|
||||||
|
@ -211,7 +207,7 @@ fn utf8_str(ch rune) string {
|
||||||
mut i := 4
|
mut i := 4
|
||||||
mut res := ""
|
mut res := ""
|
||||||
for i > 0 {
|
for i > 0 {
|
||||||
v := byte((ch >> ((i-1)*8)) & 0xFF)
|
v := byte((ch >> ((i - 1) * 8)) & 0xFF)
|
||||||
if v != 0{
|
if v != 0{
|
||||||
res += "${v:1c}"
|
res += "${v:1c}"
|
||||||
}
|
}
|
||||||
|
@ -236,30 +232,30 @@ mut:
|
||||||
ist rune
|
ist rune
|
||||||
|
|
||||||
// char
|
// char
|
||||||
ch rune // char of the token if any
|
ch rune // char of the token if any
|
||||||
ch_len byte // char len
|
ch_len byte // char len
|
||||||
|
|
||||||
// Quantifiers / branch
|
// Quantifiers / branch
|
||||||
rep_min int // used also for jump next in the OR branch [no match] pc jump
|
rep_min int // used also for jump next in the OR branch [no match] pc jump
|
||||||
rep_max int // used also for jump next in the OR branch [ match] pc jump
|
rep_max int // used also for jump next in the OR branch [ match] pc jump
|
||||||
greedy bool // greedy quantifier flag
|
greedy bool // greedy quantifier flag
|
||||||
|
|
||||||
// Char class
|
// Char class
|
||||||
cc_index int = -1
|
cc_index int = -1
|
||||||
|
|
||||||
// counters for quantifier check (repetitions)
|
// counters for quantifier check (repetitions)
|
||||||
rep int
|
rep int
|
||||||
|
|
||||||
// validator function pointer
|
// validator function pointer
|
||||||
validator FnValidator
|
validator FnValidator
|
||||||
|
|
||||||
// groups variables
|
// groups variables
|
||||||
group_rep int // repetition of the group
|
group_rep int // repetition of the group
|
||||||
group_id int = -1 // id of the group
|
group_id int = -1 // id of the group
|
||||||
goto_pc int = -1 // jump to this PC if is needed
|
goto_pc int = -1 // jump to this PC if is needed
|
||||||
|
|
||||||
// OR flag for the token
|
// OR flag for the token
|
||||||
next_is_or bool // true if the next token is an OR
|
next_is_or bool // true if the next token is an OR
|
||||||
}
|
}
|
||||||
|
|
||||||
[inline]
|
[inline]
|
||||||
|
@ -310,34 +306,34 @@ pub mut:
|
||||||
|
|
||||||
|
|
||||||
// groups
|
// groups
|
||||||
group_count int // number of groups in this regex struct
|
group_count int // number of groups in this regex struct
|
||||||
groups []int // groups index results
|
groups []int // groups index results
|
||||||
group_max_nested int = 3 // max nested group
|
group_max_nested int = 3 // max nested group
|
||||||
group_max int = 8 // max allowed number of different groups
|
group_max int = 8 // max allowed number of different groups
|
||||||
|
|
||||||
group_csave []int = []int{} // groups continuous save array
|
group_csave []int = []int{} // groups continuous save array
|
||||||
group_csave_index int= -1 // groups continuous save index
|
group_csave_index int = -1 // groups continuous save index
|
||||||
|
|
||||||
group_map map[string]int // groups names map
|
group_map map[string]int // groups names map
|
||||||
|
|
||||||
// flags
|
// flags
|
||||||
flag int // flag for optional parameters
|
flag int // flag for optional parameters
|
||||||
|
|
||||||
// Debug/log
|
// Debug/log
|
||||||
debug int // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE
|
debug int // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE
|
||||||
log_func FnLog = simple_log // log function, can be customized by the user
|
log_func FnLog = simple_log // log function, can be customized by the user
|
||||||
query string // query string
|
query string // query string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset RE object
|
// Reset RE object
|
||||||
//[inline]
|
//[inline]
|
||||||
fn (mut re RE) reset(){
|
fn (mut re RE) reset(){
|
||||||
re.cc_index = 0
|
re.cc_index = 0
|
||||||
|
|
||||||
mut i := 0
|
mut i := 0
|
||||||
for i < re.prog.len {
|
for i < re.prog.len {
|
||||||
re.prog[i].group_rep = 0 // clear repetition of the group
|
re.prog[i].group_rep = 0 // clear repetition of the group
|
||||||
re.prog[i].rep = 0 // clear repetition of the token
|
re.prog[i].rep = 0 // clear repetition of the token
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
re.groups = [-1].repeat(re.group_count*2)
|
re.groups = [-1].repeat(re.group_count*2)
|
||||||
|
@ -347,7 +343,7 @@ fn (mut re RE) reset(){
|
||||||
// reset group_csave
|
// reset group_csave
|
||||||
if re.group_csave.len > 0 {
|
if re.group_csave.len > 0 {
|
||||||
re.group_csave_index = 1
|
re.group_csave_index = 1
|
||||||
re.group_csave[0] = 0 // reset the capture count
|
re.group_csave[0] = 0 // reset the capture count
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -356,8 +352,8 @@ fn (mut re RE) reset(){
|
||||||
fn (mut re RE) reset_src(){
|
fn (mut re RE) reset_src(){
|
||||||
mut i := 0
|
mut i := 0
|
||||||
for i < re.prog.len {
|
for i < re.prog.len {
|
||||||
re.prog[i].group_rep = 0 // clear repetition of the group
|
re.prog[i].group_rep = 0 // clear repetition of the group
|
||||||
re.prog[i].rep = 0 // clear repetition of the token
|
re.prog[i].rep = 0 // clear repetition of the token
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
re.state_stack_index = -1
|
re.state_stack_index = -1
|
||||||
|
@ -367,8 +363,8 @@ fn (mut re RE) reset_src(){
|
||||||
pub fn (re RE) get_group(group_name string) (int, int) {
|
pub fn (re RE) get_group(group_name string) (int, int) {
|
||||||
if group_name in re.group_map {
|
if group_name in re.group_map {
|
||||||
tmp_index := re.group_map[group_name]-1
|
tmp_index := re.group_map[group_name]-1
|
||||||
start := re.groups[tmp_index*2]
|
start := re.groups[tmp_index * 2]
|
||||||
end := re.groups[tmp_index*2+1]
|
end := re.groups[tmp_index * 2 + 1]
|
||||||
return start,end
|
return start,end
|
||||||
}
|
}
|
||||||
return -1, -1
|
return -1, -1
|
||||||
|
@ -397,7 +393,7 @@ const(
|
||||||
]
|
]
|
||||||
|
|
||||||
// these chars are escaped if preceded by a \
|
// these chars are escaped if preceded by a \
|
||||||
bsls_escape_list = [ `\\`,`|`,`.`,`*`,`+`,`-`,`{`,`}`,`[`,`]` ]
|
bsls_escape_list = [`\\`, `|`, `.`, `*`, `+`, `-`, `{`, `}`, `[`, `]`]
|
||||||
)
|
)
|
||||||
|
|
||||||
enum BSLS_parse_state {
|
enum BSLS_parse_state {
|
||||||
|
@ -414,7 +410,7 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
|
||||||
|
|
||||||
for i < in_txt.len {
|
for i < in_txt.len {
|
||||||
// get our char
|
// get our char
|
||||||
char_tmp,char_len := re.get_char(in_txt,i)
|
char_tmp, char_len := re.get_char(in_txt, i)
|
||||||
ch := byte(char_tmp)
|
ch := byte(char_tmp)
|
||||||
|
|
||||||
if status == .start && ch == `\\` {
|
if status == .start && ch == `\\` {
|
||||||
|
@ -427,7 +423,7 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
|
||||||
if status == .bsls_found {
|
if status == .bsls_found {
|
||||||
for c,x in bsls_validator_array {
|
for c,x in bsls_validator_array {
|
||||||
if x.ch == ch {
|
if x.ch == ch {
|
||||||
return c,i-in_i+1
|
return c, i-in_i+1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
status = .normal_char
|
status = .normal_char
|
||||||
|
@ -437,9 +433,9 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
|
||||||
// no BSLS validator, manage as a normal escaped char
|
// no BSLS validator, manage as a normal escaped char
|
||||||
if status == .normal_char {
|
if status == .normal_char {
|
||||||
if ch in bsls_escape_list {
|
if ch in bsls_escape_list {
|
||||||
return no_match_found,i-in_i+1
|
return no_match_found, i-in_i+1
|
||||||
}
|
}
|
||||||
return err_syntax_error,i-in_i+1
|
return err_syntax_error, i-in_i+1
|
||||||
}
|
}
|
||||||
|
|
||||||
// at the present time we manage only one char after the \
|
// at the present time we manage only one char after the \
|
||||||
|
@ -465,10 +461,10 @@ const(
|
||||||
|
|
||||||
struct CharClass {
|
struct CharClass {
|
||||||
mut:
|
mut:
|
||||||
cc_type int = cc_null // type of cc token
|
cc_type int = cc_null // type of cc token
|
||||||
ch0 rune // first char of the interval a-b a in this case
|
ch0 rune // first char of the interval a-b a in this case
|
||||||
ch1 rune // second char of the interval a-b b in this case
|
ch1 rune // second char of the interval a-b b in this case
|
||||||
validator FnValidator // validator function pointer
|
validator FnValidator // validator function pointer
|
||||||
}
|
}
|
||||||
|
|
||||||
enum CharClass_parse_state {
|
enum CharClass_parse_state {
|
||||||
|
@ -562,7 +558,7 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
|
||||||
mut i := in_i
|
mut i := in_i
|
||||||
|
|
||||||
mut tmp_index := re.cc_index
|
mut tmp_index := re.cc_index
|
||||||
res_index := re.cc_index
|
res_index := re.cc_index
|
||||||
|
|
||||||
mut cc_type := u32(ist_char_class_pos)
|
mut cc_type := u32(ist_char_class_pos)
|
||||||
|
|
||||||
|
@ -570,7 +566,7 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
|
||||||
|
|
||||||
// check if we are out of memory for char classes
|
// check if we are out of memory for char classes
|
||||||
if tmp_index >= re.cc.len {
|
if tmp_index >= re.cc.len {
|
||||||
return err_cc_alloc_overflow,0,u32(0)
|
return err_cc_alloc_overflow, 0, u32(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// get our char
|
// get our char
|
||||||
|
@ -710,7 +706,7 @@ fn (re RE) parse_quantifier(in_txt string, in_i int) (int, int, int, bool) {
|
||||||
|
|
||||||
// exit on no compatible char with {} quantifier
|
// exit on no compatible char with {} quantifier
|
||||||
if utf8util_char_len(ch) != 1 {
|
if utf8util_char_len(ch) != 1 {
|
||||||
return err_syntax_error,i,0,false
|
return err_syntax_error, i, 0, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// min parsing skip if comma present
|
// min parsing skip if comma present
|
||||||
|
@ -913,14 +909,13 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) {
|
||||||
//
|
//
|
||||||
// compile returns (return code, index), where index is the index of the error in the query string if the return code is an error code
|
// compile returns (return code, index), where index is the index of the error in the query string if the return code is an error code
|
||||||
[deprecated]
|
[deprecated]
|
||||||
pub fn (mut re RE) compile(in_txt string) (int,int) {
|
pub fn (mut re RE) compile(in_txt string) (int, int) {
|
||||||
return re.impl_compile(in_txt)
|
return re.impl_compile(in_txt)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
mut i := 0 // input string index
|
mut i := 0 // input string index
|
||||||
mut pc := 0 // program counter
|
mut pc := 0 // program counter
|
||||||
mut tmp_code := u32(0)
|
|
||||||
|
|
||||||
// group management variables
|
// group management variables
|
||||||
mut group_count := -1
|
mut group_count := -1
|
||||||
|
@ -932,7 +927,6 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
for i < in_txt.len {
|
for i < in_txt.len {
|
||||||
tmp_code = u32(0)
|
|
||||||
mut char_tmp := u32(0)
|
mut char_tmp := u32(0)
|
||||||
mut char_len := 0
|
mut char_len := 0
|
||||||
//println("i: ${i:3d} ch: ${in_txt.str[i]:c}")
|
//println("i: ${i:3d} ch: ${in_txt.str[i]:c}")
|
||||||
|
@ -958,20 +952,20 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
|
|
||||||
//check max groups allowed
|
//check max groups allowed
|
||||||
if group_count > re.group_max {
|
if group_count > re.group_max {
|
||||||
return err_groups_overflow,i+1
|
return err_groups_overflow, i+1
|
||||||
}
|
}
|
||||||
group_stack_index++
|
group_stack_index++
|
||||||
|
|
||||||
// check max nested groups allowed
|
// check max nested groups allowed
|
||||||
if group_stack_index > re.group_max_nested {
|
if group_stack_index > re.group_max_nested {
|
||||||
return err_groups_max_nested,i+1
|
return err_groups_max_nested, i+1
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp_res, cgroup_flag, cgroup_name, next_i := re.parse_groups(in_txt,i)
|
tmp_res, cgroup_flag, cgroup_name, next_i := re.parse_groups(in_txt,i)
|
||||||
|
|
||||||
// manage question mark format error
|
// manage question mark format error
|
||||||
if tmp_res < -1 {
|
if tmp_res < -1 {
|
||||||
return err_group_qm_notation,next_i
|
return err_group_qm_notation, next_i
|
||||||
}
|
}
|
||||||
|
|
||||||
//println("Parse group: [$tmp_res, $cgroup_flag, ($i,$next_i), '${in_txt[i..next_i]}' ]")
|
//println("Parse group: [$tmp_res, $cgroup_flag, ($i,$next_i), '${in_txt[i..next_i]}' ]")
|
||||||
|
@ -988,10 +982,10 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
if cgroup_name.len > 0 {
|
if cgroup_name.len > 0 {
|
||||||
//println("GROUP NAME: ${cgroup_name}")
|
//println("GROUP NAME: ${cgroup_name}")
|
||||||
if cgroup_name in re.group_map{
|
if cgroup_name in re.group_map{
|
||||||
group_id = re.group_map[cgroup_name]-1
|
group_id = re.group_map[cgroup_name] - 1
|
||||||
group_count--
|
group_count--
|
||||||
} else {
|
} else {
|
||||||
re.group_map[cgroup_name] = group_id+1
|
re.group_map[cgroup_name] = group_id + 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1018,7 +1012,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
// ist_group_end
|
// ist_group_end
|
||||||
if char_len==1 && pc > 0 && byte(char_tmp) == `)` {
|
if char_len==1 && pc > 0 && byte(char_tmp) == `)` {
|
||||||
if group_stack_index < 0 {
|
if group_stack_index < 0 {
|
||||||
return err_group_not_balanced,i+1
|
return err_group_not_balanced, i+1
|
||||||
}
|
}
|
||||||
|
|
||||||
goto_pc := group_stack[group_stack_index]
|
goto_pc := group_stack[group_stack_index]
|
||||||
|
@ -1161,7 +1155,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
}
|
}
|
||||||
// if not an escape or a bsls char then it is an error (at least for now!)
|
// if not an escape or a bsls char then it is an error (at least for now!)
|
||||||
else {
|
else {
|
||||||
return bsls_index,i+tmp
|
return bsls_index, i+tmp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1192,7 +1186,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// store the number of groups in the query
|
// store the number of groups in the query
|
||||||
re.group_count = group_count+1
|
re.group_count = group_count + 1
|
||||||
|
|
||||||
//******************************************
|
//******************************************
|
||||||
// Post processing
|
// Post processing
|
||||||
|
@ -1482,33 +1476,35 @@ fn state_str(s Match_state) string {
|
||||||
|
|
||||||
struct StateObj {
|
struct StateObj {
|
||||||
pub mut:
|
pub mut:
|
||||||
match_flag bool
|
match_flag bool
|
||||||
match_index int = -1
|
match_index int = -1
|
||||||
match_first int = -1
|
match_first int = -1
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
// result status
|
// result status
|
||||||
mut result := no_match_found // function return
|
mut result := no_match_found // function return
|
||||||
mut first_match := -1 //index of the first match
|
mut first_match := -1 //index of the first match
|
||||||
|
|
||||||
mut i := 0 // source string index
|
mut i := 0 // source string index
|
||||||
mut ch := rune(0) // examinated char
|
mut ch := rune(0) // examinated char
|
||||||
mut char_len := 0 // utf8 examinated char len
|
mut char_len := 0 // utf8 examinated char len
|
||||||
mut m_state := Match_state.start // start point for the matcher FSM
|
mut m_state := Match_state.start // start point for the matcher FSM
|
||||||
|
|
||||||
mut pc := -1 // program counter
|
mut pc := -1 // program counter
|
||||||
mut state := StateObj{} // actual state
|
mut state := StateObj{} // actual state
|
||||||
mut ist := rune(0) // actual instruction
|
mut ist := rune(0) // actual instruction
|
||||||
mut l_ist :=rune(0) // last matched instruction
|
mut l_ist :=rune(0) // last matched instruction
|
||||||
|
|
||||||
mut group_stack := [-1].repeat(re.group_max)
|
//mut group_stack := [-1].repeat(re.group_max)
|
||||||
mut group_data := [-1].repeat(re.group_max)
|
//mut group_data := [-1].repeat(re.group_max)
|
||||||
|
mut group_stack := []int{len: re.group_max, init: -1}
|
||||||
|
mut group_data := []int{len: re.group_max, init: -1}
|
||||||
|
|
||||||
mut group_index := -1 // group id used to know how many groups are open
|
mut group_index := -1 // group id used to know how many groups are open
|
||||||
|
|
||||||
mut step_count := 0 // stats for debug
|
mut step_count := 0 // stats for debug
|
||||||
mut dbg_line := 0 // count debug line printed
|
mut dbg_line := 0 // count debug line printed
|
||||||
|
|
||||||
re.reset()
|
re.reset()
|
||||||
|
|
||||||
|
@ -1535,7 +1531,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
// DEBUG LOG
|
// DEBUG LOG
|
||||||
//******************************************
|
//******************************************
|
||||||
if re.debug>0 {
|
if re.debug>0 {
|
||||||
mut buf2 := strings.new_builder(re.cc.len+128)
|
mut buf2 := strings.new_builder(re.cc.len + 128)
|
||||||
|
|
||||||
// print all the instructions
|
// print all the instructions
|
||||||
|
|
||||||
|
@ -1658,7 +1654,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
// starting and init
|
// starting and init
|
||||||
if m_state == .start {
|
if m_state == .start {
|
||||||
pc = -1
|
pc = -1
|
||||||
i = 0
|
i = 0
|
||||||
m_state = .ist_next
|
m_state = .ist_next
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -1962,7 +1958,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
/***********************************
|
/***********************************
|
||||||
* Quantifier management
|
* Quantifier management
|
||||||
***********************************/
|
***********************************/
|
||||||
// ist_quant_ng
|
// ist_quant_ng => quantifier negative test on group
|
||||||
if m_state == .ist_quant_ng {
|
if m_state == .ist_quant_ng {
|
||||||
|
|
||||||
// we are finished here
|
// we are finished here
|
||||||
|
@ -2039,7 +2035,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
return err_internal_error, i
|
return err_internal_error, i
|
||||||
|
|
||||||
}
|
}
|
||||||
// ist_quant_pg
|
// ist_quant_pg => quantifier positive test on group
|
||||||
else if m_state == .ist_quant_pg {
|
else if m_state == .ist_quant_pg {
|
||||||
//println(".ist_quant_pg")
|
//println(".ist_quant_pg")
|
||||||
mut tmp_pc := pc
|
mut tmp_pc := pc
|
||||||
|
@ -2084,7 +2080,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
return err_internal_error, i
|
return err_internal_error, i
|
||||||
}
|
}
|
||||||
|
|
||||||
// ist_quant_n
|
// ist_quant_n => quantifier negative test on token
|
||||||
else if m_state == .ist_quant_n {
|
else if m_state == .ist_quant_n {
|
||||||
rep := re.prog[pc].rep
|
rep := re.prog[pc].rep
|
||||||
//println("Here!! PC $pc is_next_or: ${re.prog[pc].next_is_or}")
|
//println("Here!! PC $pc is_next_or: ${re.prog[pc].next_is_or}")
|
||||||
|
@ -2125,7 +2121,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
//return no_match_found, 0
|
//return no_match_found, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// ist_quant_p
|
// ist_quant_p => quantifier positive test on token
|
||||||
else if m_state == .ist_quant_p {
|
else if m_state == .ist_quant_p {
|
||||||
// exit on first match
|
// exit on first match
|
||||||
if (re.flag & f_efm) != 0 {
|
if (re.flag & f_efm) != 0 {
|
||||||
|
@ -2255,7 +2251,7 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
start, end := re.match_base(in_txt.str, in_txt.len)
|
start, end := re.match_base(in_txt.str, in_txt.len)
|
||||||
re.flag = old_flag
|
re.flag = old_flag
|
||||||
if start >= 0 && end > start {
|
if start >= 0 && end > start {
|
||||||
return start,end
|
return start, end
|
||||||
}
|
}
|
||||||
return no_match_found, 0
|
return no_match_found, 0
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue