regex: speed optimization (#7471)
parent
97855eca93
commit
b16052db95
|
@ -88,6 +88,7 @@ fn utf8util_char_len(b byte) int {
|
||||||
|
|
||||||
// get_char gets a char from position i and returns a u32 with the unicode code
|
// get_char gets a char from position i and returns a u32 with the unicode code
|
||||||
[inline]
|
[inline]
|
||||||
|
[direct_array_access]
|
||||||
fn (re RE) get_char(in_txt string, i int) (u32,int) {
|
fn (re RE) get_char(in_txt string, i int) (u32,int) {
|
||||||
ini := unsafe {in_txt.str[i]}
|
ini := unsafe {in_txt.str[i]}
|
||||||
// ascii 8 bit
|
// ascii 8 bit
|
||||||
|
@ -107,6 +108,7 @@ fn (re RE) get_char(in_txt string, i int) (u32,int) {
|
||||||
|
|
||||||
// get_charb gets a char from position i and returns a u32 with the unicode code
|
// get_charb gets a char from position i and returns a u32 with the unicode code
|
||||||
[inline]
|
[inline]
|
||||||
|
[direct_array_access]
|
||||||
fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
|
fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
|
||||||
// ascii 8 bit
|
// ascii 8 bit
|
||||||
if (re.flag & f_bin) !=0 || unsafe {in_txt[i]} & 0x80 == 0 {
|
if (re.flag & f_bin) !=0 || unsafe {in_txt[i]} & 0x80 == 0 {
|
||||||
|
@ -297,6 +299,7 @@ pub
|
||||||
struct RE {
|
struct RE {
|
||||||
pub mut:
|
pub mut:
|
||||||
prog []Token
|
prog []Token
|
||||||
|
prog_len int // regex program len
|
||||||
|
|
||||||
// char classes storage
|
// char classes storage
|
||||||
cc []CharClass // char class list
|
cc []CharClass // char class list
|
||||||
|
@ -323,12 +326,13 @@ pub mut:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset RE object
|
// Reset RE object
|
||||||
//[inline]
|
[inline]
|
||||||
|
[direct_array_access]
|
||||||
fn (mut re RE) reset(){
|
fn (mut re RE) reset(){
|
||||||
re.cc_index = 0
|
re.cc_index = 0
|
||||||
|
|
||||||
mut i := 0
|
mut i := 0
|
||||||
for i < re.prog.len {
|
for i < re.prog_len {
|
||||||
re.prog[i].group_rep = 0 // clear repetition of the group
|
re.prog[i].group_rep = 0 // clear repetition of the group
|
||||||
re.prog[i].rep = 0 // clear repetition of the token
|
re.prog[i].rep = 0 // clear repetition of the token
|
||||||
i++
|
i++
|
||||||
|
@ -340,14 +344,18 @@ fn (mut re RE) reset(){
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset group_csave
|
// reset group_csave
|
||||||
re.group_csave = []int{}
|
if re.group_csave_flag == true {
|
||||||
|
re.group_csave.clear() // = []int{}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset for search mode fail
|
// reset for search mode fail
|
||||||
// gcc bug: don't use [inline] here, or it runs 5 times slower
|
// gcc bug: don't use [inline] here, or it runs 5 times slower
|
||||||
|
//[inline]
|
||||||
|
[direct_array_access]
|
||||||
fn (mut re RE) reset_src(){
|
fn (mut re RE) reset_src(){
|
||||||
mut i := 0
|
mut i := 0
|
||||||
for i < re.prog.len {
|
for i < re.prog_len {
|
||||||
re.prog[i].group_rep = 0 // clear repetition of the group
|
re.prog[i].group_rep = 0 // clear repetition of the group
|
||||||
re.prog[i].rep = 0 // clear repetition of the token
|
re.prog[i].rep = 0 // clear repetition of the token
|
||||||
i++
|
i++
|
||||||
|
@ -1155,6 +1163,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
|
|
||||||
// add end of the program
|
// add end of the program
|
||||||
re.prog[pc].ist = ist_prog_end
|
re.prog[pc].ist = ist_prog_end
|
||||||
|
re.prog_len = pc
|
||||||
|
|
||||||
// check for unbalanced groups
|
// check for unbalanced groups
|
||||||
if group_stack_index != -1 {
|
if group_stack_index != -1 {
|
||||||
|
@ -1467,7 +1476,7 @@ pub fn (re RE) get_query() string {
|
||||||
* Groups saving utilities
|
* Groups saving utilities
|
||||||
*
|
*
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
[inline]
|
[direct_array_access]
|
||||||
fn (mut re RE) group_continuous_save(g_index int) {
|
fn (mut re RE) group_continuous_save(g_index int) {
|
||||||
if re.group_csave_flag == true {
|
if re.group_csave_flag == true {
|
||||||
// continuous save, save until we have space
|
// continuous save, save until we have space
|
||||||
|
@ -1550,6 +1559,7 @@ pub mut:
|
||||||
last_dot_pc int = -1 // last dot char pc
|
last_dot_pc int = -1 // last dot char pc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[direct_array_access]
|
||||||
pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
// result status
|
// result status
|
||||||
mut result := no_match_found // function return
|
mut result := no_match_found // function return
|
||||||
|
@ -1771,7 +1781,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ist_next, next instruction resetting its state
|
// ist_next, next instruction resetting its state
|
||||||
if m_state == .ist_next {
|
else if m_state == .ist_next {
|
||||||
state.pc = state.pc + 1
|
state.pc = state.pc + 1
|
||||||
re.prog[state.pc].reset()
|
re.prog[state.pc].reset()
|
||||||
// check if we are in the program bounds
|
// check if we are in the program bounds
|
||||||
|
@ -1784,7 +1794,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ist_next_ks, next instruction keeping its state
|
// ist_next_ks, next instruction keeping its state
|
||||||
if m_state == .ist_next_ks {
|
else if m_state == .ist_next_ks {
|
||||||
state.pc = state.pc + 1
|
state.pc = state.pc + 1
|
||||||
// check if we are in the program bounds
|
// check if we are in the program bounds
|
||||||
if state.pc < 0 || state.pc > re.prog.len {
|
if state.pc < 0 || state.pc > re.prog.len {
|
||||||
|
@ -1805,7 +1815,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if stop
|
// check if stop
|
||||||
if m_state == .stop {
|
else if m_state == .stop {
|
||||||
|
|
||||||
// we are in search mode, don't exit until the end
|
// we are in search mode, don't exit until the end
|
||||||
if ((re.flag & f_src) != 0) && (ist != ist_prog_end) {
|
if ((re.flag & f_src) != 0) && (ist != ist_prog_end) {
|
||||||
|
@ -1849,7 +1859,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ist_load
|
// ist_load
|
||||||
if m_state == .ist_load {
|
else if m_state == .ist_load {
|
||||||
|
|
||||||
// program end
|
// program end
|
||||||
if ist == ist_prog_end {
|
if ist == ist_prog_end {
|
||||||
|
@ -2116,7 +2126,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
* Quantifier management
|
* Quantifier management
|
||||||
***********************************/
|
***********************************/
|
||||||
// ist_quant_ng => quantifier negative test on group
|
// ist_quant_ng => quantifier negative test on group
|
||||||
if m_state == .ist_quant_ng {
|
else if m_state == .ist_quant_ng {
|
||||||
|
|
||||||
// we are finished here
|
// we are finished here
|
||||||
if state.group_index < 0 {
|
if state.group_index < 0 {
|
||||||
|
@ -2378,7 +2388,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
//
|
//
|
||||||
// Matchers
|
// Matchers
|
||||||
//
|
//
|
||||||
|
[direct_array_access]
|
||||||
pub fn (mut re RE) match_string(in_txt string) (int,int) {
|
pub fn (mut re RE) match_string(in_txt string) (int,int) {
|
||||||
|
|
||||||
start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
|
start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
|
||||||
|
@ -2406,6 +2416,7 @@ pub fn (mut re RE) match_string(in_txt string) (int,int) {
|
||||||
//
|
//
|
||||||
|
|
||||||
// find tries to find the first match in the input string
|
// find tries to find the first match in the input string
|
||||||
|
[direct_array_access]
|
||||||
pub fn (mut re RE) find(in_txt string) (int,int) {
|
pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
old_flag := re.flag
|
old_flag := re.flag
|
||||||
|
|
||||||
|
@ -2424,6 +2435,7 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// find_all finds all the non-overlapping occurrences of the match pattern
|
// find_all finds all the non-overlapping occurrences of the match pattern
|
||||||
|
[direct_array_access]
|
||||||
pub fn (mut re RE) find_all(in_txt string) []int {
|
pub fn (mut re RE) find_all(in_txt string) []int {
|
||||||
mut i := 0
|
mut i := 0
|
||||||
mut res := []int{}
|
mut res := []int{}
|
||||||
|
|
Loading…
Reference in New Issue