regex: speed optimization (#7471)

2020-12-22 17:42:32 +01:00 · 2020-12-22 17:42:32 +01:00 · b16052db95
parent 97855eca93
commit b16052db95
1 changed files with 23 additions and 11 deletions
--- a/vlib/regex/regex.v
+++ b/vlib/regex/regex.v
@ -88,6 +88,7 @@ fn utf8util_char_len(b byte) int {

 // get_char gets a char from position i and returns a u32 with the Unicode code
 [inline]
+[direct_array_access]
 fn (re RE) get_char(in_txt string, i int) (u32,int) {
 	ini := unsafe {in_txt.str[i]}
 	// ascii 8 bit
@ -107,6 +108,7 @@ fn (re RE) get_char(in_txt string, i int) (u32,int) {

 // get_charb gets a char from position i and returns a u32 with the Unicode code
 [inline]
+[direct_array_access]
 fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
 	// ascii 8 bit
 	if (re.flag & f_bin) !=0 ||	unsafe {in_txt[i]} & 0x80 == 0 {
@ -297,6 +299,7 @@ pub
 struct RE {
 pub mut:
 	prog []Token
+	prog_len         int       // regex program len

 	// char classes storage
 	cc []CharClass             // char class list
@ -323,12 +326,13 @@ pub mut:
 }

 // Reset RE object
-//[inline]
+[inline]
+[direct_array_access]
 fn (mut re RE) reset(){
 	re.cc_index = 0

 	mut i := 0
-	for i < re.prog.len {
+	for i < re.prog_len {
 		re.prog[i].group_rep = 0 // clear repetition of the group
 		re.prog[i].rep       = 0 // clear repetition of the token
 		i++
@ -340,14 +344,18 @@ fn (mut re RE) reset(){
 	}

 	// reset group_csave
-	re.group_csave = []int{}
+	if re.group_csave_flag == true {
+		re.group_csave.clear() // = []int{}
+	}
 }

 // reset for search mode fail
 // gcc bug: don't use [inline] here, or it goes 5 times slower
+//[inline]
+[direct_array_access]
 fn (mut re RE) reset_src(){
 	mut i := 0
-	for i < re.prog.len {
+	for i < re.prog_len {
 		re.prog[i].group_rep = 0 // clear repetition of the group
 		re.prog[i].rep       = 0 // clear repetition of the token
 		i++
@ -1155,6 +1163,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {

 	// add end of the program
 	re.prog[pc].ist = ist_prog_end
+	re.prog_len = pc

 	// check for unbalanced groups
 	if group_stack_index != -1 {
@ -1467,7 +1476,7 @@ pub fn (re RE) get_query() string {
 * Groups saving utilities
 *
 ******************************************************************************/
-[inline]
+[direct_array_access]
 fn (mut re RE) group_continuous_save(g_index int) {
 	if re.group_csave_flag == true {
 		// continuous save, save until we have space
@ -1550,6 +1559,7 @@ pub mut:
 	last_dot_pc int = -1      // last dot char pc
 }

+[direct_array_access]
 pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 	// result status
 	mut result   := no_match_found    // function return     
@ -1771,7 +1781,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 		}

 		// ist_next, next instruction resetting its state
-		if m_state == .ist_next {
+		else if m_state == .ist_next {
 			state.pc = state.pc + 1
 			re.prog[state.pc].reset()
 			// check if we are in the program bounds
@ -1784,7 +1794,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 		}

 		// ist_next_ks, next instruction keeping its state
-		if m_state == .ist_next_ks {
+		else if m_state == .ist_next_ks {
 			state.pc = state.pc + 1
 			// check if we are in the program bounds
 			if state.pc < 0 || state.pc > re.prog.len {
@ -1805,7 +1815,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 		}

 		// check if stop
-		if m_state == .stop {
+		else if m_state == .stop {

 			// we are in search mode, don't exit until the end
 			if ((re.flag & f_src) != 0) && (ist != ist_prog_end) {
@ -1849,7 +1859,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 		}

 		// ist_load
-		if m_state == .ist_load {
+		else if m_state == .ist_load {

 			// program end
 			if ist == ist_prog_end {
@ -2116,7 +2126,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 		* Quantifier management
 		***********************************/
 		// ist_quant_ng => quantifier negative test on group
-		if m_state == .ist_quant_ng {
+		else if m_state == .ist_quant_ng {

 			// we are finished here
 			if state.group_index < 0 {
@ -2378,7 +2388,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 //
 // Matchers
 //
-
+[direct_array_access]
 pub fn (mut re RE) match_string(in_txt string) (int,int) {

 	start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
@ -2406,6 +2416,7 @@ pub fn (mut re RE) match_string(in_txt string) (int,int) {
 //

 // find tries to find the first match in the input string
+[direct_array_access]
 pub fn (mut re RE) find(in_txt string) (int,int) {
 	old_flag := re.flag
 	
@ -2424,6 +2435,7 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
 }

 // find_all finds all the non-overlapping occurrences of the match pattern
+[direct_array_access]
 pub fn (mut re RE) find_all(in_txt string) []int {
 	mut i := 0
 	mut res := []int{}