regex: fix compilation issues with gcc under ubuntu (#7112)

2020-12-03 19:33:53 +01:00 · 2020-12-03 19:33:53 +01:00 · 15ffce1317
parent 793f9ae9e3
commit 15ffce1317
1 changed files with 86 additions and 90 deletions
--- a/vlib/regex/regex.v
+++ b/vlib/regex/regex.v
@ -27,7 +27,7 @@ pub const(
 	// spaces chars (here only westerns!!) TODO: manage all the spaces from unicode
 	spaces = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
 	// new line chars for now only '\n'
-	new_line_list = [`\n`,`\r`]
+	new_line_list = [`\n`, `\r`]
 	// Results
 	no_match_found          = -1
@ -92,9 +92,7 @@ fn utf8util_char_len(b byte) int {
 fn (re RE) get_char(in_txt string, i int) (u32,int) {
 	ini := unsafe {in_txt.str[i]}
 	// ascii 8 bit
-	if (re.flag & f_bin) !=0 ||
+	if (re.flag & f_bin) !=0 ||	ini & 0x80 == 0 {
 		ini & 0x80 == 0
 	{
 		return u32(ini), 1
 	}
 	// unicode char
@ -102,7 +100,7 @@ fn (re RE) get_char(in_txt string, i int) (u32,int) {
 	mut tmp := 0
 	mut ch := u32(0)
 	for tmp < char_len {
-		ch = (ch << 8) | unsafe {in_txt.str[i+tmp]}
+		ch = (ch << 8) | unsafe {in_txt.str[i + tmp]}
 		tmp++
 	}
 	return ch,char_len
@ -112,9 +110,7 @@ fn (re RE) get_char(in_txt string, i int) (u32,int) {
 [inline]
 fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
 	// ascii 8 bit
-	if (re.flag & f_bin) !=0 ||
+	if (re.flag & f_bin) !=0 ||	unsafe {in_txt[i]} & 0x80 == 0 {
 		unsafe {in_txt[i]} & 0x80 == 0
 	{
 		return u32(unsafe {in_txt[i]}), 1
 	}
 	// unicode char
@ -122,7 +118,7 @@ fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
 	mut tmp := 0
 	mut ch := u32(0)
 	for tmp < char_len {
-		ch = (ch << 8) | unsafe {in_txt[i+tmp]}
+		ch = (ch << 8) | unsafe {in_txt[i + tmp]}
 		tmp++
 	}
 	return ch,char_len
@ -131,11 +127,11 @@ fn (re RE) get_charb(in_txt byteptr, i int) (u32,int) {
 [inline]
 fn is_alnum(in_char byte) bool {
 	mut tmp := in_char - `A`
-	if tmp >= 0x00 && tmp <= 25 { return true }
+	if tmp <= 25 { return true }
 	tmp = in_char - `a`
-	if tmp >= 0x00 && tmp <= 25 { return true }
+	if tmp <= 25 { return true }
 	tmp = in_char - `0`
-	if tmp >= 0x00 && tmp <= 9  { return true }
+	if tmp <= 9  { return true }
 	if tmp == `_` { return true }
 	return false
 }
@ -158,7 +154,7 @@ fn is_not_space(in_char byte) bool {
 [inline]
 fn is_digit(in_char byte) bool {
 	tmp := in_char - `0`
-	return tmp <= 0x09 && tmp >= 0
+	return tmp <= 0x09
 }
 [inline]
@ -179,13 +175,13 @@ fn is_not_wordchar(in_char byte) bool {
 [inline]
 fn is_lower(in_char byte) bool {
 	tmp := in_char - `a`
-	return  tmp >= 0x00 && tmp <= 25
+	return tmp <= 25
 }
 [inline]
 fn is_upper(in_char byte) bool {
 	tmp := in_char - `A`
-	return  tmp >= 0x00 && tmp <= 25
+	return tmp <= 25
 }
 pub fn (re RE) get_parse_error_string(err int) string {
@ -211,7 +207,7 @@ fn utf8_str(ch rune) string {
 	mut i := 4
 	mut res := ""
 	for i > 0 {
-		v := byte((ch >> ((i-1)*8)) & 0xFF)
+		v := byte((ch >> ((i - 1) * 8)) & 0xFF)
 		if v != 0{
 			res += "${v:1c}"
 		}
@ -316,7 +312,7 @@ pub mut:
 	group_max         int  = 8         // max allowed number of different groups
 	group_csave       []int = []int{}  // groups continuous save array
-	group_csave_index int= -1       // groups continuous save index
+	group_csave_index int = -1         // groups continuous save index
 	group_map         map[string]int   // groups names map
@ -367,8 +363,8 @@ fn (mut re RE) reset_src(){
 pub fn (re RE) get_group(group_name string) (int, int) {
 	if group_name in re.group_map {
 		tmp_index := re.group_map[group_name]-1
-		start := re.groups[tmp_index*2]
+		start     := re.groups[tmp_index * 2]
-		end := re.groups[tmp_index*2+1]
+		end       := re.groups[tmp_index * 2 + 1]
 		return start,end
 	}
 	return -1, -1
@ -397,7 +393,7 @@ const(
 	]
 	// these chars are escape if preceded by a \
-	bsls_escape_list = [ `\\`,`|`,`.`,`*`,`+`,`-`,`{`,`}`,`[`,`]` ]
+	bsls_escape_list = [`\\`, `|`, `.`, `*`, `+`, `-`, `{`, `}`, `[`, `]`]
 )
 enum BSLS_parse_state {
@ -414,7 +410,7 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
 	for i < in_txt.len {
 		// get our char
-		char_tmp,char_len := re.get_char(in_txt,i)
+		char_tmp, char_len := re.get_char(in_txt, i)
 		ch := byte(char_tmp)
 		if status == .start && ch == `\\` {
@ -427,7 +423,7 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
 		if status == .bsls_found {
 			for c,x in bsls_validator_array {
 				if x.ch == ch {
-					return c,i-in_i+1
+					return c, i-in_i+1
 				}
 			}
 			status = .normal_char
@ -437,9 +433,9 @@ fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
 		// no BSLS validator, manage as normal escape char char
 		if status == .normal_char {
 			if ch in bsls_escape_list {
-				return no_match_found,i-in_i+1
+				return no_match_found, i-in_i+1
 			}
-			return err_syntax_error,i-in_i+1
+			return err_syntax_error, i-in_i+1
 		}
 		// at the present time we manage only one char after the \
@ -570,7 +566,7 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
 		// check if we are out of memory for char classes
 		if tmp_index >= re.cc.len {
-			return err_cc_alloc_overflow,0,u32(0)
+			return err_cc_alloc_overflow, 0, u32(0)
 		}
 		// get our char
@ -710,7 +706,7 @@ fn (re RE) parse_quantifier(in_txt string, in_i int) (int, int, int, bool) {
 		// exit on no compatible char with {} quantifier
 		if utf8util_char_len(ch) != 1 {
-			return err_syntax_error,i,0,false
+			return err_syntax_error, i, 0, false
 		}
 		// min parsing skip if comma present
@ -913,14 +909,13 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) {
 //
 // compile return (return code, index) where index is the index of the error in the query string if return code is an error code
 [deprecated]
-pub fn (mut re RE) compile(in_txt string) (int,int) {
+pub fn (mut re RE) compile(in_txt string) (int, int) {
 	return re.impl_compile(in_txt)
 }
 fn (mut re RE) impl_compile(in_txt string) (int,int) {
 	mut i        := 0      // input string index
 	mut pc       := 0      // program counter
 	mut tmp_code := u32(0)
 	// group management variables
 	mut group_count           := -1
@ -932,7 +927,6 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 	i = 0
 	for i < in_txt.len {
 		tmp_code = u32(0)
 		mut char_tmp := u32(0)
 		mut char_len := 0
 		//println("i: ${i:3d} ch: ${in_txt.str[i]:c}")
@ -958,20 +952,20 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 			//check max groups allowed
 			if group_count > re.group_max {
-				return err_groups_overflow,i+1
+				return err_groups_overflow, i+1
 			}
 			group_stack_index++
 			// check max nested groups allowed
 			if group_stack_index > re.group_max_nested {
-				return err_groups_max_nested,i+1
+				return err_groups_max_nested, i+1
 			}
 			tmp_res, cgroup_flag, cgroup_name, next_i := re.parse_groups(in_txt,i)
 			// manage question mark format error
 			if tmp_res < -1 {
-				return err_group_qm_notation,next_i
+				return err_group_qm_notation, next_i
 			}
 			//println("Parse group: [$tmp_res, $cgroup_flag, ($i,$next_i), '${in_txt[i..next_i]}' ]")
@ -988,10 +982,10 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 			if cgroup_name.len > 0 {
 				//println("GROUP NAME: ${cgroup_name}")
 				if cgroup_name in re.group_map{
-					group_id = re.group_map[cgroup_name]-1
+					group_id = re.group_map[cgroup_name] - 1
 					group_count--
 				} else {
-					re.group_map[cgroup_name] = group_id+1
+					re.group_map[cgroup_name] = group_id + 1
 				}
 			}
@ -1018,7 +1012,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 		// ist_group_end
 		if char_len==1 && pc > 0 && byte(char_tmp) == `)` {
 			if group_stack_index < 0 {
-				return err_group_not_balanced,i+1
+				return err_group_not_balanced, i+1
 			}
 			goto_pc := group_stack[group_stack_index]
@ -1161,7 +1155,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 				}
 				// if not an escape or a bsls char then it is an error (at least for now!)
 				else {
-					return bsls_index,i+tmp
+					return bsls_index, i+tmp
 				}
 			}
 		}
@ -1192,7 +1186,7 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 	}
 	// store the number of groups in the query
-	re.group_count = group_count+1
+	re.group_count = group_count + 1
 	//******************************************
 	// Post processing
@ -1502,8 +1496,10 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 	mut ist   := rune(0)              // actual instruction
 	mut l_ist :=rune(0)               // last matched instruction
-	mut group_stack      := [-1].repeat(re.group_max)
+	//mut group_stack      := [-1].repeat(re.group_max)
-	mut group_data       := [-1].repeat(re.group_max)
+	//mut group_data       := [-1].repeat(re.group_max)
 	mut group_stack := []int{len: re.group_max, init: -1}
 	mut group_data  := []int{len: re.group_max, init: -1}
 	mut group_index := -1             // group id used to know how many groups are open
@ -1535,7 +1531,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 		// DEBUG LOG
 		//******************************************
 		if re.debug>0 {
-			mut buf2 := strings.new_builder(re.cc.len+128)
+			mut buf2 := strings.new_builder(re.cc.len + 128)
 			// print all the instructions
@ -1962,7 +1958,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 		/***********************************
 		* Quantifier management
 		***********************************/
-		// ist_quant_ng
+		// ist_quant_ng => quantifier negative test on group
 		if m_state == .ist_quant_ng {
 			// we are finished here
@ -2039,7 +2035,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 			return err_internal_error, i
 		}
-		// ist_quant_pg
+		// ist_quant_pg => quantifier positive test on group
 		else if m_state == .ist_quant_pg {
 			//println(".ist_quant_pg")
 			mut tmp_pc := pc
@ -2084,7 +2080,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 			return err_internal_error, i
 		}
-		// ist_quant_n
+		// ist_quant_n => quantifier negative test on token
 		else if m_state == .ist_quant_n {
 			rep := re.prog[pc].rep
 			//println("Here!! PC $pc is_next_or: ${re.prog[pc].next_is_or}")
@ -2125,7 +2121,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 			//return no_match_found, 0
 		}
-		// ist_quant_p
+		// ist_quant_p => quantifier positive test on token
 		else if m_state == .ist_quant_p {
 			// exit on first match
 			if (re.flag & f_efm) != 0 {
@ -2255,7 +2251,7 @@ pub fn (mut re RE) find(in_txt string) (int,int) {
 	start, end := re.match_base(in_txt.str, in_txt.len)
 	re.flag = old_flag
 	if start >= 0 && end > start {
-		return start,end
+		return start, end
 	}
 	return no_match_found, 0
 }