parent
							
								
									6bd70bc88a
								
							
						
					
					
						commit
						4324be9bd8
					
				| 
						 | 
				
			
			@ -256,6 +256,9 @@ mut:
 | 
			
		|||
 | 
			
		||||
	// OR flag for the token
 | 
			
		||||
	next_is_or      bool       // true if the next token is an OR
 | 
			
		||||
 | 
			
		||||
	// last_dot flag
 | 
			
		||||
	last_dot bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
[inline]
 | 
			
		||||
| 
						 | 
				
			
			@ -1192,16 +1195,44 @@ fn (mut re RE) impl_compile(in_txt string) (int,int) {
 | 
			
		|||
	// Post processing
 | 
			
		||||
	//******************************************
 | 
			
		||||
 | 
			
		||||
	//
 | 
			
		||||
	// manage ist_dot_char
 | 
			
		||||
	//
 | 
			
		||||
	// count ist_dot_char to set the size of the state stack
 | 
			
		||||
	mut pc1 := 0
 | 
			
		||||
	mut tmp_count := 0
 | 
			
		||||
	mut last_dot_pc := -1
 | 
			
		||||
	for pc1 < pc {
 | 
			
		||||
		if re.prog[pc1].ist == ist_dot_char {
 | 
			
		||||
			tmp_count++
 | 
			
		||||
			last_dot_pc = pc1
 | 
			
		||||
			//println("Found dot_char pc:[${last_dot_pc}]")
 | 
			
		||||
		}
 | 
			
		||||
		pc1++
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// if a dot_char token exists, mark the last one so the final .* can be managed
 | 
			
		||||
	if last_dot_pc >= 0 {
 | 
			
		||||
		re.prog[last_dot_pc].last_dot = true
 | 
			
		||||
		
 | 
			
		||||
		mut last_dot_flag := true
 | 
			
		||||
		mut tmp_pc := last_dot_pc + 1
 | 
			
		||||
		for tmp_pc < pc {
 | 
			
		||||
			if re.prog[tmp_pc].ist !in [rune(ist_prog_end),ist_group_end] {
 | 
			
		||||
				last_dot_flag = false
 | 
			
		||||
				break
 | 
			
		||||
			}
 | 
			
		||||
			tmp_pc++
 | 
			
		||||
		}
 | 
			
		||||
		re.prog[last_dot_pc].last_dot = last_dot_flag
 | 
			
		||||
		//println("Our last dot flag  pc: ${last_dot_pc} flag: ${last_dot_flag}")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	//******************************************
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	// init the state stack
 | 
			
		||||
	re.state_stack = []StateDotObj{len: tmp_count+1, init: StateDotObj{}}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1325,6 +1356,12 @@ pub fn (re RE) get_code() string {
 | 
			
		|||
					res.write("?")
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			// last dot char flag
 | 
			
		||||
			if tk.last_dot == true {
 | 
			
		||||
				res.write(" Last dot_char!")
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			res.write("\n")
 | 
			
		||||
			if stop_flag {
 | 
			
		||||
				break
 | 
			
		||||
| 
						 | 
				
			
			@ -1502,7 +1539,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
			
		|||
	mut pc    := -1                   // program counter
 | 
			
		||||
	mut state := StateObj{}           // actual state
 | 
			
		||||
	mut ist   := rune(0)              // actual instruction
 | 
			
		||||
	mut l_ist :=rune(0)               // last matched instruction
 | 
			
		||||
	mut l_ist := rune(0)              // last matched instruction
 | 
			
		||||
 | 
			
		||||
	//mut group_stack      := [-1].repeat(re.group_max)
 | 
			
		||||
	//mut group_data       := [-1].repeat(re.group_max)
 | 
			
		||||
| 
						 | 
				
			
			@ -1581,6 +1618,9 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
			
		|||
								buf2.write("CHAR_CLASS_NEG[${re.get_char_class(pc)}]")
 | 
			
		||||
							} else if ist == ist_dot_char {
 | 
			
		||||
								buf2.write("DOT_CHAR")
 | 
			
		||||
								if re.prog[pc].last_dot == true {
 | 
			
		||||
									buf2.write(" Last dot_char!")
 | 
			
		||||
								}
 | 
			
		||||
							} else if ist == ist_group_start {
 | 
			
		||||
								tmp_gi :=re.prog[pc].group_id
 | 
			
		||||
								tmp_gr := re.prog[re.prog[pc].goto_pc].group_rep
 | 
			
		||||
| 
						 | 
				
			
			@ -1638,6 +1678,11 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
			
		|||
						}
 | 
			
		||||
						re.groups[g_index+1] = i
 | 
			
		||||
 | 
			
		||||
						// manage last dot_char
 | 
			
		||||
						if l_ist == ist_dot_char && re.prog[pc].last_dot == true {
 | 
			
		||||
							re.groups[g_index+1]--
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						// continuous save, save until we have space
 | 
			
		||||
						if re.group_csave_index > 0 {
 | 
			
		||||
							// check if we have space to save the record
 | 
			
		||||
| 
						 | 
				
			
			@ -1677,6 +1722,11 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
			
		|||
				return first_match,i
 | 
			
		||||
			}		
 | 
			
		||||
 | 
			
		||||
			// manage last dot_char
 | 
			
		||||
			if first_match >= 0 && l_ist == ist_dot_char && re.prog[pc].last_dot == true {
 | 
			
		||||
				return first_match,i
 | 
			
		||||
			}
 | 
			
		||||
			
 | 
			
		||||
			//m_state = .end
 | 
			
		||||
			//break
 | 
			
		||||
			return no_match_found,0
 | 
			
		||||
| 
						 | 
				
			
			@ -1798,6 +1848,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
			
		|||
			// check GROUP end
 | 
			
		||||
			else if ist == ist_group_end {
 | 
			
		||||
				// we are in matching streak
 | 
			
		||||
				//println("Group END!! last ist: ${l_ist:08x}")
 | 
			
		||||
				if state.match_index >= 0 {
 | 
			
		||||
					// restore txt index stack and save the group data
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1816,9 +1867,14 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
			
		|||
						re.groups[g_index+1] = i
 | 
			
		||||
						
 | 
			
		||||
						// if a group ends with a dot, compensate for the char index that was not increased
 | 
			
		||||
/*
 | 
			
		||||
						if i == re.groups[g_index] {
 | 
			
		||||
							re.groups[g_index+1] = i+1
 | 
			
		||||
						}
 | 
			
		||||
*/
 | 
			
		||||
						if l_ist == ist_dot_char {
 | 
			
		||||
							re.groups[g_index+1] = i+1
 | 
			
		||||
						}
 | 
			
		||||
						
 | 
			
		||||
						//println("GROUP ${re.prog[pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}]")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1873,8 +1929,8 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
			
		|||
				state.match_index = i
 | 
			
		||||
				re.prog[pc].rep++ // increase repetitions
 | 
			
		||||
 | 
			
		||||
				//if re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max {
 | 
			
		||||
				if re.prog[pc].rep >= 0 && re.prog[pc].rep <= re.prog[pc].rep_max {
 | 
			
		||||
				if re.prog[pc].rep >= re.prog[pc].rep_min && re.prog[pc].rep <= re.prog[pc].rep_max {
 | 
			
		||||
				//if re.prog[pc].rep >= 0 && re.prog[pc].rep <= re.prog[pc].rep_max {
 | 
			
		||||
					//println("DOT CHAR save state : ${re.state_stack_index}")
 | 
			
		||||
					// save the state
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1893,18 +1949,21 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
 | 
			
		|||
				}
 | 
			
		||||
 | 
			
		||||
				if re.prog[pc].rep >= 1 && re.state_stack_index >= 0 {
 | 
			
		||||
					//println("Save state char index.")
 | 
			
		||||
					re.state_stack[re.state_stack_index].i  = i + char_len
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
				//i += char_len // next char
 | 
			
		||||
/*
 | 
			
		||||
				// manage * and {0,} quantifier
 | 
			
		||||
				if re.prog[pc].rep_max == max_quantifier {
 | 
			
		||||
					//println("manage .*")
 | 
			
		||||
				// manage last dot char
 | 
			
		||||
				if re.prog[pc].last_dot == true 
 | 
			
		||||
					&& re.prog[pc].rep >= re.prog[pc].rep_min 
 | 
			
		||||
					&& re.prog[pc].rep <= re.prog[pc].rep_max
 | 
			
		||||
				{
 | 
			
		||||
					//println("We are the last dot_char in the query")
 | 
			
		||||
					i += char_len
 | 
			
		||||
					m_state = .ist_load
 | 
			
		||||
					continue
 | 
			
		||||
				}
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
				m_state = .ist_next
 | 
			
		||||
				continue
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -112,6 +112,13 @@ match_test_suite = [
 | 
			
		|||
	TestItem{"poth", r".(oth)|(eth)$",0,4},
 | 
			
		||||
	TestItem{"poth", r"^.(oth)|(eth)$",0,4},
 | 
			
		||||
	TestItem{"poth", r"^\w+$",0,4},
 | 
			
		||||
 | 
			
		||||
	// test dot_char
 | 
			
		||||
	TestItem{"8-11 l: qllllqllklhlvtl", r"^(\d+)-(\d+) ([a-z]): (.*)$",0,23},
 | 
			
		||||
	TestItem{"accccb deer", r"^a(.*)b d(.+)r",0,11},
 | 
			
		||||
	TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11},
 | 
			
		||||
	TestItem{"accccb deer", r"^(.*)$",0,11},
 | 
			
		||||
	TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,0},
 | 
			
		||||
]
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -167,7 +174,7 @@ struct TestItemCGroup {
 | 
			
		|||
	q string
 | 
			
		||||
	s int
 | 
			
		||||
	e int
 | 
			
		||||
	cg []int
 | 
			
		||||
	cg []int // [number of items, followed by 3 values per item: id_group_0, start_0, end_0, id_group_1, start_1, end_1, ...]
 | 
			
		||||
	cgn map[string]int
 | 
			
		||||
}
 | 
			
		||||
const (
 | 
			
		||||
| 
						 | 
				
			
			@ -181,7 +188,7 @@ cgroups_test_suite = [
 | 
			
		|||
	TestItemCGroup{
 | 
			
		||||
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
 | 
			
		||||
		r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
 | 
			
		||||
		[2, 0, 0, 4, 1, 7, 10],
 | 
			
		||||
		[8, 0, 0, 4, 1, 7, 12, 1, 11, 17, 1, 16, 23, 1, 22, 29, 1, 28, 38, 1, 37, 43, 1, 42, 46],
 | 
			
		||||
		{'format':int(0),'token':1}
 | 
			
		||||
	},
 | 
			
		||||
	TestItemCGroup{
 | 
			
		||||
| 
						 | 
				
			
			@ -247,7 +254,7 @@ fn test_regex(){
 | 
			
		|||
		// check cgroups
 | 
			
		||||
		if to.cgn.len > 0 {
 | 
			
		||||
			if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
 | 
			
		||||
				println("Capturing group len error! ${re.group_csave[0]}")
 | 
			
		||||
				println("Capturing group len error! found: ${re.group_csave[0]} true ground: ${to.cg[0]}")
 | 
			
		||||
				assert false
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -256,6 +263,7 @@ fn test_regex(){
 | 
			
		|||
			mut ln := re.group_csave[0]*3
 | 
			
		||||
			for ln > 0 {
 | 
			
		||||
				if re.group_csave[ln] != to.cg[ln] {
 | 
			
		||||
					println("Capturing group failed on $ln item!")
 | 
			
		||||
					assert false
 | 
			
		||||
				}
 | 
			
		||||
				ln--
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue