regex: new options returning interface to the regex module (#6062)
parent
664c26ab4b
commit
2a4ef2acbd
|
@ -1,4 +1,4 @@
|
|||
# V RegEx (Regular expression) 0.9d
|
||||
# V RegEx (Regular expression) 0.9g
|
||||
|
||||
[TOC]
|
||||
|
||||
|
@ -137,7 +137,7 @@ The "capture groups" are store as couple of index in the field `groups` that is
|
|||
```v
|
||||
text := "cpaz cpapaz cpapapaz"
|
||||
query:= r"(c(pa)+z ?)+"
|
||||
re, _, _ := regex.regex(query)
|
||||
mut re := regex.regex_opt(query) or { panic(err) }
|
||||
|
||||
println(re.get_query())
|
||||
// #0(c#1(pa)+z ?)+ // #0 and #1 are the ids of the groups, are shown if re.debug is 1 or 2
|
||||
|
@ -155,8 +155,6 @@ for gi < re.groups.len {
|
|||
// groups captured
|
||||
// 0 :[cpapapaz]
|
||||
// 1 :[pa]
|
||||
|
||||
|
||||
```
|
||||
|
||||
**note:** *to show the `group id number` in the result of the `get_query()` the flag `debug` of the RE object must be `1` or `2`*
|
||||
|
@ -187,48 +185,41 @@ fn example2() {
|
|||
text := "tst: 01,23,45 ,56, 78"
|
||||
query:= r".*:(\s*\d+[\s,]*)+"
|
||||
|
||||
mut re := regex.new_regex()
|
||||
mut re := regex.new()
|
||||
//re.debug = 2
|
||||
re.group_csave = [-1].repeat(3*20+1) // we expect max 20 records
|
||||
|
||||
re_err, err_pos := re.compile(query)
|
||||
if re_err == regex.COMPILE_OK {
|
||||
q_str := re.get_query()
|
||||
println("Query: $q_str")
|
||||
|
||||
start, end := re.match_string(text)
|
||||
if start < 0 {
|
||||
println("ERROR : ${re.get_parse_error_string(start)}, $start")
|
||||
} else {
|
||||
println("found in [$start, $end] => [${text[start..end]}]")
|
||||
}
|
||||
re.compile_opt(query) or { println(err) return }
|
||||
|
||||
// groups capture
|
||||
mut gi := 0
|
||||
for gi < re.groups.len {
|
||||
if re.groups[gi] >= 0 {
|
||||
println("${gi/2} ${re.groups[gi]},${re.groups[gi+1]} :[${text[re.groups[gi]..re.groups[gi+1]]}]")
|
||||
}
|
||||
gi += 2
|
||||
}
|
||||
q_str := re.get_query()
|
||||
println("Query: $q_str")
|
||||
|
||||
// continuous saving
|
||||
gi = 0
|
||||
println("num: ${re.group_csave[0]}")
|
||||
for gi < re.group_csave[0] {
|
||||
id := re.group_csave[1+gi*3]
|
||||
st := re.group_csave[1+gi*3+1]
|
||||
en := re.group_csave[1+gi*3+2]
|
||||
println("cg id: ${id} [${st}, ${en}] => [${text[st..en]}]")
|
||||
gi++
|
||||
}
|
||||
} else {
|
||||
println("query: $query")
|
||||
lc := "-".repeat(err_pos)
|
||||
println("err : $lc^")
|
||||
err_str := re.get_parse_error_string(re_err)
|
||||
println("ERROR: $err_str")
|
||||
}
|
||||
start, end := re.match_string(text)
|
||||
if start < 0 {
|
||||
println("ERROR : ${re.get_parse_error_string(start)}, $start")
|
||||
} else {
|
||||
println("found in [$start, $end] => [${text[start..end]}]")
|
||||
}
|
||||
|
||||
// groups capture
|
||||
mut gi := 0
|
||||
for gi < re.groups.len {
|
||||
if re.groups[gi] >= 0 {
|
||||
println("${gi/2} ${re.groups[gi]},${re.groups[gi+1]} :[${text[re.groups[gi]..re.groups[gi+1]]}]")
|
||||
}
|
||||
gi += 2
|
||||
}
|
||||
|
||||
// continuous saving
|
||||
gi = 0
|
||||
println("num: ${re.group_csave[0]}")
|
||||
for gi < re.group_csave[0] {
|
||||
id := re.group_csave[1+gi*3]
|
||||
st := re.group_csave[1+gi*3+1]
|
||||
en := re.group_csave[1+gi*3+2]
|
||||
println("cg id: ${id} [${st}, ${en}] => [${text[st..en]}]")
|
||||
gi++
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -261,73 +252,65 @@ Have a look at the example for the use of them.
|
|||
example:
|
||||
|
||||
```v
|
||||
import regex
|
||||
fn main() {
|
||||
test_regex()
|
||||
|
||||
text := "http://www.ciao.mondo/hello/pippo12_/pera.html"
|
||||
query:= r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+"
|
||||
|
||||
mut re := new_regex()
|
||||
mut re := new()
|
||||
re.debug = 2
|
||||
|
||||
// must provide an array of the right size if you want the continuous saving of the groups
|
||||
re.group_csave = [-1].repeat(3*20+1)
|
||||
|
||||
re_err, err_pos := re.compile(query)
|
||||
if re_err == COMPILE_OK {
|
||||
q_str := re.get_query()
|
||||
println("O.Query: $query")
|
||||
println("Query : $q_str")
|
||||
|
||||
re.debug = 0
|
||||
start, end := re.match_string(text)
|
||||
if start < 0 {
|
||||
err_str := re.get_parse_error_string(start)
|
||||
println("ERROR : $err_str, $start")
|
||||
} else {
|
||||
text1 := text[start..end]
|
||||
println("found in [$start, $end] => [$text1]")
|
||||
}
|
||||
re.compile_opt(query) or { println(err) return }
|
||||
|
||||
// groups
|
||||
mut gi := 0
|
||||
for gi < re.groups.len {
|
||||
if re.groups[gi] >= 0 {
|
||||
println("${gi/2} ${re.groups[gi]},${re.groups[gi+1]} :[${text[re.groups[gi]..re.groups[gi+1]]}]")
|
||||
}
|
||||
gi += 2
|
||||
}
|
||||
// continuous saving
|
||||
gi = 0
|
||||
println("num of group item saved: ${re.group_csave[0]}")
|
||||
for gi < re.group_csave[0] {
|
||||
id := re.group_csave[1+gi*3]
|
||||
st := re.group_csave[1+gi*3+1]
|
||||
en := re.group_csave[1+gi*3+2]
|
||||
println("cg id: ${id} [${st}, ${en}] => [${text[st..en]}]")
|
||||
gi++
|
||||
}
|
||||
println("raw array: ${re.group_csave[0..gi*3+2-1]}")
|
||||
q_str := re.get_query()
|
||||
println("O.Query: $query")
|
||||
println("Query : $q_str")
|
||||
|
||||
re.debug = 0
|
||||
start, end := re.match_string(text)
|
||||
if start < 0 {
|
||||
err_str := re.get_parse_error_string(start)
|
||||
println("ERROR : $err_str, $start")
|
||||
} else {
|
||||
text1 := text[start..end]
|
||||
println("found in [$start, $end] => [$text1]")
|
||||
}
|
||||
|
||||
// named capturing groups
|
||||
println("named capturing groups:")
|
||||
for g_name in re.group_map.keys() {
|
||||
s,e := re.get_group(g_name)
|
||||
if s >= 0 && e > s {
|
||||
println("'${g_name}':[$s, $e] => '${text[s..e]}'")
|
||||
} else {
|
||||
println("Group [${g_name}] doesn't exist.")
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
println("query: $query")
|
||||
lc := "-".repeat(err_pos)
|
||||
println("err : $lc^")
|
||||
err_str := re.get_parse_error_string(re_err)
|
||||
println("ERROR: $err_str")
|
||||
}
|
||||
// groups
|
||||
mut gi := 0
|
||||
for gi < re.groups.len {
|
||||
if re.groups[gi] >= 0 {
|
||||
println("${gi/2} ${re.groups[gi]},${re.groups[gi+1]} :[${text[re.groups[gi]..re.groups[gi+1]]}]")
|
||||
}
|
||||
gi += 2
|
||||
}
|
||||
// continuous saving
|
||||
gi = 0
|
||||
println("num of group item saved: ${re.group_csave[0]}")
|
||||
for gi < re.group_csave[0] {
|
||||
id := re.group_csave[1+gi*3]
|
||||
st := re.group_csave[1+gi*3+1]
|
||||
en := re.group_csave[1+gi*3+2]
|
||||
println("cg id: ${id} [${st}, ${en}] => [${text[st..en]}]")
|
||||
gi++
|
||||
}
|
||||
println("raw array: ${re.group_csave[0..gi*3+2-1]}")
|
||||
|
||||
// named capturing groups
|
||||
println("named capturing groups:")
|
||||
for g_name in re.group_map.keys() {
|
||||
s,e := re.get_group(g_name)
|
||||
if s >= 0 && e > s {
|
||||
println("'${g_name}':[$s, $e] => '${text[s..e]}'")
|
||||
} else {
|
||||
println("Group [${g_name}] doesn't exist.")
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -360,7 +343,7 @@ It is possible to set some flags in the regex parser that change the behavior of
|
|||
|
||||
```v
|
||||
// example of flag settings
|
||||
mut re := regex.new_regex()
|
||||
mut re := regex.new()
|
||||
re.flag = regex.F_BIN
|
||||
|
||||
```
|
||||
|
@ -382,22 +365,22 @@ These functions are helper that create the `RE` struct, a `RE` struct can be cre
|
|||
|
||||
```v
|
||||
// regex_opt create a regex object from the query string and compile it
|
||||
pub fn regex(in_query string) (RE,int,int)
|
||||
pub fn regex_opt(in_query string) ?RE
|
||||
```
|
||||
|
||||
#### **Base initializer**
|
||||
|
||||
```v
|
||||
// new create a REgex of small size, usually sufficient for ordinary use
|
||||
pub fn new_regex() RE
|
||||
pub fn new() RE
|
||||
|
||||
// new_by_size create a REgex of large size, mult specify the scale factor of the memory that will be allocated
|
||||
pub fn new_regex_by_size(mult int) RE
|
||||
pub fn new_by_size(mult int) RE
|
||||
```
|
||||
After a base initializer is used, the regex expression must be compiled with:
|
||||
```v
|
||||
// compile return (return code, index) where index is the index of the error in the query string if return code is an error code
|
||||
pub fn (re mut RE) compile(in_txt string) (int,int)
|
||||
// compile compiles the REgex returning an error if the compilation fails
|
||||
pub fn (re mut RE) compile_opt(in_txt string) ?
|
||||
```
|
||||
|
||||
### Operative Functions
|
||||
|
@ -428,20 +411,9 @@ the following example code show how to visualize the syntax errors in the compil
|
|||
|
||||
```v
|
||||
query:= r"ciao da ab[ab-]" // there is an error, a range not closed!!
|
||||
mut re := new_regex()
|
||||
mut re := new()
|
||||
|
||||
// re_err ==> is the return value, if < 0 it is an error
|
||||
// re_pos ==> if re_err < 0, re_pos is the error index in the query string
|
||||
re_err, err_pos := re.compile(query)
|
||||
|
||||
// print the error if one happen
|
||||
if re_err != COMPILE_OK {
|
||||
println("query: $query")
|
||||
lc := "-".repeat(err_pos)
|
||||
println("err : $lc^")
|
||||
err_str := re.get_parse_error_string(re_err) // get the error string
|
||||
println("ERROR: $err_str")
|
||||
}
|
||||
re.compile_opt(query) or { println(err) }
|
||||
|
||||
// output!!
|
||||
|
||||
|
@ -543,7 +515,7 @@ fn custom_print(txt string) {
|
|||
println("my log: $txt")
|
||||
}
|
||||
|
||||
mut re := new_regex()
|
||||
mut re := new()
|
||||
re.log_func = custom_print // every debug output from now will call this function
|
||||
|
||||
```
|
||||
|
@ -571,38 +543,29 @@ tests = [
|
|||
|
||||
fn example() {
|
||||
for c,tst in tests {
|
||||
mut re := regex.new_regex()
|
||||
re_err, err_pos := re.compile(tst.query)
|
||||
if re_err == regex.COMPILE_OK {
|
||||
mut re := regex.new()
|
||||
re.compile_opt(tst.query) or { println(err) continue }
|
||||
|
||||
// print the query parsed with the groups ids
|
||||
re.debug = 1 // set debug on at minimum level
|
||||
println("#${c:2d} query parsed: ${re.get_query()}")
|
||||
re.debug = 0
|
||||
|
||||
// do the match
|
||||
start, end := re.match_string(tst.source)
|
||||
if start >= 0 && end > start {
|
||||
println("#${c:2d} found in: [$start, $end] => [${tst.source[start..end]}]")
|
||||
}
|
||||
|
||||
// print the groups
|
||||
mut gi := 0
|
||||
for gi < re.groups.len {
|
||||
if re.groups[gi] >= 0 {
|
||||
println("group ${gi/2:2d} :[${tst.source[re.groups[gi]..re.groups[gi+1]]}]")
|
||||
}
|
||||
gi += 2
|
||||
}
|
||||
println("")
|
||||
} else {
|
||||
// print the compile error
|
||||
println("query: $tst.query")
|
||||
lc := "-".repeat(err_pos-1)
|
||||
println("err : $lc^")
|
||||
err_str := re.get_parse_error_string(re_err)
|
||||
println("ERROR: $err_str")
|
||||
}
|
||||
// print the query parsed with the groups ids
|
||||
re.debug = 1 // set debug on at minimum level
|
||||
println("#${c:2d} query parsed: ${re.get_query()}")
|
||||
re.debug = 0
|
||||
|
||||
// do the match
|
||||
start, end := re.match_string(tst.source)
|
||||
if start >= 0 && end > start {
|
||||
println("#${c:2d} found in: [$start, $end] => [${tst.source[start..end]}]")
|
||||
}
|
||||
|
||||
// print the groups
|
||||
mut gi := 0
|
||||
for gi < re.groups.len {
|
||||
if re.groups[gi] >= 0 {
|
||||
println("group ${gi/2:2d} :[${tst.source[re.groups[gi]..re.groups[gi+1]]}]")
|
||||
}
|
||||
gi += 2
|
||||
}
|
||||
println("")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
|
||||
regex 0.9e
|
||||
regex 0.9g
|
||||
|
||||
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
||||
Use of this source code is governed by an MIT license
|
||||
|
@ -19,7 +19,7 @@ module regex
|
|||
import strings
|
||||
|
||||
pub const(
|
||||
v_regex_version = "0.9e" // regex module version
|
||||
v_regex_version = "0.9g" // regex module version
|
||||
|
||||
max_code_len = 256 // default small base code len for the regex programs
|
||||
max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
|
||||
|
@ -912,7 +912,12 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) {
|
|||
// main compiler
|
||||
//
|
||||
// compile return (return code, index) where index is the index of the error in the query string if return code is an error code
|
||||
[deprecated]
|
||||
pub fn (mut re RE) compile(in_txt string) (int,int) {
|
||||
return re.impl_compile(in_txt)
|
||||
}
|
||||
|
||||
fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||
mut i := 0 // input string index
|
||||
mut pc := 0 // program counter
|
||||
mut tmp_code := u32(0)
|
||||
|
@ -2187,6 +2192,7 @@ Public functions
|
|||
//
|
||||
|
||||
// regex create a regex object from the query string
|
||||
[deprecated]
|
||||
pub fn regex(in_query string) (RE,int,int){
|
||||
mut re := RE{}
|
||||
re.prog = [Token{}].repeat(in_query.len+1)
|
||||
|
@ -2198,12 +2204,17 @@ pub fn regex(in_query string) (RE,int,int){
|
|||
}
|
||||
|
||||
// new_regex create a RE of small size, usually sufficient for ordinary use
|
||||
[deprecated]
|
||||
pub fn new_regex() RE {
|
||||
return new_regex_by_size(1)
|
||||
return impl_new_regex_by_size(1)
|
||||
}
|
||||
|
||||
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
||||
[deprecated]
|
||||
pub fn new_regex_by_size(mult int) RE {
|
||||
return impl_new_regex_by_size(mult)
|
||||
}
|
||||
fn impl_new_regex_by_size(mult int) RE {
|
||||
mut re := RE{}
|
||||
re.prog = [Token{}].repeat(max_code_len*mult) // max program length, default 256 instructions
|
||||
re.cc = [CharClass{}].repeat(max_code_len*mult) // char class list
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
module regex
|
||||
import strings
|
||||
|
||||
// compile_opt compiles the RE pattern string, returning an error if the compilation fails
|
||||
pub fn (mut re RE) compile_opt(pattern string) ? {
|
||||
re_err,err_pos := re.impl_compile(pattern)
|
||||
|
||||
if re_err != compile_ok {
|
||||
mut err_msg := strings.new_builder(300)
|
||||
err_msg.write("query: $pattern\n")
|
||||
line := "-".repeat(err_pos)
|
||||
err_msg.write("err : ${line}^\n")
|
||||
err_str := re.get_parse_error_string(re_err)
|
||||
err_msg.write("ERROR: $err_str\n")
|
||||
return error_with_code(err_msg.str(), re_err)
|
||||
}
|
||||
}
|
||||
|
||||
// new create a RE of small size, usually sufficient for ordinary use
|
||||
pub fn new() RE {
|
||||
return impl_new_regex_by_size(1)
|
||||
}
|
||||
|
||||
// new_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
||||
pub fn new_by_size(mult int) RE {
|
||||
return impl_new_regex_by_size(mult)
|
||||
}
|
||||
|
||||
// regex_opt create new RE object from RE pattern string
|
||||
pub fn regex_opt(pattern string) ?RE {
|
||||
mut re := new()
|
||||
re.compile_opt(pattern)?
|
||||
return re
|
||||
}
|
|
@ -175,48 +175,52 @@ fn test_regex(){
|
|||
// debug print
|
||||
//println("#$c [$to.src] q[$to.q] ($to.s, $to.e)")
|
||||
|
||||
mut re, re_err, _ := regex.regex(to.q)
|
||||
mut re := regex.regex_opt(to.q) or {
|
||||
eprintln('err: $err')
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
re.group_csave = [-1].repeat(3*20+1)
|
||||
|
||||
if re_err == regex.compile_ok {
|
||||
start, end := re.match_string(to.src)
|
||||
start, end := re.match_string(to.src)
|
||||
|
||||
mut tmp_str := ""
|
||||
if start >= 0 && end > start{
|
||||
tmp_str = to.src[start..end]
|
||||
}
|
||||
mut tmp_str := ""
|
||||
if start >= 0 && end > start{
|
||||
tmp_str = to.src[start..end]
|
||||
}
|
||||
|
||||
if start != to.s || end != to.e {
|
||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||
println("ERROR!")
|
||||
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||
assert false
|
||||
break
|
||||
}
|
||||
if start != to.s || end != to.e {
|
||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||
println("ERROR!")
|
||||
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
// check cgroups
|
||||
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
||||
println("Capturing group len error!")
|
||||
// check cgroups
|
||||
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
||||
println("Capturing group len error!")
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
// check captured groups
|
||||
mut ln := re.group_csave[0]*3
|
||||
for ln > 0 {
|
||||
if re.group_csave[ln] != to.cg[ln] {
|
||||
assert false
|
||||
}
|
||||
ln--
|
||||
}
|
||||
|
||||
// check captured groups
|
||||
mut ln := re.group_csave[0]*3
|
||||
for ln > 0 {
|
||||
if re.group_csave[ln] != to.cg[ln] {
|
||||
assert false
|
||||
}
|
||||
ln--
|
||||
// check named captured groups
|
||||
for k in to.cgn.keys() {
|
||||
if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
|
||||
println("Named capturing group error! [$k]")
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
// check named captured groups
|
||||
for k in to.cgn.keys() {
|
||||
if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
|
||||
println("Named capturing group error! [$k]")
|
||||
assert false
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -225,29 +229,27 @@ fn test_regex(){
|
|||
// debug print
|
||||
//println("#$c [$to.src] q[$to.q] $to.r")
|
||||
|
||||
mut re, re_err, err_pos := regex.regex(to.q)
|
||||
if re_err == regex.compile_ok {
|
||||
res := re.find_all(to.src)
|
||||
if res.len != to.r.len {
|
||||
println("ERROR: find_all, array of different size.")
|
||||
assert false
|
||||
}
|
||||
|
||||
for c1,i in res {
|
||||
if i != to.r[c1] {
|
||||
println("ERROR: find_all, different indexes.")
|
||||
assert false
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
println("query: $to.q")
|
||||
lc := "-".repeat(err_pos-1)
|
||||
println("err : $lc^")
|
||||
err_str := re.get_parse_error_string(re_err)
|
||||
println("ERROR: $err_str")
|
||||
mut re := regex.regex_opt(to.q) or {
|
||||
eprintln('err: $err')
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
res := re.find_all(to.src)
|
||||
if res.len != to.r.len {
|
||||
println("ERROR: find_all, array of different size.")
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
for c1,i in res {
|
||||
if i != to.r[c1] {
|
||||
println("ERROR: find_all, different indexes.")
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// check replace
|
||||
|
@ -255,97 +257,81 @@ fn test_regex(){
|
|||
// debug print
|
||||
//println("#$c [$to.src] q[$to.q] $to.r")
|
||||
|
||||
mut re, re_err, err_pos := regex.regex(to.q)
|
||||
if re_err == regex.compile_ok {
|
||||
res := re.replace(to.src,to.rep)
|
||||
if res != to.r {
|
||||
println("ERROR: replace.")
|
||||
assert false
|
||||
}
|
||||
|
||||
} else {
|
||||
println("query: $to.q")
|
||||
lc := "-".repeat(err_pos-1)
|
||||
println("err : $lc^")
|
||||
err_str := re.get_parse_error_string(re_err)
|
||||
println("ERROR: $err_str")
|
||||
mut re := regex.regex_opt(to.q) or {
|
||||
eprintln('err: $err')
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
res := re.replace(to.src,to.rep)
|
||||
if res != to.r {
|
||||
println("ERROR: replace.")
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// check match and find
|
||||
for c,to in match_test_suite {
|
||||
// debug print
|
||||
//println("#$c [$to.src] q[$to.q] $to.s")
|
||||
println("#$c [$to.src] q[$to.q] $to.s $to.e")
|
||||
|
||||
// test the find
|
||||
if to.s > 0 {
|
||||
mut re, re_err, err_pos := regex.regex(to.q)
|
||||
if re_err == regex.compile_ok {
|
||||
//q_str := re.get_query()
|
||||
//println("Query: $q_str")
|
||||
start,end := re.find(to.src)
|
||||
|
||||
if start != to.s || end != to.e {
|
||||
err_str := re.get_parse_error_string(start)
|
||||
println("ERROR : $err_str")
|
||||
assert false
|
||||
} else {
|
||||
//tmp_str := text[start..end]
|
||||
//println("found in [$start, $end] => [$tmp_str]")
|
||||
assert true
|
||||
}
|
||||
|
||||
} else {
|
||||
println("query: $to.q")
|
||||
lc := "-".repeat(err_pos-1)
|
||||
println("err : $lc^")
|
||||
err_str := re.get_parse_error_string(re_err)
|
||||
println("ERROR: $err_str")
|
||||
mut re := regex.regex_opt(to.q) or {
|
||||
eprintln('err: $err')
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
// q_str := re.get_query()
|
||||
// println("Query: $q_str")
|
||||
start,end := re.find(to.src)
|
||||
|
||||
if start != to.s || end != to.e {
|
||||
err_str := re.get_parse_error_string(start)
|
||||
println("ERROR : $err_str")
|
||||
assert false
|
||||
} else {
|
||||
//tmp_str := text[start..end]
|
||||
//println("found in [$start, $end] => [$tmp_str]")
|
||||
assert true
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// test the match
|
||||
mut re := regex.new_regex()
|
||||
mut re := regex.new()
|
||||
//re.debug = true
|
||||
|
||||
re_err,err_pos := re.compile(to.q)
|
||||
if re_err == regex.compile_ok {
|
||||
//println("#$c [$to.src] q[$to.q]")
|
||||
start, end := re.match_string(to.src)
|
||||
|
||||
mut tmp_str := ""
|
||||
if start >= 0 && end > start{
|
||||
tmp_str = to.src[start..end]
|
||||
}
|
||||
|
||||
if start != to.s || end != to.e {
|
||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||
println("ERROR!")
|
||||
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||
assert false
|
||||
break
|
||||
}
|
||||
|
||||
// rerun to test consistency
|
||||
tmp_str1 := to.src.clone()
|
||||
start1, end1 := re.match_string(tmp_str1)
|
||||
if start1 != start || end1 != end {
|
||||
println("two run ERROR!!")
|
||||
assert false
|
||||
break
|
||||
}
|
||||
|
||||
} else {
|
||||
println("query: $to.q")
|
||||
lc := "-".repeat(err_pos-1)
|
||||
println("err : $lc^")
|
||||
err_str := re.get_parse_error_string(re_err)
|
||||
println("ERROR: $err_str")
|
||||
re.compile_opt(to.q) or {
|
||||
eprintln('err: $err')
|
||||
assert false
|
||||
break
|
||||
continue
|
||||
}
|
||||
//println("#$c [$to.src] q[$to.q]")
|
||||
start, end := re.match_string(to.src)
|
||||
|
||||
mut tmp_str := ""
|
||||
if start >= 0 && end > start{
|
||||
tmp_str = to.src[start..end]
|
||||
}
|
||||
|
||||
if start != to.s || end != to.e {
|
||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||
println("ERROR!")
|
||||
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
// rerun to test consistency
|
||||
tmp_str1 := to.src.clone()
|
||||
start1, end1 := re.match_string(tmp_str1)
|
||||
if start1 != start || end1 != end {
|
||||
println("two run ERROR!!")
|
||||
assert false
|
||||
continue
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue