regex: new options returning interface to the regex module (#6062)
parent
664c26ab4b
commit
2a4ef2acbd
|
@ -1,4 +1,4 @@
|
||||||
# V RegEx (Regular expression) 0.9d
|
# V RegEx (Regular expression) 0.9g
|
||||||
|
|
||||||
[TOC]
|
[TOC]
|
||||||
|
|
||||||
|
@ -137,7 +137,7 @@ The "capture groups" are store as couple of index in the field `groups` that is
|
||||||
```v
|
```v
|
||||||
text := "cpaz cpapaz cpapapaz"
|
text := "cpaz cpapaz cpapapaz"
|
||||||
query:= r"(c(pa)+z ?)+"
|
query:= r"(c(pa)+z ?)+"
|
||||||
re, _, _ := regex.regex(query)
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
|
|
||||||
println(re.get_query())
|
println(re.get_query())
|
||||||
// #0(c#1(pa)+z ?)+ // #0 and #1 are the ids of the groups, are shown if re.debug is 1 or 2
|
// #0(c#1(pa)+z ?)+ // #0 and #1 are the ids of the groups, are shown if re.debug is 1 or 2
|
||||||
|
@ -155,8 +155,6 @@ for gi < re.groups.len {
|
||||||
// groups captured
|
// groups captured
|
||||||
// 0 :[cpapapaz]
|
// 0 :[cpapapaz]
|
||||||
// 1 :[pa]
|
// 1 :[pa]
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**note:** *to show the `group id number` in the result of the `get_query()` the flag `debug` of the RE object must be `1` or `2`*
|
**note:** *to show the `group id number` in the result of the `get_query()` the flag `debug` of the RE object must be `1` or `2`*
|
||||||
|
@ -187,48 +185,41 @@ fn example2() {
|
||||||
text := "tst: 01,23,45 ,56, 78"
|
text := "tst: 01,23,45 ,56, 78"
|
||||||
query:= r".*:(\s*\d+[\s,]*)+"
|
query:= r".*:(\s*\d+[\s,]*)+"
|
||||||
|
|
||||||
mut re := regex.new_regex()
|
mut re := regex.new()
|
||||||
//re.debug = 2
|
//re.debug = 2
|
||||||
re.group_csave = [-1].repeat(3*20+1) // we expect max 20 records
|
re.group_csave = [-1].repeat(3*20+1) // we expect max 20 records
|
||||||
|
|
||||||
re_err, err_pos := re.compile(query)
|
re.compile_opt(query) or { println(err) return }
|
||||||
if re_err == regex.COMPILE_OK {
|
|
||||||
q_str := re.get_query()
|
|
||||||
println("Query: $q_str")
|
|
||||||
|
|
||||||
start, end := re.match_string(text)
|
|
||||||
if start < 0 {
|
|
||||||
println("ERROR : ${re.get_parse_error_string(start)}, $start")
|
|
||||||
} else {
|
|
||||||
println("found in [$start, $end] => [${text[start..end]}]")
|
|
||||||
}
|
|
||||||
|
|
||||||
// groups capture
|
q_str := re.get_query()
|
||||||
mut gi := 0
|
println("Query: $q_str")
|
||||||
for gi < re.groups.len {
|
|
||||||
if re.groups[gi] >= 0 {
|
|
||||||
println("${gi/2} ${re.groups[gi]},${re.groups[gi+1]} :[${text[re.groups[gi]..re.groups[gi+1]]}]")
|
|
||||||
}
|
|
||||||
gi += 2
|
|
||||||
}
|
|
||||||
|
|
||||||
// continuous saving
|
start, end := re.match_string(text)
|
||||||
gi = 0
|
if start < 0 {
|
||||||
println("num: ${re.group_csave[0]}")
|
println("ERROR : ${re.get_parse_error_string(start)}, $start")
|
||||||
for gi < re.group_csave[0] {
|
} else {
|
||||||
id := re.group_csave[1+gi*3]
|
println("found in [$start, $end] => [${text[start..end]}]")
|
||||||
st := re.group_csave[1+gi*3+1]
|
}
|
||||||
en := re.group_csave[1+gi*3+2]
|
|
||||||
println("cg id: ${id} [${st}, ${en}] => [${text[st..en]}]")
|
// groups capture
|
||||||
gi++
|
mut gi := 0
|
||||||
}
|
for gi < re.groups.len {
|
||||||
} else {
|
if re.groups[gi] >= 0 {
|
||||||
println("query: $query")
|
println("${gi/2} ${re.groups[gi]},${re.groups[gi+1]} :[${text[re.groups[gi]..re.groups[gi+1]]}]")
|
||||||
lc := "-".repeat(err_pos)
|
}
|
||||||
println("err : $lc^")
|
gi += 2
|
||||||
err_str := re.get_parse_error_string(re_err)
|
}
|
||||||
println("ERROR: $err_str")
|
|
||||||
}
|
// continuous saving
|
||||||
|
gi = 0
|
||||||
|
println("num: ${re.group_csave[0]}")
|
||||||
|
for gi < re.group_csave[0] {
|
||||||
|
id := re.group_csave[1+gi*3]
|
||||||
|
st := re.group_csave[1+gi*3+1]
|
||||||
|
en := re.group_csave[1+gi*3+2]
|
||||||
|
println("cg id: ${id} [${st}, ${en}] => [${text[st..en]}]")
|
||||||
|
gi++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -261,73 +252,65 @@ Have a look at the example for the use of them.
|
||||||
example:
|
example:
|
||||||
|
|
||||||
```v
|
```v
|
||||||
|
import regex
|
||||||
fn main() {
|
fn main() {
|
||||||
test_regex()
|
test_regex()
|
||||||
|
|
||||||
text := "http://www.ciao.mondo/hello/pippo12_/pera.html"
|
text := "http://www.ciao.mondo/hello/pippo12_/pera.html"
|
||||||
query:= r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+"
|
query:= r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+"
|
||||||
|
|
||||||
mut re := new_regex()
|
mut re := new()
|
||||||
re.debug = 2
|
re.debug = 2
|
||||||
|
|
||||||
// must provide an array of the right size if you want the continuous saving of the groups
|
// must provide an array of the right size if you want the continuous saving of the groups
|
||||||
re.group_csave = [-1].repeat(3*20+1)
|
re.group_csave = [-1].repeat(3*20+1)
|
||||||
|
|
||||||
re_err, err_pos := re.compile(query)
|
re.compile_opt(query) or { println(err) return }
|
||||||
if re_err == COMPILE_OK {
|
|
||||||
q_str := re.get_query()
|
|
||||||
println("O.Query: $query")
|
|
||||||
println("Query : $q_str")
|
|
||||||
|
|
||||||
re.debug = 0
|
|
||||||
start, end := re.match_string(text)
|
|
||||||
if start < 0 {
|
|
||||||
err_str := re.get_parse_error_string(start)
|
|
||||||
println("ERROR : $err_str, $start")
|
|
||||||
} else {
|
|
||||||
text1 := text[start..end]
|
|
||||||
println("found in [$start, $end] => [$text1]")
|
|
||||||
}
|
|
||||||
|
|
||||||
// groups
|
q_str := re.get_query()
|
||||||
mut gi := 0
|
println("O.Query: $query")
|
||||||
for gi < re.groups.len {
|
println("Query : $q_str")
|
||||||
if re.groups[gi] >= 0 {
|
|
||||||
println("${gi/2} ${re.groups[gi]},${re.groups[gi+1]} :[${text[re.groups[gi]..re.groups[gi+1]]}]")
|
re.debug = 0
|
||||||
}
|
start, end := re.match_string(text)
|
||||||
gi += 2
|
if start < 0 {
|
||||||
}
|
err_str := re.get_parse_error_string(start)
|
||||||
// continuous saving
|
println("ERROR : $err_str, $start")
|
||||||
gi = 0
|
} else {
|
||||||
println("num of group item saved: ${re.group_csave[0]}")
|
text1 := text[start..end]
|
||||||
for gi < re.group_csave[0] {
|
println("found in [$start, $end] => [$text1]")
|
||||||
id := re.group_csave[1+gi*3]
|
}
|
||||||
st := re.group_csave[1+gi*3+1]
|
|
||||||
en := re.group_csave[1+gi*3+2]
|
|
||||||
println("cg id: ${id} [${st}, ${en}] => [${text[st..en]}]")
|
|
||||||
gi++
|
|
||||||
}
|
|
||||||
println("raw array: ${re.group_csave[0..gi*3+2-1]}")
|
|
||||||
|
|
||||||
// named capturing groups
|
// groups
|
||||||
println("named capturing groups:")
|
mut gi := 0
|
||||||
for g_name in re.group_map.keys() {
|
for gi < re.groups.len {
|
||||||
s,e := re.get_group(g_name)
|
if re.groups[gi] >= 0 {
|
||||||
if s >= 0 && e > s {
|
println("${gi/2} ${re.groups[gi]},${re.groups[gi+1]} :[${text[re.groups[gi]..re.groups[gi+1]]}]")
|
||||||
println("'${g_name}':[$s, $e] => '${text[s..e]}'")
|
}
|
||||||
} else {
|
gi += 2
|
||||||
println("Group [${g_name}] doesn't exist.")
|
}
|
||||||
}
|
// continuous saving
|
||||||
}
|
gi = 0
|
||||||
|
println("num of group item saved: ${re.group_csave[0]}")
|
||||||
} else {
|
for gi < re.group_csave[0] {
|
||||||
println("query: $query")
|
id := re.group_csave[1+gi*3]
|
||||||
lc := "-".repeat(err_pos)
|
st := re.group_csave[1+gi*3+1]
|
||||||
println("err : $lc^")
|
en := re.group_csave[1+gi*3+2]
|
||||||
err_str := re.get_parse_error_string(re_err)
|
println("cg id: ${id} [${st}, ${en}] => [${text[st..en]}]")
|
||||||
println("ERROR: $err_str")
|
gi++
|
||||||
}
|
}
|
||||||
|
println("raw array: ${re.group_csave[0..gi*3+2-1]}")
|
||||||
|
|
||||||
|
// named capturing groups
|
||||||
|
println("named capturing groups:")
|
||||||
|
for g_name in re.group_map.keys() {
|
||||||
|
s,e := re.get_group(g_name)
|
||||||
|
if s >= 0 && e > s {
|
||||||
|
println("'${g_name}':[$s, $e] => '${text[s..e]}'")
|
||||||
|
} else {
|
||||||
|
println("Group [${g_name}] doesn't exist.")
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -360,7 +343,7 @@ It is possible to set some flags in the regex parser that change the behavior of
|
||||||
|
|
||||||
```v
|
```v
|
||||||
// example of flag settings
|
// example of flag settings
|
||||||
mut re := regex.new_regex()
|
mut re := regex.new()
|
||||||
re.flag = regex.F_BIN
|
re.flag = regex.F_BIN
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -382,22 +365,22 @@ These functions are helper that create the `RE` struct, a `RE` struct can be cre
|
||||||
|
|
||||||
```v
|
```v
|
||||||
// regex create a regex object from the query string and compile it
|
// regex_opt creates a regex object from the query string and compiles it, returning an error if the compilation fails
|
||||||
pub fn regex(in_query string) (RE,int,int)
|
pub fn regex_opt(in_query string) ?RE
|
||||||
```
|
```
|
||||||
|
|
||||||
#### **Base initializer**
|
#### **Base initializer**
|
||||||
|
|
||||||
```v
|
```v
|
||||||
// new_regex create a REgex of small size, usually sufficient for ordinary use
|
// new creates a REgex of small size, usually sufficient for ordinary use
|
||||||
pub fn new_regex() RE
|
pub fn new() RE
|
||||||
|
|
||||||
// new_regex_by_size create a REgex of large size, mult specify the scale factor of the memory that will be allocated
|
// new_by_size creates a REgex of large size, mult specifies the scale factor of the memory that will be allocated
|
||||||
pub fn new_regex_by_size(mult int) RE
|
pub fn new_by_size(mult int) RE
|
||||||
```
|
```
|
||||||
After a base initializer is used, the regex expression must be compiled with:
|
After a base initializer is used, the regex expression must be compiled with:
|
||||||
```v
|
```v
|
||||||
// compile return (return code, index) where index is the index of the error in the query string if return code is an error code
|
// compile_opt compiles the REgex, returning an error if the compilation fails
|
||||||
pub fn (re mut RE) compile(in_txt string) (int,int)
|
pub fn (re mut RE) compile_opt(in_txt string) ?
|
||||||
```
|
```
|
||||||
|
|
||||||
### Operative Functions
|
### Operative Functions
|
||||||
|
@ -428,20 +411,9 @@ the following example code show how to visualize the syntax errors in the compil
|
||||||
|
|
||||||
```v
|
```v
|
||||||
query:= r"ciao da ab[ab-]" // there is an error, a range not closed!!
|
query:= r"ciao da ab[ab-]" // there is an error, a range not closed!!
|
||||||
mut re := new_regex()
|
mut re := new()
|
||||||
|
|
||||||
// re_err ==> is the return value, if < 0 it is an error
|
re.compile_opt(query) or { println(err) }
|
||||||
// re_pos ==> if re_err < 0, re_pos is the error index in the query string
|
|
||||||
re_err, err_pos := re.compile(query)
|
|
||||||
|
|
||||||
// print the error if one happen
|
|
||||||
if re_err != COMPILE_OK {
|
|
||||||
println("query: $query")
|
|
||||||
lc := "-".repeat(err_pos)
|
|
||||||
println("err : $lc^")
|
|
||||||
err_str := re.get_parse_error_string(re_err) // get the error string
|
|
||||||
println("ERROR: $err_str")
|
|
||||||
}
|
|
||||||
|
|
||||||
// output!!
|
// output!!
|
||||||
|
|
||||||
|
@ -543,7 +515,7 @@ fn custom_print(txt string) {
|
||||||
println("my log: $txt")
|
println("my log: $txt")
|
||||||
}
|
}
|
||||||
|
|
||||||
mut re := new_regex()
|
mut re := new()
|
||||||
re.log_func = custom_print // every debug output from now will call this function
|
re.log_func = custom_print // every debug output from now will call this function
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -571,38 +543,29 @@ tests = [
|
||||||
|
|
||||||
fn example() {
|
fn example() {
|
||||||
for c,tst in tests {
|
for c,tst in tests {
|
||||||
mut re := regex.new_regex()
|
mut re := regex.new()
|
||||||
re_err, err_pos := re.compile(tst.query)
|
re.compile_opt(tst.query) or { println(err) continue }
|
||||||
if re_err == regex.COMPILE_OK {
|
|
||||||
|
|
||||||
// print the query parsed with the groups ids
|
// print the query parsed with the groups ids
|
||||||
re.debug = 1 // set debug on at minimum level
|
re.debug = 1 // set debug on at minimum level
|
||||||
println("#${c:2d} query parsed: ${re.get_query()}")
|
println("#${c:2d} query parsed: ${re.get_query()}")
|
||||||
re.debug = 0
|
re.debug = 0
|
||||||
|
|
||||||
// do the match
|
// do the match
|
||||||
start, end := re.match_string(tst.source)
|
start, end := re.match_string(tst.source)
|
||||||
if start >= 0 && end > start {
|
if start >= 0 && end > start {
|
||||||
println("#${c:2d} found in: [$start, $end] => [${tst.source[start..end]}]")
|
println("#${c:2d} found in: [$start, $end] => [${tst.source[start..end]}]")
|
||||||
}
|
}
|
||||||
|
|
||||||
// print the groups
|
// print the groups
|
||||||
mut gi := 0
|
mut gi := 0
|
||||||
for gi < re.groups.len {
|
for gi < re.groups.len {
|
||||||
if re.groups[gi] >= 0 {
|
if re.groups[gi] >= 0 {
|
||||||
println("group ${gi/2:2d} :[${tst.source[re.groups[gi]..re.groups[gi+1]]}]")
|
println("group ${gi/2:2d} :[${tst.source[re.groups[gi]..re.groups[gi+1]]}]")
|
||||||
}
|
}
|
||||||
gi += 2
|
gi += 2
|
||||||
}
|
}
|
||||||
println("")
|
println("")
|
||||||
} else {
|
|
||||||
// print the compile error
|
|
||||||
println("query: $tst.query")
|
|
||||||
lc := "-".repeat(err_pos-1)
|
|
||||||
println("err : $lc^")
|
|
||||||
err_str := re.get_parse_error_string(re_err)
|
|
||||||
println("ERROR: $err_str")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
|
|
||||||
regex 0.9e
|
regex 0.9g
|
||||||
|
|
||||||
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
||||||
Use of this source code is governed by an MIT license
|
Use of this source code is governed by an MIT license
|
||||||
|
@ -19,7 +19,7 @@ module regex
|
||||||
import strings
|
import strings
|
||||||
|
|
||||||
pub const(
|
pub const(
|
||||||
v_regex_version = "0.9e" // regex module version
|
v_regex_version = "0.9g" // regex module version
|
||||||
|
|
||||||
max_code_len = 256 // default small base code len for the regex programs
|
max_code_len = 256 // default small base code len for the regex programs
|
||||||
max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
|
max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
|
||||||
|
@ -912,7 +912,12 @@ fn (re RE) parse_groups(in_txt string, in_i int) (int, bool, string, int) {
|
||||||
// main compiler
|
// main compiler
|
||||||
//
|
//
|
||||||
// compile return (return code, index) where index is the index of the error in the query string if return code is an error code
|
// compile return (return code, index) where index is the index of the error in the query string if return code is an error code
|
||||||
|
[deprecated]
|
||||||
pub fn (mut re RE) compile(in_txt string) (int,int) {
|
pub fn (mut re RE) compile(in_txt string) (int,int) {
|
||||||
|
return re.impl_compile(in_txt)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn (mut re RE) impl_compile(in_txt string) (int,int) {
|
||||||
mut i := 0 // input string index
|
mut i := 0 // input string index
|
||||||
mut pc := 0 // program counter
|
mut pc := 0 // program counter
|
||||||
mut tmp_code := u32(0)
|
mut tmp_code := u32(0)
|
||||||
|
@ -2187,6 +2192,7 @@ Public functions
|
||||||
//
|
//
|
||||||
|
|
||||||
// regex create a regex object from the query string
|
// regex create a regex object from the query string
|
||||||
|
[deprecated]
|
||||||
pub fn regex(in_query string) (RE,int,int){
|
pub fn regex(in_query string) (RE,int,int){
|
||||||
mut re := RE{}
|
mut re := RE{}
|
||||||
re.prog = [Token{}].repeat(in_query.len+1)
|
re.prog = [Token{}].repeat(in_query.len+1)
|
||||||
|
@ -2198,12 +2204,17 @@ pub fn regex(in_query string) (RE,int,int){
|
||||||
}
|
}
|
||||||
|
|
||||||
// new_regex create a RE of small size, usually sufficient for ordinary use
|
// new_regex create a RE of small size, usually sufficient for ordinary use
|
||||||
|
[deprecated]
|
||||||
pub fn new_regex() RE {
|
pub fn new_regex() RE {
|
||||||
return new_regex_by_size(1)
|
return impl_new_regex_by_size(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
// new_regex_by_size create a RE of large size, mult specify the scale factor of the memory that will be allocated
|
||||||
|
[deprecated]
|
||||||
pub fn new_regex_by_size(mult int) RE {
|
pub fn new_regex_by_size(mult int) RE {
|
||||||
|
return impl_new_regex_by_size(mult)
|
||||||
|
}
|
||||||
|
fn impl_new_regex_by_size(mult int) RE {
|
||||||
mut re := RE{}
|
mut re := RE{}
|
||||||
re.prog = [Token{}].repeat(max_code_len*mult) // max program length, default 256 istructions
|
re.prog = [Token{}].repeat(max_code_len*mult) // max program length, default 256 istructions
|
||||||
re.cc = [CharClass{}].repeat(max_code_len*mult) // char class list
|
re.cc = [CharClass{}].repeat(max_code_len*mult) // char class list
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
module regex
|
||||||
|
import strings
|
||||||
|
|
||||||
|
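// compile_opt compiles the RE pattern string; on failure it returns an error whose message shows the query, a marker at the error position and the parse error description, and whose code is the compile error code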
|
||||||
|
pub fn (mut re RE) compile_opt(pattern string) ? {
|
||||||
|
re_err,err_pos := re.impl_compile(pattern)
|
||||||
|
|
||||||
|
if re_err != compile_ok {
|
||||||
|
mut err_msg := strings.new_builder(300)
|
||||||
|
err_msg.write("query: $pattern\n")
|
||||||
|
line := "-".repeat(err_pos)
|
||||||
|
err_msg.write("err : ${line}^\n")
|
||||||
|
err_str := re.get_parse_error_string(re_err)
|
||||||
|
err_msg.write("ERROR: $err_str\n")
|
||||||
|
return error_with_code(err_msg.str(), re_err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// new creates a RE of small size, usually sufficient for ordinary use
|
||||||
|
pub fn new() RE {
|
||||||
|
return impl_new_regex_by_size(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// new_by_size creates a RE of large size, mult specifies the scale factor of the memory that will be allocated
|
||||||
|
pub fn new_by_size(mult int) RE {
|
||||||
|
return impl_new_regex_by_size(mult)
|
||||||
|
}
|
||||||
|
|
||||||
|
// regex_opt creates a new RE object and compiles the given pattern string into it, propagating the compile error if the compilation fails
|
||||||
|
pub fn regex_opt(pattern string) ?RE {
|
||||||
|
mut re := new()
|
||||||
|
re.compile_opt(pattern)?
|
||||||
|
return re
|
||||||
|
}
|
|
@ -175,48 +175,52 @@ fn test_regex(){
|
||||||
// debug print
|
// debug print
|
||||||
//println("#$c [$to.src] q[$to.q] ($to.s, $to.e)")
|
//println("#$c [$to.src] q[$to.q] ($to.s, $to.e)")
|
||||||
|
|
||||||
mut re, re_err, _ := regex.regex(to.q)
|
mut re := regex.regex_opt(to.q) or {
|
||||||
|
eprintln('err: $err')
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
re.group_csave = [-1].repeat(3*20+1)
|
re.group_csave = [-1].repeat(3*20+1)
|
||||||
|
|
||||||
if re_err == regex.compile_ok {
|
start, end := re.match_string(to.src)
|
||||||
start, end := re.match_string(to.src)
|
|
||||||
|
|
||||||
mut tmp_str := ""
|
mut tmp_str := ""
|
||||||
if start >= 0 && end > start{
|
if start >= 0 && end > start{
|
||||||
tmp_str = to.src[start..end]
|
tmp_str = to.src[start..end]
|
||||||
}
|
}
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
if start != to.s || end != to.e {
|
||||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||||
println("ERROR!")
|
println("ERROR!")
|
||||||
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||||
assert false
|
assert false
|
||||||
break
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// check cgroups
|
// check cgroups
|
||||||
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
|
||||||
println("Capturing group len error!")
|
println("Capturing group len error!")
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// check captured groups
|
||||||
|
mut ln := re.group_csave[0]*3
|
||||||
|
for ln > 0 {
|
||||||
|
if re.group_csave[ln] != to.cg[ln] {
|
||||||
assert false
|
assert false
|
||||||
}
|
}
|
||||||
|
ln--
|
||||||
|
}
|
||||||
|
|
||||||
// check captured groups
|
// check named captured groups
|
||||||
mut ln := re.group_csave[0]*3
|
for k in to.cgn.keys() {
|
||||||
for ln > 0 {
|
if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
|
||||||
if re.group_csave[ln] != to.cg[ln] {
|
println("Named capturing group error! [$k]")
|
||||||
assert false
|
assert false
|
||||||
}
|
continue
|
||||||
ln--
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check named captured groups
|
|
||||||
for k in to.cgn.keys() {
|
|
||||||
if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
|
|
||||||
println("Named capturing group error! [$k]")
|
|
||||||
assert false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -225,29 +229,27 @@ fn test_regex(){
|
||||||
// debug print
|
// debug print
|
||||||
//println("#$c [$to.src] q[$to.q] $to.r")
|
//println("#$c [$to.src] q[$to.q] $to.r")
|
||||||
|
|
||||||
mut re, re_err, err_pos := regex.regex(to.q)
|
mut re := regex.regex_opt(to.q) or {
|
||||||
if re_err == regex.compile_ok {
|
eprintln('err: $err')
|
||||||
res := re.find_all(to.src)
|
|
||||||
if res.len != to.r.len {
|
|
||||||
println("ERROR: find_all, array of different size.")
|
|
||||||
assert false
|
|
||||||
}
|
|
||||||
|
|
||||||
for c1,i in res {
|
|
||||||
if i != to.r[c1] {
|
|
||||||
println("ERROR: find_all, different indexes.")
|
|
||||||
assert false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
println("query: $to.q")
|
|
||||||
lc := "-".repeat(err_pos-1)
|
|
||||||
println("err : $lc^")
|
|
||||||
err_str := re.get_parse_error_string(re_err)
|
|
||||||
println("ERROR: $err_str")
|
|
||||||
assert false
|
assert false
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
res := re.find_all(to.src)
|
||||||
|
if res.len != to.r.len {
|
||||||
|
println("ERROR: find_all, array of different size.")
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for c1,i in res {
|
||||||
|
if i != to.r[c1] {
|
||||||
|
println("ERROR: find_all, different indexes.")
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check replace
|
// check replace
|
||||||
|
@ -255,97 +257,81 @@ fn test_regex(){
|
||||||
// debug print
|
// debug print
|
||||||
//println("#$c [$to.src] q[$to.q] $to.r")
|
//println("#$c [$to.src] q[$to.q] $to.r")
|
||||||
|
|
||||||
mut re, re_err, err_pos := regex.regex(to.q)
|
mut re := regex.regex_opt(to.q) or {
|
||||||
if re_err == regex.compile_ok {
|
eprintln('err: $err')
|
||||||
res := re.replace(to.src,to.rep)
|
|
||||||
if res != to.r {
|
|
||||||
println("ERROR: replace.")
|
|
||||||
assert false
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
println("query: $to.q")
|
|
||||||
lc := "-".repeat(err_pos-1)
|
|
||||||
println("err : $lc^")
|
|
||||||
err_str := re.get_parse_error_string(re_err)
|
|
||||||
println("ERROR: $err_str")
|
|
||||||
assert false
|
assert false
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
res := re.replace(to.src,to.rep)
|
||||||
|
if res != to.r {
|
||||||
|
println("ERROR: replace.")
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// check match and find
|
// check match and find
|
||||||
for c,to in match_test_suite {
|
for c,to in match_test_suite {
|
||||||
// debug print
|
// debug print
|
||||||
//println("#$c [$to.src] q[$to.q] $to.s")
|
println("#$c [$to.src] q[$to.q] $to.s $to.e")
|
||||||
|
|
||||||
// test the find
|
// test the find
|
||||||
if to.s > 0 {
|
if to.s > 0 {
|
||||||
mut re, re_err, err_pos := regex.regex(to.q)
|
mut re := regex.regex_opt(to.q) or {
|
||||||
if re_err == regex.compile_ok {
|
eprintln('err: $err')
|
||||||
//q_str := re.get_query()
|
|
||||||
//println("Query: $q_str")
|
|
||||||
start,end := re.find(to.src)
|
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
|
||||||
err_str := re.get_parse_error_string(start)
|
|
||||||
println("ERROR : $err_str")
|
|
||||||
assert false
|
|
||||||
} else {
|
|
||||||
//tmp_str := text[start..end]
|
|
||||||
//println("found in [$start, $end] => [$tmp_str]")
|
|
||||||
assert true
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
println("query: $to.q")
|
|
||||||
lc := "-".repeat(err_pos-1)
|
|
||||||
println("err : $lc^")
|
|
||||||
err_str := re.get_parse_error_string(re_err)
|
|
||||||
println("ERROR: $err_str")
|
|
||||||
assert false
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// q_str := re.get_query()
|
||||||
|
// println("Query: $q_str")
|
||||||
|
start,end := re.find(to.src)
|
||||||
|
|
||||||
|
if start != to.s || end != to.e {
|
||||||
|
err_str := re.get_parse_error_string(start)
|
||||||
|
println("ERROR : $err_str")
|
||||||
|
assert false
|
||||||
|
} else {
|
||||||
|
//tmp_str := text[start..end]
|
||||||
|
//println("found in [$start, $end] => [$tmp_str]")
|
||||||
|
assert true
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// test the match
|
// test the match
|
||||||
mut re := regex.new_regex()
|
mut re := regex.new()
|
||||||
//re.debug = true
|
//re.debug = true
|
||||||
|
|
||||||
re_err,err_pos := re.compile(to.q)
|
re.compile_opt(to.q) or {
|
||||||
if re_err == regex.compile_ok {
|
eprintln('err: $err')
|
||||||
//println("#$c [$to.src] q[$to.q]")
|
|
||||||
start, end := re.match_string(to.src)
|
|
||||||
|
|
||||||
mut tmp_str := ""
|
|
||||||
if start >= 0 && end > start{
|
|
||||||
tmp_str = to.src[start..end]
|
|
||||||
}
|
|
||||||
|
|
||||||
if start != to.s || end != to.e {
|
|
||||||
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
|
||||||
println("ERROR!")
|
|
||||||
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
|
||||||
assert false
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// rerun to test consistency
|
|
||||||
tmp_str1 := to.src.clone()
|
|
||||||
start1, end1 := re.match_string(tmp_str1)
|
|
||||||
if start1 != start || end1 != end {
|
|
||||||
println("two run ERROR!!")
|
|
||||||
assert false
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
println("query: $to.q")
|
|
||||||
lc := "-".repeat(err_pos-1)
|
|
||||||
println("err : $lc^")
|
|
||||||
err_str := re.get_parse_error_string(re_err)
|
|
||||||
println("ERROR: $err_str")
|
|
||||||
assert false
|
assert false
|
||||||
break
|
continue
|
||||||
}
|
}
|
||||||
|
//println("#$c [$to.src] q[$to.q]")
|
||||||
|
start, end := re.match_string(to.src)
|
||||||
|
|
||||||
|
mut tmp_str := ""
|
||||||
|
if start >= 0 && end > start{
|
||||||
|
tmp_str = to.src[start..end]
|
||||||
|
}
|
||||||
|
|
||||||
|
if start != to.s || end != to.e {
|
||||||
|
println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
|
||||||
|
println("ERROR!")
|
||||||
|
//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// rerun to test consistency
|
||||||
|
tmp_str1 := to.src.clone()
|
||||||
|
start1, end1 := re.match_string(tmp_str1)
|
||||||
|
if start1 != start || end1 != end {
|
||||||
|
println("two run ERROR!!")
|
||||||
|
assert false
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue