regex: new examples, new utility functions (#7150)

pull/7152/head
penguindark 2020-12-05 22:24:08 +01:00 committed by GitHub
parent 1739b08e73
commit 6b7d7cee0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 206 additions and 7 deletions

View File

@ -0,0 +1,80 @@
/**********************************************************************
* regex samples
*
* Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
* Use of this source code is governed by an MIT license
* that can be found in the LICENSE file.
*
* This file contains a collection of regex samples
*
**********************************************************************/
import regex
/*
convert_html_rgb converts an HTML RGB color with 3 or 6 hex digits to an u32 value
holding the color as 0xRRGGBB. It returns 0 when the string does not match.
The 3 digit shorthand is expanded by repeating each digit: #A9F is read as #AA99FF.
This function is not optimized, it is only for didactic purposes.
example: #A0B0CC #A9F
*/
fn convert_html_rgb(in_col string) u32 {
	// '#' followed by 3 digits (len 4) is the shorthand form, anything else is
	// treated as the full 6 digit form
	n_digit := if in_col.len == 4 { 1 } else { 2 }
	// a single hex digit X expands to XX, that is X * 0x11
	col_mul := if in_col.len == 4 { 0x11 } else { 1 }
	// this is the regex query, it uses the V string interpolation to customize the regex query
	// NOTE: if you want to use escaped codes you must use the r"" (raw) strings,
	// *** please remember that the V interpolation doesn't work on raw strings. ***
	query := "#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})"
	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// one entry per capture group, in query order: red, green, blue
		group_list := re.get_group_list()
		r := ("0x" + in_col[group_list[0].start..group_list[0].end]).int() * col_mul
		g := ("0x" + in_col[group_list[1].start..group_list[1].end]).int() * col_mul
		b := ("0x" + in_col[group_list[2].start..group_list[2].end]).int() * col_mul
		println("r: $r g: $g b: $b")
		res = (u32(r) << 16) | (u32(g) << 8) | u32(b)
	}
	return res
}
/*
convert_html_rgb_n demonstrates the use of the named groups.
It converts an HTML RGB color with 3 or 6 hex digits to an u32 value holding the
color as 0xRRGGBB and returns 0 when the string does not match.
The 3 digit shorthand is expanded by repeating each digit: #A9F is read as #AA99FF.
*/
fn convert_html_rgb_n(in_col string) u32 {
	// '#' followed by 3 digits (len 4) is the shorthand form, anything else is
	// treated as the full 6 digit form
	n_digit := if in_col.len == 4 { 1 } else { 2 }
	// a single hex digit X expands to XX, that is X * 0x11
	col_mul := if in_col.len == 4 { 0x11 } else { 1 }
	query := "#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})"
	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// every channel is looked up through the name of its capture group
		red_s, red_e := re.get_group("red")
		r := ("0x" + in_col[red_s..red_e]).int() * col_mul
		green_s, green_e := re.get_group("green")
		g := ("0x" + in_col[green_s..green_e]).int() * col_mul
		blue_s, blue_e := re.get_group("blue")
		b := ("0x" + in_col[blue_s..blue_e]).int() * col_mul
		println("r: $r g: $g b: $b")
		res = (u32(r) << 16) | (u32(g) << 8) | u32(b)
	}
	return res
}
fn main() {
	// convert HTML rgb colors using the indexed groups
	for col in ["#A0b0Cc", "#ABC"] {
		println(convert_html_rgb(col).hex())
	}
	// convert HTML rgb colors using the named groups
	for col in ["#A0B0CC", "#ABC"] {
		println(convert_html_rgb_n(col).hex())
	}
}

View File

@ -1,4 +1,4 @@
# V RegEx (Regular expression) 0.9g # V RegEx (Regular expression) 0.9h
[TOC] [TOC]
@ -185,6 +185,56 @@ for gi < re.groups.len {
**note:** *to show the `group id number` in the result of the `get_query()`* **note:** *to show the `group id number` in the result of the `get_query()`*
*the flag `debug` of the RE object must be `1` or `2`* *the flag `debug` of the RE object must be `1` or `2`*
In order to simplify the use of the captured groups it is possible to use the
utility function: `get_group_list`.
This function returns a list of groups using this support struct:
```v oksyntax
// Re_group holds the start and end index of one captured group
pub struct Re_group {
pub:
	start int = -1 // start index of the group, -1 by default
	end   int = -1 // end index of the group, -1 by default
}
```
Here is an example of use:
```v oksyntax
/*
convert_html_rgb converts an HTML RGB color with 3 or 6 hex digits to an u32 value
holding the color as 0xRRGGBB. It returns 0 when the string does not match.
The 3 digit shorthand is expanded by repeating each digit: #A9F is read as #AA99FF.
This function is not optimized, it is only for didactic purposes.
example: #A0B0CC #A9F
*/
fn convert_html_rgb(in_col string) u32 {
	// '#' followed by 3 digits (len 4) is the shorthand form, anything else is
	// treated as the full 6 digit form
	n_digit := if in_col.len == 4 { 1 } else { 2 }
	// a single hex digit X expands to XX, that is X * 0x11
	col_mul := if in_col.len == 4 { 0x11 } else { 1 }
	// this is the regex query, it uses the V string interpolation to customize the regex query
	// NOTE: if you want to use escaped codes you must use the r"" (raw) strings,
	// *** please remember that the V interpolation doesn't work on raw strings. ***
	query := "#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})"
	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		group_list := re.get_group_list() // this is the utility function
		r := ("0x" + in_col[group_list[0].start..group_list[0].end]).int() * col_mul
		g := ("0x" + in_col[group_list[1].start..group_list[1].end]).int() * col_mul
		b := ("0x" + in_col[group_list[2].start..group_list[2].end]).int() * col_mul
		println("r: $r g: $g b: $b")
		res = (u32(r) << 16) | (u32(g) << 8) | u32(b)
	}
	return res
}
```
### Groups Continuous saving ### Groups Continuous saving
In particular situations it is useful have a continuous save of the groups, In particular situations it is useful have a continuous save of the groups,
@ -281,7 +331,7 @@ Have a look at the example for the use of them.
example: example:
```v oksyntax ```v ignore
import regex import regex
fn main() { fn main() {
test_regex() test_regex()
@ -367,11 +417,49 @@ named capturing groups:
'token':[42, 46] => 'html' 'token':[42, 46] => 'html'
``` ```
In order to simplify the use of the named groups it is possible to use the names map in the `re`
struct through the function `re.get_group`.
Here is a more complex example of use:
```v oksyntax
/*
convert_html_rgb_n demonstrates the use of the named groups.
It converts an HTML RGB color with 3 or 6 hex digits to an u32 value holding the
color as 0xRRGGBB and returns 0 when the string does not match.
The 3 digit shorthand is expanded by repeating each digit: #A9F is read as #AA99FF.
*/
fn convert_html_rgb_n(in_col string) u32 {
	// '#' followed by 3 digits (len 4) is the shorthand form, anything else is
	// treated as the full 6 digit form
	n_digit := if in_col.len == 4 { 1 } else { 2 }
	// a single hex digit X expands to XX, that is X * 0x11
	col_mul := if in_col.len == 4 { 0x11 } else { 1 }
	query := "#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})"
	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// every channel is looked up through the name of its capture group
		red_s, red_e := re.get_group("red")
		r := ("0x" + in_col[red_s..red_e]).int() * col_mul
		green_s, green_e := re.get_group("green")
		g := ("0x" + in_col[green_s..green_e]).int() * col_mul
		blue_s, blue_e := re.get_group("blue")
		b := ("0x" + in_col[blue_s..blue_e]).int() * col_mul
		println("r: $r g: $g b: $b")
		res = (u32(r) << 16) | (u32(g) << 8) | u32(b)
	}
	return res
}
```
## Flags ## Flags
It is possible to set some flags in the regex parser that change the behavior of the parser itself. It is possible to set some flags in the regex parser that change the behavior of the parser itself.
```v oksyntax ```v ignore
// example of flag settings // example of flag settings
mut re := regex.new() mut re := regex.new()
re.flag = regex.F_BIN re.flag = regex.F_BIN
@ -395,14 +483,14 @@ a `RE` struct can be created manually if you needed.
#### **Simplified initializer** #### **Simplified initializer**
```v ```v ignore
// regex create a regex object from the query string and compile it // regex create a regex object from the query string and compile it
pub fn regex_opt(in_query string) ?RE pub fn regex_opt(in_query string) ?RE
``` ```
#### **Base initializer** #### **Base initializer**
```v ```v ignore
// new_regex create a REgex of small size, usually sufficient for ordinary use // new_regex create a REgex of small size, usually sufficient for ordinary use
pub fn new() RE pub fn new() RE

View File

@ -1,6 +1,6 @@
/* /*
regex 0.9g regex 0.9h
Copyright (c) 2019-2020 Dario Deledda. All rights reserved. Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license Use of this source code is governed by an MIT license
@ -19,7 +19,7 @@ module regex
import strings import strings
pub const( pub const(
v_regex_version = "0.9g" // regex module version v_regex_version = "0.9h" // regex module version
max_code_len = 256 // default small base code len for the regex programs max_code_len = 256 // default small base code len for the regex programs
max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30 max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
@ -2381,3 +2381,34 @@ pub fn (mut re RE) replace(in_txt string, repl string) string {
} }
return in_txt return in_txt
} }
/*
Utilities
*/
// Re_group holds the start and end index of one captured group,
// both fields default to -1
pub struct Re_group {
pub:
	start int = -1 // start index of the group
	end   int = -1 // end index of the group
}
// get_group_list returns a list with one Re_group for every (start, end) pair
// stored in re.groups; a pair whose start is negative is left as a default
// Re_group (start and end equal to -1)
pub fn (re RE) get_group_list() []Re_group {
	mut group_list := []Re_group{len: re.groups.len >> 1}
	// re.groups is walked two items at a time: [idx] is the start, [idx + 1] the end
	for idx := 0; idx < re.groups.len; idx += 2 {
		if re.groups[idx] < 0 {
			continue
		}
		group_list[idx >> 1] = Re_group{
			start: re.groups[idx]
			end: re.groups[idx + 1]
		}
	}
	return group_list
}