regex: new examples, new utility functions (#7150)
parent
1739b08e73
commit
6b7d7cee0c
|
@ -0,0 +1,80 @@
|
|||
/**********************************************************************
|
||||
* regex samples
|
||||
*
|
||||
* Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
||||
* Use of this source code is governed by an MIT license
|
||||
* that can be found in the LICENSE file.
|
||||
*
|
||||
* This file contains a collection of regex samples
|
||||
*
|
||||
**********************************************************************/
|
||||
import regex
|
||||
|
||||
/*
This simple function converts an HTML RGB value with 3 or 6 hex digits to a u32 value.
It is not optimized and it is meant for didactic purposes only.
example: #A0B0CC #A9F

The 3 digit shorthand is expanded by repeating each digit, so #A9F is
converted like #AA99FF. When the regex reports no match, 0 is returned.
*/
fn convert_html_rgb(in_col string) u32 {
	// a 4 character input ('#' plus 3 digits) is the shorthand form: one hex digit per channel
	is_short := in_col.len == 4
	n_digit := if is_short { 1 } else { 2 }
	// multiplying a single hex digit by 0x11 repeats it: 0xA * 0x11 == 0xAA
	col_mul := if is_short { 0x11 } else { 1 }

	// this is the regex query, it uses the V string interpolation to customize the regex query
	// NOTE: if you want to use escaped codes you must use the r"" (raw) strings,
	// *** please remember that the V interpolation doesn't work on raw strings. ***
	query := "#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})"

	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// one Re_group per capture group, in query order: red, green, blue
		group_list := re.get_group_list()
		r := ("0x" + in_col[group_list[0].start..group_list[0].end]).int() * col_mul
		g := ("0x" + in_col[group_list[1].start..group_list[1].end]).int() * col_mul
		b := ("0x" + in_col[group_list[2].start..group_list[2].end]).int() * col_mul
		println("r: $r g: $g b: $b")
		// pack the three 8 bit channels as 0xRRGGBB
		res = u32(r) << 16 | u32(g) << 8 | u32(b)
	}
	return res
}
|
||||
|
||||
/*
This function demonstrates the use of the named groups.

It converts an HTML RGB value with 3 or 6 hex digits to a u32 value.
The 3 digit shorthand is expanded by repeating each digit, so #A9F is
converted like #AA99FF. When the regex reports no match, 0 is returned.
*/
fn convert_html_rgb_n(in_col string) u32 {
	// a 4 character input ('#' plus 3 digits) is the shorthand form: one hex digit per channel
	is_short := in_col.len == 4
	n_digit := if is_short { 1 } else { 2 }
	// multiplying a single hex digit by 0x11 repeats it: 0xA * 0x11 == 0xAA
	col_mul := if is_short { 0x11 } else { 1 }

	query := "#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})"

	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// each named group is looked up by its name and used to slice the input
		red_s, red_e := re.get_group("red")
		r := ("0x" + in_col[red_s..red_e]).int() * col_mul

		green_s, green_e := re.get_group("green")
		g := ("0x" + in_col[green_s..green_e]).int() * col_mul

		blue_s, blue_e := re.get_group("blue")
		b := ("0x" + in_col[blue_s..blue_e]).int() * col_mul

		println("r: $r g: $g b: $b")
		// pack the three 8 bit channels as 0xRRGGBB
		res = u32(r) << 16 | u32(g) << 8 | u32(b)
	}
	return res
}
|
||||
|
||||
fn main() {
	// convert HTML rgb colors using groups
	for col in ["#A0b0Cc", "#ABC"] {
		println(convert_html_rgb(col).hex())
	}

	// convert HTML rgb colors using named groups
	for col in ["#A0B0CC", "#ABC"] {
		println(convert_html_rgb_n(col).hex())
	}
}
|
|
@ -1,4 +1,4 @@
|
|||
# V RegEx (Regular expression) 0.9g
|
||||
# V RegEx (Regular expression) 0.9h
|
||||
|
||||
[TOC]
|
||||
|
||||
|
@ -185,6 +185,56 @@ for gi < re.groups.len {
|
|||
**note:** *to show the `group id number` in the result of the `get_query()`*
|
||||
*the flag `debug` of the RE object must be `1` or `2`*
|
||||
|
||||
In order to simplify the use of the captured groups it is possible to use the
|
||||
utility function: `get_group_list`.
|
||||
|
||||
This function returns a list of groups using this support struct:
|
||||
|
||||
```v oksyntax
|
||||
// Re_group holds the start and end index of a group;
// both fields are -1 unless they are set explicitly.
pub struct Re_group {
pub:
	start int = -1
	end   int = -1
}
|
||||
```
|
||||
|
||||
Here is an example of use:
|
||||
|
||||
```v oksyntax
|
||||
/*
This simple function converts an HTML RGB value with 3 or 6 hex digits to a u32 value.
It is not optimized and it is meant for didactic purposes only.
example: #A0B0CC #A9F

The 3 digit shorthand is expanded by repeating each digit, so #A9F is
converted like #AA99FF. When the regex reports no match, 0 is returned.
*/
fn convert_html_rgb(in_col string) u32 {
	// a 4 character input ('#' plus 3 digits) is the shorthand form: one hex digit per channel
	is_short := in_col.len == 4
	n_digit := if is_short { 1 } else { 2 }
	// multiplying a single hex digit by 0x11 repeats it: 0xA * 0x11 == 0xAA
	col_mul := if is_short { 0x11 } else { 1 }

	// this is the regex query, it uses the V string interpolation to customize the regex query
	// NOTE: if you want to use escaped codes you must use the r"" (raw) strings,
	// *** please remember that the V interpolation doesn't work on raw strings. ***
	query := "#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})"

	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		group_list := re.get_group_list() // this is the utility function
		r := ("0x" + in_col[group_list[0].start..group_list[0].end]).int() * col_mul
		g := ("0x" + in_col[group_list[1].start..group_list[1].end]).int() * col_mul
		b := ("0x" + in_col[group_list[2].start..group_list[2].end]).int() * col_mul
		println("r: $r g: $g b: $b")
		// pack the three 8 bit channels as 0xRRGGBB
		res = u32(r) << 16 | u32(g) << 8 | u32(b)
	}
	return res
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Groups Continuous saving
|
||||
|
||||
In particular situations it is useful to have a continuous save of the groups,
|
||||
|
@ -281,7 +331,7 @@ Have a look at the example for the use of them.
|
|||
|
||||
example:
|
||||
|
||||
```v oksyntax
|
||||
```v ignore
|
||||
import regex
|
||||
fn main() {
|
||||
test_regex()
|
||||
|
@ -367,11 +417,49 @@ named capturing groups:
|
|||
'token':[42, 46] => 'html'
|
||||
```
|
||||
|
||||
In order to simplify the use of the named groups it is possible to use the names map in the `re`
|
||||
struct using the function `re.get_group`.
|
||||
|
||||
Here is a more complex example of use:
|
||||
|
||||
```v oksyntax
|
||||
/*
This function demonstrates the use of the named groups.

It converts an HTML RGB value with 3 or 6 hex digits to a u32 value.
The 3 digit shorthand is expanded by repeating each digit, so #A9F is
converted like #AA99FF. When the regex reports no match, 0 is returned.
*/
fn convert_html_rgb_n(in_col string) u32 {
	// a 4 character input ('#' plus 3 digits) is the shorthand form: one hex digit per channel
	is_short := in_col.len == 4
	n_digit := if is_short { 1 } else { 2 }
	// multiplying a single hex digit by 0x11 repeats it: 0xA * 0x11 == 0xAA
	col_mul := if is_short { 0x11 } else { 1 }

	query := "#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})"

	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// each named group is looked up by its name and used to slice the input
		red_s, red_e := re.get_group("red")
		r := ("0x" + in_col[red_s..red_e]).int() * col_mul

		green_s, green_e := re.get_group("green")
		g := ("0x" + in_col[green_s..green_e]).int() * col_mul

		blue_s, blue_e := re.get_group("blue")
		b := ("0x" + in_col[blue_s..blue_e]).int() * col_mul

		println("r: $r g: $g b: $b")
		// pack the three 8 bit channels as 0xRRGGBB
		res = u32(r) << 16 | u32(g) << 8 | u32(b)
	}
	return res
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Flags
|
||||
|
||||
It is possible to set some flags in the regex parser that change the behavior of the parser itself.
|
||||
|
||||
```v oksyntax
|
||||
```v ignore
|
||||
// example of flag settings
|
||||
mut re := regex.new()
|
||||
re.flag = regex.F_BIN
|
||||
|
@ -395,14 +483,14 @@ a `RE` struct can be created manually if you needed.
|
|||
|
||||
#### **Simplified initializer**
|
||||
|
||||
```v
|
||||
```v ignore
|
||||
// regex_opt creates a regex object from the query string and compiles it
|
||||
pub fn regex_opt(in_query string) ?RE
|
||||
```
|
||||
|
||||
#### **Base initializer**
|
||||
|
||||
```v
|
||||
```v ignore
|
||||
// new creates a regex of small size, usually sufficient for ordinary use
|
||||
pub fn new() RE
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
|
||||
regex 0.9g
|
||||
regex 0.9h
|
||||
|
||||
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
||||
Use of this source code is governed by an MIT license
|
||||
|
@ -19,7 +19,7 @@ module regex
|
|||
import strings
|
||||
|
||||
pub const(
|
||||
v_regex_version = "0.9g" // regex module version
|
||||
v_regex_version = "0.9h" // regex module version
|
||||
|
||||
max_code_len = 256 // default small base code len for the regex programs
|
||||
max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
|
||||
|
@ -2381,3 +2381,34 @@ pub fn (mut re RE) replace(in_txt string, repl string) string {
|
|||
}
|
||||
return in_txt
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Utilities
|
||||
|
||||
*/
|
||||
|
||||
// Re_group holds the start and end index of a group;
// both fields are -1 unless they are set explicitly.
pub struct Re_group {
pub:
	start int = -1
	end   int = -1
}
|
||||
|
||||
// get_group_list returns a list of Re_group for the found groups.
// `re.groups` is read as consecutive (start, end) pairs; a pair whose
// start is negative is skipped, so its slot keeps the default Re_group value.
pub fn (re RE) get_group_list() []Re_group {
	mut group_list := []Re_group{len: re.groups.len >> 1}
	for pos := 0; pos < re.groups.len; pos += 2 {
		if re.groups[pos] < 0 {
			continue
		}
		// pair index = position in re.groups divided by two
		group_list[pos >> 1] = Re_group{
			start: re.groups[pos]
			end: re.groups[pos + 1]
		}
	}
	return group_list
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue