regex: new examples, new utility functions (#7150)
parent
1739b08e73
commit
6b7d7cee0c
|
@ -0,0 +1,80 @@
|
||||||
|
/**********************************************************************
|
||||||
|
* regex samples
|
||||||
|
*
|
||||||
|
* Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
||||||
|
* Use of this source code is governed by an MIT license
|
||||||
|
* that can be found in the LICENSE file.
|
||||||
|
*
|
||||||
|
* This file contains a collection of regex samples
|
||||||
|
*
|
||||||
|
**********************************************************************/
|
||||||
|
import regex
|
||||||
|
|
||||||
|
/*
This simple function converts an HTML RGB value with 3 or 6 hex digits to a u32 value (0xRRGGBB),
this function is not optimized and it is only for didactic purposes.

The short 3-digit form is expanded by repeating each digit, as HTML/CSS do:
#A9F is the same color as #AA99FF.

example: #A0B0CC #A9F

in_col:  the color string, a '#' followed by 3 or 6 hex digits
returns: the color packed as 0xRRGGBB, or 0 when the regex reports no match (start < 0)
panics:  if the regex query cannot be compiled
*/
fn convert_html_rgb(in_col string) u32 {
	// "#RGB" is 4 chars long => 1 hex digit per channel, every other length is parsed as "#RRGGBB"
	is_short := in_col.len == 4
	n_digit := if is_short { 1 } else { 2 }
	// multiplying a single hex digit by 0x11 repeats it in both nibbles (0xA => 0xAA)
	col_mul := if is_short { 0x11 } else { 1 }

	// this is the regex query, it uses the V string interpolation to customize the regex query
	// NOTE: if you want to use escaped codes you must use the r"" (raw) strings,
	// *** please remember that the V interpolation doesn't work on raw strings. ***

	query := "#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})"

	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// one Re_group per capture group, in query order: red, green, blue
		group_list := re.get_group_list()
		// the "0x" prefix lets int() parse the captured digits as hexadecimal
		r := ("0x" + in_col[group_list[0].start..group_list[0].end]).int() * col_mul
		g := ("0x" + in_col[group_list[1].start..group_list[1].end]).int() * col_mul
		b := ("0x" + in_col[group_list[2].start..group_list[2].end]).int() * col_mul
		println("r: $r g: $g b: $b")
		res = u32(r) << 16 | u32(g) << 8 | u32(b)
	}
	return res
}
|
||||||
|
|
||||||
|
/*
This function demonstrates the use of the named groups.

It converts an HTML RGB value with 3 or 6 hex digits to a u32 value (0xRRGGBB).
The short 3-digit form is expanded by repeating each digit, as HTML/CSS do:
#A9F is the same color as #AA99FF.

in_col:  the color string, a '#' followed by 3 or 6 hex digits
returns: the color packed as 0xRRGGBB, or 0 when the regex reports no match (start < 0)
panics:  if the regex query cannot be compiled
*/
fn convert_html_rgb_n(in_col string) u32 {
	// "#RGB" is 4 chars long => 1 hex digit per channel, every other length is parsed as "#RRGGBB"
	is_short := in_col.len == 4
	n_digit := if is_short { 1 } else { 2 }
	// multiplying a single hex digit by 0x11 repeats it in both nibbles (0xA => 0xAA)
	col_mul := if is_short { 0x11 } else { 1 }

	// every channel is captured in its own named group: red, green and blue
	query := "#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})"

	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// get_group returns the start and end indexes of the named group inside in_col,
		// the "0x" prefix lets int() parse the captured digits as hexadecimal
		red_s, red_e := re.get_group("red")
		r := ("0x" + in_col[red_s..red_e]).int() * col_mul

		green_s, green_e := re.get_group("green")
		g := ("0x" + in_col[green_s..green_e]).int() * col_mul

		blue_s, blue_e := re.get_group("blue")
		b := ("0x" + in_col[blue_s..blue_e]).int() * col_mul

		println("r: $r g: $g b: $b")
		res = u32(r) << 16 | u32(g) << 8 | u32(b)
	}
	return res
}
|
||||||
|
|
||||||
|
// main runs both converters on a 6-digit and a 3-digit sample color
// and prints each result as a hex string
fn main() {
	// convert HTML rgb colors using groups
	for color in ["#A0b0Cc", "#ABC"] {
		println(convert_html_rgb(color).hex())
	}

	// convert HTML rgb colors using named groups
	for color in ["#A0B0CC", "#ABC"] {
		println(convert_html_rgb_n(color).hex())
	}
}
|
|
@ -1,4 +1,4 @@
|
||||||
# V RegEx (Regular expression) 0.9g
|
# V RegEx (Regular expression) 0.9h
|
||||||
|
|
||||||
[TOC]
|
[TOC]
|
||||||
|
|
||||||
|
@ -185,6 +185,56 @@ for gi < re.groups.len {
|
||||||
**note:** *to show the `group id number` in the result of the `get_query()`*
|
**note:** *to show the `group id number` in the result of the `get_query()`*
|
||||||
*the flag `debug` of the RE object must be `1` or `2`*
|
*the flag `debug` of the RE object must be `1` or `2`*
|
||||||
|
|
||||||
|
In order to simplify the use of the captured groups it is possible to use the
|
||||||
|
utility function: `get_group_list`.
|
||||||
|
|
||||||
|
This function returns a list of groups using this support struct:
|
||||||
|
|
||||||
|
```v oksyntax
|
||||||
|
// Re_group holds the start and end indexes of one captured group,
// both fields default to -1
pub struct Re_group {
pub:
	start int = -1
	end   int = -1
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Here is an example of use:
|
||||||
|
|
||||||
|
```v oksyntax
|
||||||
|
/*
This simple function converts an HTML RGB value with 3 or 6 hex digits to a u32 value (0xRRGGBB),
this function is not optimized and it is only for didactic purposes.

The short 3-digit form is expanded by repeating each digit, as HTML/CSS do:
#A9F is the same color as #AA99FF.

example: #A0B0CC #A9F

in_col:  the color string, a '#' followed by 3 or 6 hex digits
returns: the color packed as 0xRRGGBB, or 0 when the regex reports no match (start < 0)
panics:  if the regex query cannot be compiled
*/
fn convert_html_rgb(in_col string) u32 {
	// "#RGB" is 4 chars long => 1 hex digit per channel, every other length is parsed as "#RRGGBB"
	is_short := in_col.len == 4
	n_digit := if is_short { 1 } else { 2 }
	// multiplying a single hex digit by 0x11 repeats it in both nibbles (0xA => 0xAA)
	col_mul := if is_short { 0x11 } else { 1 }

	// this is the regex query, it uses the V string interpolation to customize the regex query
	// NOTE: if you want to use escaped codes you must use the r"" (raw) strings,
	// *** please remember that the V interpolation doesn't work on raw strings. ***

	query := "#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})"

	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		group_list := re.get_group_list() // this is the utility function
		// the "0x" prefix lets int() parse the captured digits as hexadecimal
		r := ("0x" + in_col[group_list[0].start..group_list[0].end]).int() * col_mul
		g := ("0x" + in_col[group_list[1].start..group_list[1].end]).int() * col_mul
		b := ("0x" + in_col[group_list[2].start..group_list[2].end]).int() * col_mul
		println("r: $r g: $g b: $b")
		res = u32(r) << 16 | u32(g) << 8 | u32(b)
	}
	return res
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Groups Continuous saving
|
### Groups Continuous saving
|
||||||
|
|
||||||
In particular situations it is useful have a continuous save of the groups,
|
In particular situations it is useful have a continuous save of the groups,
|
||||||
|
@ -281,7 +331,7 @@ Have a look at the example for the use of them.
|
||||||
|
|
||||||
example:
|
example:
|
||||||
|
|
||||||
```v oksyntax
|
```v ignore
|
||||||
import regex
|
import regex
|
||||||
fn main() {
|
fn main() {
|
||||||
test_regex()
|
test_regex()
|
||||||
|
@ -367,11 +417,49 @@ named capturing groups:
|
||||||
'token':[42, 46] => 'html'
|
'token':[42, 46] => 'html'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
In order to simplify the use of the named groups it is possible to use the names map in the `re`
|
||||||
|
struct using the function `re.get_group`.
|
||||||
|
|
||||||
|
Here is a more complex example of use:
|
||||||
|
|
||||||
|
```v oksyntax
|
||||||
|
/*
This function demonstrates the use of the named groups.

It converts an HTML RGB value with 3 or 6 hex digits to a u32 value (0xRRGGBB).
The short 3-digit form is expanded by repeating each digit, as HTML/CSS do:
#A9F is the same color as #AA99FF.

in_col:  the color string, a '#' followed by 3 or 6 hex digits
returns: the color packed as 0xRRGGBB, or 0 when the regex reports no match (start < 0)
panics:  if the regex query cannot be compiled
*/
fn convert_html_rgb_n(in_col string) u32 {
	// "#RGB" is 4 chars long => 1 hex digit per channel, every other length is parsed as "#RRGGBB"
	is_short := in_col.len == 4
	n_digit := if is_short { 1 } else { 2 }
	// multiplying a single hex digit by 0x11 repeats it in both nibbles (0xA => 0xAA)
	col_mul := if is_short { 0x11 } else { 1 }

	// every channel is captured in its own named group: red, green and blue
	query := "#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})"

	mut re := regex.regex_opt(query) or { panic(err) }
	start, end := re.match_string(in_col)
	println("start: $start, end: $end")
	mut res := u32(0)
	if start >= 0 {
		// get_group returns the start and end indexes of the named group inside in_col,
		// the "0x" prefix lets int() parse the captured digits as hexadecimal
		red_s, red_e := re.get_group("red")
		r := ("0x" + in_col[red_s..red_e]).int() * col_mul

		green_s, green_e := re.get_group("green")
		g := ("0x" + in_col[green_s..green_e]).int() * col_mul

		blue_s, blue_e := re.get_group("blue")
		b := ("0x" + in_col[blue_s..blue_e]).int() * col_mul

		println("r: $r g: $g b: $b")
		res = u32(r) << 16 | u32(g) << 8 | u32(b)
	}
	return res
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Flags
|
## Flags
|
||||||
|
|
||||||
It is possible to set some flags in the regex parser that change the behavior of the parser itself.
|
It is possible to set some flags in the regex parser that change the behavior of the parser itself.
|
||||||
|
|
||||||
```v oksyntax
|
```v ignore
|
||||||
// example of flag settings
|
// example of flag settings
|
||||||
mut re := regex.new()
|
mut re := regex.new()
|
||||||
re.flag = regex.F_BIN
|
re.flag = regex.F_BIN
|
||||||
|
@ -395,14 +483,14 @@ a `RE` struct can be created manually if you needed.
|
||||||
|
|
||||||
#### **Simplified initializer**
|
#### **Simplified initializer**
|
||||||
|
|
||||||
```v
|
```v ignore
|
||||||
// regex create a regex object from the query string and compile it
|
// regex create a regex object from the query string and compile it
|
||||||
pub fn regex_opt(in_query string) ?RE
|
pub fn regex_opt(in_query string) ?RE
|
||||||
```
|
```
|
||||||
|
|
||||||
#### **Base initializer**
|
#### **Base initializer**
|
||||||
|
|
||||||
```v
|
```v ignore
|
||||||
// new_regex create a REgex of small size, usually sufficient for ordinary use
|
// new_regex create a REgex of small size, usually sufficient for ordinary use
|
||||||
pub fn new() RE
|
pub fn new() RE
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
|
|
||||||
regex 0.9g
|
regex 0.9h
|
||||||
|
|
||||||
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
Copyright (c) 2019-2020 Dario Deledda. All rights reserved.
|
||||||
Use of this source code is governed by an MIT license
|
Use of this source code is governed by an MIT license
|
||||||
|
@ -19,7 +19,7 @@ module regex
|
||||||
import strings
|
import strings
|
||||||
|
|
||||||
pub const(
|
pub const(
|
||||||
v_regex_version = "0.9g" // regex module version
|
v_regex_version = "0.9h" // regex module version
|
||||||
|
|
||||||
max_code_len = 256 // default small base code len for the regex programs
|
max_code_len = 256 // default small base code len for the regex programs
|
||||||
max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
|
max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30
|
||||||
|
@ -2381,3 +2381,34 @@ pub fn (mut re RE) replace(in_txt string, repl string) string {
|
||||||
}
|
}
|
||||||
return in_txt
|
return in_txt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
Utilities
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Re_group holds the start and end indexes of one captured group,
// both fields default to -1
pub struct Re_group {
pub:
	start int = -1
	end   int = -1
}
|
||||||
|
|
||||||
|
// get_group_list returns one Re_group for every pair of entries in re.groups.
// A pair whose first entry is negative is left as a default Re_group (start and end are -1).
pub fn (re RE) get_group_list() []Re_group {
	// re.groups is read as consecutive [start, end] pairs, so the list has half its length
	mut group_list := []Re_group{len: re.groups.len >> 1}
	for idx := 0; idx < re.groups.len; idx += 2 {
		if re.groups[idx] < 0 {
			continue
		}
		group_list[idx >> 1] = Re_group{
			start: re.groups[idx]
			end: re.groups[idx + 1]
		}
	}
	return group_list
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue