diff --git a/examples/regex_example.v b/examples/regex_example.v new file mode 100644 index 0000000000..b76d668b9d --- /dev/null +++ b/examples/regex_example.v @@ -0,0 +1,80 @@ +/********************************************************************** +* regex samples +* +* Copyright (c) 2019-2020 Dario Deledda. All rights reserved. +* Use of this source code is governed by an MIT license +* that can be found in the LICENSE file. +* +* This file contains a collection of regex samples +* +**********************************************************************/ +import regex + +/* +This simple function converts an HTML RGB value with 3 or 6 hex digits to a u32 value, +this function is not optimized and it is only for didactic purposes +example: #A0B0CC #A9F +*/ +fn convert_html_rgb(in_col string) u32 { + mut n_digit := if in_col.len == 4 { 1 } else { 2 } // "#RGB" (4 chars) has one hex digit per channel, otherwise two + mut col_mul := if in_col.len == 4 { 4 } else { 0 } // a single digit is shifted left by 4 bits into the high nibble + + // this is the regex query, it uses the V string interpolation to customize the regex query + // NOTE: if you want to use escaped codes you must use the r"" (raw) strings, + // *** please remember that the V interpolation doesn't work on raw strings. 
*** + + query:= "#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})" + + mut re := regex.regex_opt(query) or { panic(err) } + start, end := re.match_string(in_col) + println("start: $start, end: $end") + mut res := u32(0) + if start >= 0 { + group_list := re.get_group_list() + r := ("0x" + in_col[group_list[0].start..group_list[0].end]).int() << col_mul + g := ("0x" + in_col[group_list[1].start..group_list[1].end]).int() << col_mul + b := ("0x" + in_col[group_list[2].start..group_list[2].end]).int() << col_mul + println("r: $r g: $g b: $b") + res = u32(r) << 16 | u32(g) << 8 | u32(b) + } + return res +} + +/* +This function demonstrates the use of the named groups: it does the same conversion as convert_html_rgb, but reads each color component by name through re.get_group +*/ +fn convert_html_rgb_n(in_col string) u32 { + mut n_digit := if in_col.len == 4 { 1 } else { 2 } + mut col_mul := if in_col.len == 4 { 4 } else { 0 } + + query:= "#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})" + + mut re := regex.regex_opt(query) or { panic(err) } + start, end := re.match_string(in_col) + println("start: $start, end: $end") + mut res := u32(0) + if start >= 0 { + red_s, red_e := re.get_group("red") + r := ("0x" + in_col[red_s..red_e]).int() << col_mul + + green_s, green_e := re.get_group("green") + g := ("0x" + in_col[green_s..green_e]).int() << col_mul + + blue_s, blue_e := re.get_group("blue") + b := ("0x" + in_col[blue_s..blue_e]).int() << col_mul + + println("r: $r g: $g b: $b") + res = u32(r) << 16 | u32(g) << 8 | u32(b) + } + return res +} + +fn main() { + // convert HTML rgb color using groups + println(convert_html_rgb("#A0b0Cc").hex()) + println(convert_html_rgb("#ABC").hex()) + + // convert HTML rgb color using named groups + println(convert_html_rgb_n("#A0B0CC").hex()) + println(convert_html_rgb_n("#ABC").hex()) +} \ No newline at end of file diff --git a/vlib/regex/README.md b/vlib/regex/README.md index 86062f0798..b66c7ebf68 100644 --- a/vlib/regex/README.md +++ b/vlib/regex/README.md @@ -1,4 +1,4 @@ -# V RegEx (Regular 
expression) 0.9g +# V RegEx (Regular expression) 0.9h [TOC] @@ -185,6 +185,56 @@ for gi < re.groups.len { **note:** *to show the `group id number` in the result of the `get_query()`* *the flag `debug` of the RE object must be `1` or `2`* +In order to simplify the use of the captured groups it is possible to use the +utility function: `get_group_list`. + +This function returns a list of groups using this support struct: + +```v oksyntax +pub +struct Re_group { +pub: + start int = -1 + end int = -1 +} +``` + +Here is an example of its use: + +```v oksyntax +/* +This simple function converts an HTML RGB value with 3 or 6 hex digits to a u32 value, +this function is not optimized and it is only for didactic purposes +example: #A0B0CC #A9F +*/ +fn convert_html_rgb(in_col string) u32 { + mut n_digit := if in_col.len == 4 { 1 } else { 2 } + mut col_mul := if in_col.len == 4 { 4 } else { 0 } + + // this is the regex query, it uses the V string interpolation to customize the regex query + // NOTE: if you want to use escaped codes you must use the r"" (raw) strings, + // *** please remember that the V interpolation doesn't work on raw strings. *** + + query:= "#([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})([a-fA-F0-9]{$n_digit})" + + mut re := regex.regex_opt(query) or { panic(err) } + start, end := re.match_string(in_col) + println("start: $start, end: $end") + mut res := u32(0) + if start >= 0 { + group_list := re.get_group_list() // this is the utility function + r := ("0x" + in_col[group_list[0].start..group_list[0].end]).int() << col_mul + g := ("0x" + in_col[group_list[1].start..group_list[1].end]).int() << col_mul + b := ("0x" + in_col[group_list[2].start..group_list[2].end]).int() << col_mul + println("r: $r g: $g b: $b") + res = u32(r) << 16 | u32(g) << 8 | u32(b) + } + return res +} +``` + + + ### Groups Continuous saving In particular situations it is useful have a continuous save of the groups, @@ -281,7 +331,7 @@ Have a look at the example for the use of them. 
example: -```v oksyntax +```v ignore import regex fn main() { test_regex() @@ -367,11 +417,49 @@ named capturing groups: 'token':[42, 46] => 'html' ``` +In order to simplify the use of the named groups it is possible to use the name map in the `re` +struct through the function `re.get_group`. + +Here is a more complex example of its use: + +```v oksyntax +/* +This function demonstrates the use of the named groups +*/ +fn convert_html_rgb_n(in_col string) u32 { + mut n_digit := if in_col.len == 4 { 1 } else { 2 } + mut col_mul := if in_col.len == 4 { 4 } else { 0 } + + query:= "#(?P<red>[a-fA-F0-9]{$n_digit})(?P<green>[a-fA-F0-9]{$n_digit})(?P<blue>[a-fA-F0-9]{$n_digit})" + + mut re := regex.regex_opt(query) or { panic(err) } + start, end := re.match_string(in_col) + println("start: $start, end: $end") + mut res := u32(0) + if start >= 0 { + red_s, red_e := re.get_group("red") + r := ("0x" + in_col[red_s..red_e]).int() << col_mul + + green_s, green_e := re.get_group("green") + g := ("0x" + in_col[green_s..green_e]).int() << col_mul + + blue_s, blue_e := re.get_group("blue") + b := ("0x" + in_col[blue_s..blue_e]).int() << col_mul + + println("r: $r g: $g b: $b") + res = u32(r) << 16 | u32(g) << 8 | u32(b) + } + return res +} +``` + + + ## Flags It is possible to set some flags in the regex parser that change the behavior of the parser itself. -```v oksyntax +```v ignore // example of flag settings mut re := regex.new() re.flag = regex.F_BIN @@ -395,14 +483,14 @@ a `RE` struct can be created manually if you needed. 
#### **Simplified initializer** -```v +```v ignore // regex create a regex object from the query string and compile it pub fn regex_opt(in_query string) ?RE ``` #### **Base initializer** -```v +```v ignore // new_regex create a REgex of small size, usually sufficient for ordinary use pub fn new() RE diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index 1294cdf342..b5e393a1fe 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -1,6 +1,6 @@ /* -regex 0.9g +regex 0.9h Copyright (c) 2019-2020 Dario Deledda. All rights reserved. Use of this source code is governed by an MIT license @@ -19,7 +19,7 @@ module regex import strings pub const( - v_regex_version = "0.9g" // regex module version + v_regex_version = "0.9h" // regex module version max_code_len = 256 // default small base code len for the regex programs max_quantifier = 1073741824 // default max repetitions allowed for the quantifiers = 2^30 @@ -2381,3 +2381,34 @@ pub fn (mut re RE) replace(in_txt string, repl string) string { } return in_txt } + +/* + +Utilities + +*/ + +pub +struct Re_group { +pub: + start int = -1 // copied from re.groups[2*i] by get_group_list; declared default is -1 + end int = -1 // copied from re.groups[2*i + 1] by get_group_list; declared default is -1 +} + +// get_group_list returns a list of Re_group, one entry per start/end pair stored in re.groups; only pairs whose start is >= 0 are copied. NOTE(review): the other entries are never assigned and presumably keep the Re_group defaults (-1) - confirm; the loop also assumes re.groups.len is even. +pub fn (re RE) get_group_list() []Re_group { + mut res := []Re_group{len: re.groups.len >> 1} // half the length: re.groups is read two ints at a time below + mut gi := 0 + //println("len: ${re.groups.len} groups: ${re.groups}") + for gi < re.groups.len { + if re.groups[gi] >= 0 { + //println("#${gi/2} start: ${re.groups[gi]} end: ${re.groups[gi + 1]} ") + tmp := Re_group{ start: re.groups[gi], end: re.groups[gi + 1]} + //println(tmp) + res[gi >> 1] = tmp + } + gi += 2 // advance to the next start/end pair + } + return res +} +