From 3b166d8327f21c199e69d2921dec8691113b449e Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Sat, 27 Mar 2021 17:15:06 +0100 Subject: [PATCH] regex: added less restrictive use of '-' in CC (#9484) --- vlib/regex/README.md | 4 ++-- vlib/regex/regex.v | 9 ++++++++- vlib/regex/regex_test.v | 8 ++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/vlib/regex/README.md b/vlib/regex/README.md index bdd8755f17..7b6611e9dc 100644 --- a/vlib/regex/README.md +++ b/vlib/regex/README.md @@ -69,8 +69,8 @@ and all the digits `\d`. It is possible to mix all the properties of the char class together. -**Note:** In order to match the `-` (minus) char, it must be located at the first position - in the cc, for example `[-_\d\a]` will match `-` minus, `_`underscore, `\d` numeric chars, +**Note:** In order to match the `-` (minus) char, it must be preceded by a backslash + in the cc, for example `[\-_\d\a]` will match `-` minus, `_` underscore, `\d` numeric chars, `\a` lower case chars. 
### Meta-chars diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index 547a097192..d5aa48056a 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -390,7 +390,7 @@ const( ] // these chars are escape if preceded by a \ - bsls_escape_list = [`\\`, `|`, `.`, `:`, `*`, `+`, `-`, `{`, `}`, `[`, `]`, `(`, `)`, `?`] + bsls_escape_list = [`\\`, `|`, `.`, `:`, `*`, `+`, `-`, `{`, `}`, `[`, `]`, `(`, `)`, `?`, `^`, `!`] ) enum BSLS_parse_state { @@ -613,8 +613,15 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) { } } if status == .in_bsls { + // manage as a simple char //println("CC bsls not found [${ch:c}]") + re.cc[tmp_index].cc_type = cc_char + re.cc[tmp_index].ch0 = char_tmp + re.cc[tmp_index].ch1 = char_tmp + i += char_len + tmp_index++ status = .in_char + continue }else { continue } diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index c203aba89a..60bf748813 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -15,6 +15,14 @@ struct TestItem { const( match_test_suite = [ + // minus in CC + TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0}, + TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,0}, + TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8}, + TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,13}, + TestItem{"abcdefGHK",r"[a-f]+\A+",0,9}, + TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11}, + // base OR TestItem{"a",r"a|b",0,1}, TestItem{"a",r"b|a",0,1},