From fb3671107e54d791a97119320d004b454498b1d4 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Sat, 14 Aug 2021 07:47:12 +0200 Subject: [PATCH] regex: bug fix about \w \W backslash chars (#11176) --- vlib/regex/README.md | 4 ++-- vlib/regex/regex.v | 4 +++- vlib/regex/regex_test.v | 4 +++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/vlib/regex/README.md b/vlib/regex/README.md index 8922166dcc..0faa833bb3 100644 --- a/vlib/regex/README.md +++ b/vlib/regex/README.md @@ -90,8 +90,8 @@ For example `\w` is the meta-char `w`. A meta-char can match different types of characters. -* `\w` matches an alphanumeric char `[a-zA-Z0-9_]` -* `\W` matches a non alphanumeric char +* `\w` matches a word char `[a-zA-Z0-9_]` +* `\W` matches a non word char * `\d` matches a digit `[0-9]` * `\D` matches a non digit * `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']` diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index 12e86dab96..9e630e1d5d 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -127,7 +127,7 @@ fn is_alnum(in_char byte) bool { if tmp <= 9 { return true } - if tmp == `_` { + if in_char == `_` { return true } return false @@ -159,6 +159,7 @@ fn is_not_digit(in_char byte) bool { return !is_digit(in_char) } +/* [inline] fn is_wordchar(in_char byte) bool { return is_alnum(in_char) || in_char == `_` @@ -168,6 +169,7 @@ fn is_wordchar(in_char byte) bool { fn is_not_wordchar(in_char byte) bool { return !is_alnum(in_char) } +*/ [inline] fn is_lower(in_char byte) bool { diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index 242dc84fbe..aa6bf79f74 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -19,7 +19,7 @@ match_test_suite = [ TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0}, TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,0}, TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8}, - TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,13}, + 
TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,16}, TestItem{"abcdefGHK",r"[a-f]+\A+",0,9}, TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11}, @@ -138,6 +138,8 @@ match_test_suite = [ // test bcksls chars TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31}, TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28}, + TestItem{"p_p", r"\w+",0,3}, + TestItem{"p_é", r"\w+",0,2}, // Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()]) TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},