regex: bug fix about \w \W backslash chars (#11176)

pull/11178/head
penguindark 2021-08-14 07:47:12 +02:00 committed by GitHub
parent b72d1e5e86
commit fb3671107e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 8 additions and 4 deletions

View File

@ -90,8 +90,8 @@ For example `\w` is the meta-char `w`.
A meta-char can match different types of characters. A meta-char can match different types of characters.
* `\w` matches an alphanumeric char `[a-zA-Z0-9_]` * `\w` matches a word char `[a-zA-Z0-9_]`
* `\W` matches a non alphanumeric char * `\W` matches a non word char
* `\d` matches a digit `[0-9]` * `\d` matches a digit `[0-9]`
* `\D` matches a non digit * `\D` matches a non digit
* `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']` * `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`

View File

@ -127,7 +127,7 @@ fn is_alnum(in_char byte) bool {
if tmp <= 9 { if tmp <= 9 {
return true return true
} }
if tmp == `_` { if in_char == `_` {
return true return true
} }
return false return false
@ -159,6 +159,7 @@ fn is_not_digit(in_char byte) bool {
return !is_digit(in_char) return !is_digit(in_char)
} }
/*
[inline] [inline]
fn is_wordchar(in_char byte) bool { fn is_wordchar(in_char byte) bool {
return is_alnum(in_char) || in_char == `_` return is_alnum(in_char) || in_char == `_`
@ -168,6 +169,7 @@ fn is_wordchar(in_char byte) bool {
fn is_not_wordchar(in_char byte) bool { fn is_not_wordchar(in_char byte) bool {
return !is_alnum(in_char) return !is_alnum(in_char)
} }
*/
[inline] [inline]
fn is_lower(in_char byte) bool { fn is_lower(in_char byte) bool {

View File

@ -19,7 +19,7 @@ match_test_suite = [
TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0}, TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0},
TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,0}, TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,0},
TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8}, TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8},
TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,13}, TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,16},
TestItem{"abcdefGHK",r"[a-f]+\A+",0,9}, TestItem{"abcdefGHK",r"[a-f]+\A+",0,9},
TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11}, TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11},
@ -138,6 +138,8 @@ match_test_suite = [
// test bcksls chars // test bcksls chars
TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31}, TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28}, TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
TestItem{"p_p", r"\w+",0,3},
TestItem{"p_é", r"\w+",0,2},
// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()]) // Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
TestItem{"1*1", r"(\d+)([*])(\d+)",0,3}, TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},