regex: added less restrictive use of '-' in CC (#9484)

pull/9455/head
penguindark 2021-03-27 17:15:06 +01:00 committed by GitHub
parent 1b7fd2cf00
commit 3b166d8327
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 3 deletions

View File

@ -69,8 +69,8 @@ and all the digits `\d`.
It is possible to mix all the properties of the char class together.
**Note:** In order to match the `-` (minus) char, it must be located at the first position
in the cc, for example `[-_\d\a]` will match `-` minus, `_` underscore, `\d` numeric chars,
**Note:** In order to match the `-` (minus) char, it must be preceded by a backslash
in the char class (cc), for example `[\-_\d\a]` will match `-` minus, `_` underscore, `\d` numeric chars,
`\a` lower case chars.
### Meta-chars

View File

@ -390,7 +390,7 @@ const(
]
// these chars are escaped if preceded by a \
bsls_escape_list = [`\\`, `|`, `.`, `:`, `*`, `+`, `-`, `{`, `}`, `[`, `]`, `(`, `)`, `?`]
bsls_escape_list = [`\\`, `|`, `.`, `:`, `*`, `+`, `-`, `{`, `}`, `[`, `]`, `(`, `)`, `?`, `^`, `!`]
)
enum BSLS_parse_state {
@ -613,8 +613,15 @@ fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
}
}
if status == .in_bsls {
// manage as a simple char
//println("CC bsls not found [${ch:c}]")
re.cc[tmp_index].cc_type = cc_char
re.cc[tmp_index].ch0 = char_tmp
re.cc[tmp_index].ch1 = char_tmp
i += char_len
tmp_index++
status = .in_char
continue
}else {
continue
}

View File

@ -15,6 +15,14 @@ struct TestItem {
const(
match_test_suite = [
// minus in CC
TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0},
TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,0},
TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8},
TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,13},
TestItem{"abcdefGHK",r"[a-f]+\A+",0,9},
TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11},
// base OR
TestItem{"a",r"a|b",0,1},
TestItem{"a",r"b|a",0,1},