regex: fix a bug, update docs, explain the assumptions of the `regex` module (#7629)
parent
2946673bc2
commit
a98adbb3a4
|
@ -2,9 +2,22 @@
|
||||||
|
|
||||||
[TOC]
|
[TOC]
|
||||||
|
|
||||||
## introduction
|
## Introduction, differences with PCRE
|
||||||
|
|
||||||
Write here the introduction... not today!! -_-
|
The first thing we must point out is that the **V-Regex module is not PCRE compliant** and
|
||||||
|
thus some behaviour will be different.
|
||||||
|
This module follows the V philosophy: have one way to do things and keep it simple.
|
||||||
|
The main differences can be summarized in the following points:
|
||||||
|
|
||||||
|
- The basic element **is the token, not the sequence of symbols**; the simplest token
|
||||||
|
is a single char.
|
||||||
|
|
||||||
|
- The `|` **OR operator acts on tokens**: for example, `abc|ebc` is not `abc` OR `ebc`; it
|
||||||
|
is evaluated as `ab` followed by `c OR e` followed by `bc`, because the **token is
|
||||||
|
the base element**, not the sequence of symbols.
|
||||||
|
- The **match operation stops at the end of the string**, not at newline characters.
|
||||||
|
|
||||||
|
Further information can be found in the other parts of this document.
|
||||||
|
|
||||||
## Basic assumption
|
## Basic assumption
|
||||||
|
|
||||||
|
|
|
@ -1960,7 +1960,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if re.prog[state.pc].dot_check_pc >= 0 {
|
if re.prog[state.pc].dot_check_pc >= 0 && re.prog[state.pc].rep >= re.prog[state.pc].rep_min {
|
||||||
// load the char
|
// load the char
|
||||||
//ch_t, _ := re.get_charb(in_txt, state.i+char_len)
|
//ch_t, _ := re.get_charb(in_txt, state.i+char_len)
|
||||||
ch_t := ch
|
ch_t := ch
|
||||||
|
@ -2338,6 +2338,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
|
||||||
//println("find return")
|
//println("find return")
|
||||||
return state.first_match, state.i
|
return state.first_match, state.i
|
||||||
} else {
|
} else {
|
||||||
|
//println("Here!!")
|
||||||
return 0, state.i
|
return 0, state.i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -123,6 +123,7 @@ match_test_suite = [
|
||||||
TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11},
|
TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11},
|
||||||
TestItem{"accccb deer", r"^(.*)$",0,11},
|
TestItem{"accccb deer", r"^(.*)$",0,11},
|
||||||
TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,0},
|
TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,0},
|
||||||
|
TestItem{"##.#....#.##.####...#.##", r".{18}[.#]",0,19},
|
||||||
|
|
||||||
// test bcksls chars
|
// test bcksls chars
|
||||||
TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
|
TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
|
||||||
|
|
Loading…
Reference in New Issue