From a98adbb3a4f13c73ed8f215a5e2b5ec73f0a1533 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Mon, 28 Dec 2020 11:43:03 +0100 Subject: [PATCH] regex: fix a bug, update docs, explain the assumptions of the `regex` module (#7629) --- vlib/regex/README.md | 17 +++++++++++++++-- vlib/regex/regex.v | 3 ++- vlib/regex/regex_test.v | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/vlib/regex/README.md b/vlib/regex/README.md index c22ba5a7c7..bdd8755f17 100644 --- a/vlib/regex/README.md +++ b/vlib/regex/README.md @@ -2,9 +2,22 @@ [TOC] -## introduction +## Introduction, differences with PCRE -Write here the introduction... not today!! -_- +The first thing we must point out is that the **V-Regex module is not PCRE compliant** and +thus some behaviour will be different. +This module is built on the V philosophy of having one way to do things and keeping it simple. +The main differences can be summarized in the following points: + +- The basic element **is the token, not the sequence of symbols**; the simplest token +is a single char. + +- The `|` **OR operator acts on tokens,** for example `abc|ebc` is not `abc` OR `ebc`; it +is evaluated as `ab` followed by `c OR e` followed by `bc`, because the **token is +the base element**, not the sequence of symbols. +- The **match operation stops at the end of the string**, not at newline chars. + +Further information can be found in the other parts of this document. 
## Basic assumption diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index c6bc1bd83f..1bd81b3bc2 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -1960,7 +1960,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) { continue } - if re.prog[state.pc].dot_check_pc >= 0 { + if re.prog[state.pc].dot_check_pc >= 0 && re.prog[state.pc].rep >= re.prog[state.pc].rep_min { // load the char //ch_t, _ := re.get_charb(in_txt, state.i+char_len) ch_t := ch @@ -2338,6 +2338,7 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) { //println("find return") return state.first_match, state.i } else { + //println("Here!!") return 0, state.i } } diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index ce8dcd03ac..36dd9d3bd1 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -123,6 +123,7 @@ match_test_suite = [ TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11}, TestItem{"accccb deer", r"^(.*)$",0,11}, TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,0}, + TestItem{"##.#....#.##.####...#.##", r".{18}[.#]",0,19}, // test bcksls chars TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},