From e8909cced0d8b6b01637e048bc7932ce1484a52b Mon Sep 17 00:00:00 2001 From: Ruofan XU <47302112+SleepyRoy@users.noreply.github.com> Date: Sat, 12 Sep 2020 14:51:30 +0800 Subject: [PATCH] examples: add regex_with_memoization.v (#6355) --- examples/regex_with_memoization.v | 126 ++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 examples/regex_with_memoization.v diff --git a/examples/regex_with_memoization.v b/examples/regex_with_memoization.v new file mode 100644 index 0000000000..9b985a7899 --- /dev/null +++ b/examples/regex_with_memoization.v @@ -0,0 +1,126 @@ +import os + +fn regex_match(src, pat string) bool { + src_size := src.len + 1 + pat_size := pat.len + 1 + mut memo := [][]int{len: src_size, init: []int{len: pat_size, init: -1}} + return regex_match_core(src, pat, 0, 0, mut memo) +} + +fn regex_match_core(src, pat string, src_pos, pat_pos int, mut memo [][]int) bool { + if memo[src_pos][pat_pos] != -1 { + return memo[src_pos][pat_pos] == 1 + } + mut spos := src_pos + mut ppos := pat_pos + if spos >= src.len && ppos >= pat.len { + memo[src_pos][pat_pos] = 1 + return true + } else if spos < src.len && ppos >= pat.len { + memo[src_pos][pat_pos] = 0 + return false + } else if spos >= src.len && ppos < pat.len { + if pat[ppos] == `\\` { + ppos++ + } + res := ppos + 1 < pat.len && + pat[ppos + 1] in [`*`, `?`] && regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + first_is_bslash := pat[ppos] == `\\` + if first_is_bslash { + ppos++ + } + first_bslash_and_match := first_is_bslash && ppos < pat.len && + (((pat[ppos] == `d` && src[spos].is_digit()) || + (pat[ppos] == `D` && !src[spos].is_digit()) || + (pat[ppos] == `s` && src[spos].is_space()) || + (pat[ppos] == `S` && !src[spos].is_space()) || + (pat[ppos] == `w` && (src[spos].is_digit() || src[spos].is_letter() || src[spos] == `_`)) || + (pat[ppos] == `W` && !(src[spos].is_digit() || src[spos].is_letter() || src[spos] == `_`))) || + (pat[ppos] in [`d`, `D`, `s`, `S`, `w`, `W`] && + ppos + 1 < pat.len && pat[ppos + 1] in [`*`, `?`, `+`]) || + (pat[ppos] !in [`d`, `D`, `s`, `S`, `w`, `W`] && src[spos] == pat[ppos])) + if ppos + 1 < pat.len { + match pat[ppos + 1] { + `*` { + if first_bslash_and_match { + res := regex_match_core(src, pat, spos + 1, ppos - 1, mut memo) || regex_match_core(src, pat, spos, ppos + + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else if src[spos] == pat[ppos] || pat[ppos] == `.` { + res := regex_match_core(src, pat, spos + 1, ppos, mut memo) || regex_match_core(src, pat, spos, ppos + + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + res := regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } + } + `+` { + if first_bslash_and_match { + res := regex_match_core(src, pat, spos + 1, ppos - 1, mut memo) || regex_match_core(src, pat, spos + + 1, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else if src[spos] == pat[ppos] || pat[ppos] == `.` { + res := regex_match_core(src, pat, spos + 1, ppos, mut memo) || regex_match_core(src, pat, spos + + 1, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + memo[src_pos][pat_pos] = 0 + return false + } + } + `?` { + if first_bslash_and_match || src[spos] == pat[ppos] || pat[ppos] == `.` { + res := regex_match_core(src, pat, spos + 1, ppos + 2, mut memo) || regex_match_core(src, pat, spos, ppos + + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + res := regex_match_core(src, pat, spos, ppos + 2, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } + } + else {} + } + } + if first_is_bslash { + res := first_bslash_and_match && regex_match_core(src, pat, spos + 1, ppos + 1, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } else { + res := (src[spos] == pat[ppos] || + pat[ppos] == `.`) && + pat[ppos] != `\\` && regex_match_core(src, pat, spos + 1, ppos + 1, mut memo) + memo[src_pos][pat_pos] = if res { 1 } else { 0 } + return res + } + } +} + +fn main() { + mut cnt := 0 + println('currently supported patterns: . ? + * \\ \\d \\D \\s \\S \\w \\W') + println('example: source `address@domain.net` matches pattern `\\w+@domain\\.net`') + println('enter `exit` to quit\n') + for { + cnt++ + src := os.input('[$cnt] enter source string: ') + if src == 'exit' { + break + } + pat := os.input('[$cnt] enter pattern string: ') + if pat == 'exit' { + break + } + println('[$cnt] whether `$src` matches `$pat`: ${regex_match(src,pat)}') + } +}