From 888b9c020cf0b02836dada810b840c507a9276b2 Mon Sep 17 00:00:00 2001
From: Delyan Angelov
Date: Thu, 20 May 2021 11:23:25 +0300
Subject: [PATCH] strings: add a new module `strings.textscanner` to simplify writing small parsers

---
 vlib/strings/textscanner/textscanner.v      | 122 +++++++++++++++++++
 vlib/strings/textscanner/textscanner_test.v | 127 ++++++++++++++++++++
 2 files changed, 249 insertions(+)
 create mode 100644 vlib/strings/textscanner/textscanner.v
 create mode 100644 vlib/strings/textscanner/textscanner_test.v

diff --git a/vlib/strings/textscanner/textscanner.v b/vlib/strings/textscanner/textscanner.v
new file mode 100644
index 0000000000..066ea9c687
--- /dev/null
+++ b/vlib/strings/textscanner/textscanner.v
@@ -0,0 +1,122 @@
+module textscanner
+
+// TextScanner simplifies writing small scanners/parsers
+// by providing safe methods to scan texts character by
+// character, peek for the next characters, go back, etc.
+pub struct TextScanner {
+pub:
+	input string
+	ilen  int
+mut:
+	pos int // current position; pos is *always* kept in [0,ilen]
+}
+
+// new - create a TextScanner over `input`, positioned at its start
+pub fn new(input string) TextScanner {
+	return TextScanner{
+		input: input
+		ilen: input.len
+	}
+}
+
+// free - release the memory of the scanned input string.
+// NB: the scanner must not be used after calling free().
+[unsafe]
+pub fn (mut ss TextScanner) free() {
+	unsafe {
+		ss.input.free()
+	}
+}
+
+// remaining - return how many characters remain in the input
+[inline]
+pub fn (ss &TextScanner) remaining() int {
+	return ss.ilen - ss.pos
+}
+
+// next - safely get a character from the input text and advance
+// past it; returns -1 when there are no more characters.
+[direct_array_access; inline]
+pub fn (mut ss TextScanner) next() int {
+	if ss.pos < ss.ilen {
+		opos := ss.pos
+		ss.pos++
+		return ss.input[opos]
+	}
+	return -1
+}
+
+// skip - skip one character; skip() is slightly faster than .next()
+// and ignoring the result. It does nothing at the end of the input.
+[inline]
+pub fn (mut ss TextScanner) skip() {
+	// NB: the check is `pos < ilen` (not `pos + 1 < ilen`), so that
+	// the last character of the input can be skipped too.
+	if ss.pos < ss.ilen {
+		ss.pos++
+	}
+}
+
+// skip_n - skip the next `n` characters; the resulting position
+// is clamped to [0,ilen], so a negative `n` can not move it before
+// the start of the input.
+[inline]
+pub fn (mut ss TextScanner) skip_n(n int) {
+	ss.pos += n
+	if ss.pos < 0 {
+		ss.pos = 0
+	}
+	if ss.pos > ss.ilen {
+		ss.pos = ss.ilen
+	}
+}
+
+// peek - safely get the *next* character from the input text
+// if the character exists. NB: unlike next(), peek() *will not* change
+// the state of the scanner.
+[direct_array_access; inline]
+pub fn (ss &TextScanner) peek() int {
+	if ss.pos < ss.ilen {
+		return ss.input[ss.pos]
+	}
+	return -1
+}
+
+// peek_n - safely get the character from the input text at the current
+// position + `n`, if the character exists, or else it returns -1.
+// NB: .peek() and .peek_n(0) are equivalent.
+[direct_array_access; inline]
+pub fn (ss &TextScanner) peek_n(n int) int {
+	// direct_array_access may omit bounds checking, so both ends
+	// of the range have to be checked here (`n` can be negative).
+	idx := ss.pos + n
+	if idx >= 0 && idx < ss.ilen {
+		return ss.input[idx]
+	}
+	return -1
+}
+
+// back - go back a character
+[inline]
+pub fn (mut ss TextScanner) back() {
+	if ss.pos > 0 {
+		ss.pos--
+	}
+}
+
+// back_n - go back `n` characters; the resulting position
+// is clamped to [0,ilen].
+pub fn (mut ss TextScanner) back_n(n int) {
+	ss.pos -= n
+	if ss.pos < 0 {
+		ss.pos = 0
+	}
+	if ss.pos > ss.ilen {
+		ss.pos = ss.ilen
+	}
+}
+
+// reset - go back to the start of the input
+pub fn (mut ss TextScanner) reset() {
+	ss.pos = 0
+}
diff --git a/vlib/strings/textscanner/textscanner_test.v b/vlib/strings/textscanner/textscanner_test.v
new file mode 100644
index 0000000000..a672204f56
--- /dev/null
+++ b/vlib/strings/textscanner/textscanner_test.v
@@ -0,0 +1,127 @@
+import strings.textscanner
+
+fn test_remaining() {
+	mut s := textscanner.new('abc')
+	assert s.remaining() == 3
+	s.next()
+	s.next()
+	assert s.remaining() == 1
+	s.next()
+	assert s.remaining() == 0
+	s.next()
+	s.next()
+	assert s.remaining() == 0
+	s.reset()
+	assert s.remaining() == 3
+}
+
+fn test_next() {
+	mut s := textscanner.new('abc')
+	assert s.next() == `a`
+	assert s.next() == `b`
+	assert s.next() == `c`
+	assert s.next() == -1
+	assert s.next() == -1
+	assert s.next() == -1
+}
+
+fn test_skip() {
+	mut s := textscanner.new('abc')
+	assert s.next() == `a`
+	s.skip()
+	assert s.next() == `c`
+	assert s.next() == -1
+}
+
+fn test_skip_last() {
+	mut s := textscanner.new('abc')
+	s.skip()
+	s.skip()
+	assert s.remaining() == 1
+	s.skip()
+	assert s.remaining() == 0
+	s.skip()
+	assert s.remaining() == 0
+	assert s.next() == -1
+}
+
+fn test_skip_n() {
+	mut s := textscanner.new('abc')
+	s.skip_n(2)
+	assert s.next() == `c`
+	assert s.next() == -1
+}
+
+fn test_skip_n_negative() {
+	mut s := textscanner.new('abc')
+	s.skip_n(-5)
+	assert s.remaining() == 3
+	assert s.next() == `a`
+}
+
+fn test_peek() {
+	mut s := textscanner.new('abc')
+	assert s.peek() == `a`
+	assert s.peek() == `a`
+	assert s.peek() == `a`
+	//
+	assert s.next() == `a`
+	assert s.next() == `b`
+	assert s.next() == `c`
+	assert s.next() == -1
+}
+
+fn test_peek_n() {
+	mut s := textscanner.new('abc')
+	assert s.peek_n(0) == `a`
+	assert s.peek_n(1) == `b`
+	assert s.peek_n(2) == `c`
+	assert s.peek_n(3) == -1
+	assert s.peek_n(4) == -1
+	//
+	assert s.next() == `a`
+	assert s.next() == `b`
+	assert s.next() == `c`
+	assert s.next() == -1
+}
+
+fn test_peek_n_negative() {
+	mut s := textscanner.new('abc')
+	assert s.peek_n(-1) == -1
+	assert s.next() == `a`
+	assert s.peek_n(-1) == `a`
+	assert s.peek_n(-2) == -1
+}
+
+fn test_back() {
+	mut s := textscanner.new('abc')
+	assert s.next() == `a`
+	s.back()
+	assert s.next() == `a`
+	assert s.next() == `b`
+	s.back()
+	assert s.next() == `b`
+	assert s.next() == `c`
+	assert s.next() == -1
+}
+
+fn test_back_n() {
+	mut s := textscanner.new('abc')
+	assert s.next() == `a`
+	s.back_n(10)
+	assert s.next() == `a`
+	assert s.next() == `b`
+	assert s.next() == `c`
+	s.back_n(2)
+	assert s.next() == `b`
+}
+
+fn test_reset() {
+	mut s := textscanner.new('abc')
+	assert s.next() == `a`
+	s.next()
+	s.next()
+	assert s.next() == -1
+	s.reset()
+	assert s.next() == `a`
+}