strings: add a new module `strings.textscanner` to simplify writing small parsers

pull/10154/head
Delyan Angelov 2021-05-20 11:23:25 +03:00
parent c8a9052a5f
commit 888b9c020c
No known key found for this signature in database
GPG Key ID: 66886C0F12D595ED
2 changed files with 207 additions and 0 deletions

View File

@ -0,0 +1,107 @@
module textscanner
// TextScanner simplifies writing small scanners/parsers
// by providing safe methods to scan texts character by
// character, peek for the next characters, go back, etc.
// The `input` and `ilen` fields are publicly readable (`pub:`),
// while the `pos` cursor lives in the non-public `mut:` section
// and is only moved through the scanner's methods.
pub struct TextScanner {
pub:
// the text that is being scanned
input string
// cached length of `input`; new() sets it to input.len
ilen int
mut:
pos int // current position; pos is meant to *always* stay in [0,ilen]
}
// new - create a TextScanner for the given `input` text.
// The length of `input` is stored once in `ilen`; the scanning
// position is left at its zero value, i.e. the start of the text.
pub fn new(input string) TextScanner {
	scanner := TextScanner{
		input: input
		ilen: input.len
	}
	return scanner
}
// free - release the memory of the scanner's input string.
// The function is tagged `[unsafe]`, so callers have to invoke it
// from inside an `unsafe` block themselves.
[unsafe]
pub fn (mut ss TextScanner) free() {
	unsafe { ss.input.free() }
}
// remaining - return the number of characters that are left
// between the current position and the end of the input
[inline]
pub fn (ss &TextScanner) remaining() int {
	left := ss.ilen - ss.pos
	return left
}
// next - safely read the character at the current position and
// advance the scanner by one. When the whole input was already
// consumed, the position is left unchanged and -1 is returned.
[direct_array_access; inline]
pub fn (mut ss TextScanner) next() int {
	// nothing left to read
	if ss.pos >= ss.ilen {
		return -1
	}
	cur := ss.pos
	ss.pos = cur + 1
	return ss.input[cur]
}
// skip - skip one character; skip() is slightly faster than .next()
// and ignoring the result. Like next(), it can move the position up
// to the very end of the input (pos == ilen), and it does nothing
// once the end has been reached.
[inline]
pub fn (mut ss TextScanner) skip() {
	// NB: the check is `pos < ilen` (the same one that next() uses), so that
	// the last character of the input can be skipped too.
	if ss.pos < ss.ilen {
		ss.pos++
	}
}
// skip_n - skip the next `n` characters.
// The resulting position is clamped to [0,ilen]: skipping past the end
// stops at the end of the input, and a negative `n` (which moves the
// scanner backwards) can not move it before the start of the input.
[inline]
pub fn (mut ss TextScanner) skip_n(n int) {
	ss.pos += n
	// a negative `n` must not leave the position below 0, since the
	// other methods index the input without bounds checks
	if ss.pos < 0 {
		ss.pos = 0
	}
	if ss.pos > ss.ilen {
		ss.pos = ss.ilen
	}
}
// peek - safely look at the character at the current position,
// i.e. the one that the following next() call would return.
// It returns -1 when the input is exhausted.
// NB: unlike next(), peek() *will not* change the state of the scanner.
[direct_array_access; inline]
pub fn (ss &TextScanner) peek() int {
	// already at the end of the input
	if ss.pos >= ss.ilen {
		return -1
	}
	return ss.input[ss.pos]
}
// peek_n - safely get the character from the input text at the current
// position + `n`, if the character exists, or else it returns -1.
// A negative `n` looks at already scanned characters; -1 is returned too,
// when position + `n` is before the start of the input.
// The state of the scanner is not changed.
// NB: .peek() and .peek_n(0) are equivalent.
[direct_array_access; inline]
pub fn (ss &TextScanner) peek_n(n int) int {
	idx := ss.pos + n
	// both bounds have to be checked here, since the array access
	// below is done without bounds checking
	if idx < 0 || idx >= ss.ilen {
		return -1
	}
	return ss.input[idx]
}
// back - move the scanner one character backwards;
// it does nothing when the scanner is already at the start
[inline]
pub fn (mut ss TextScanner) back() {
	// can not go before the start of the input
	if ss.pos <= 0 {
		return
	}
	ss.pos--
}
// back_n - move the scanner `n` characters backwards.
// The new position is clamped, so that it stays in [0,ilen].
pub fn (mut ss TextScanner) back_n(n int) {
	target := ss.pos - n
	ss.pos = if target < 0 {
		0
	} else if target > ss.ilen {
		ss.ilen
	} else {
		target
	}
}
// reset - go back to the start of the input,
// by setting the current position to 0. Only the position
// is changed; the `input` and `ilen` fields are not touched.
pub fn (mut ss TextScanner) reset() {
ss.pos = 0
}

View File

@ -0,0 +1,100 @@
import strings.textscanner
// test_remaining checks that remaining() counts down while the input
// is consumed, stays at 0 past the end, and is restored by reset().
fn test_remaining() {
	mut scanner := textscanner.new('abc')
	assert scanner.remaining() == 3
	scanner.next()
	scanner.next()
	assert scanner.remaining() == 1
	scanner.next()
	assert scanner.remaining() == 0
	// reading past the end should not change the count
	scanner.next()
	scanner.next()
	assert scanner.remaining() == 0
	scanner.reset()
	assert scanner.remaining() == 3
}
// test_next checks that next() returns the characters in order,
// and then keeps returning -1 once the input is exhausted.
fn test_next() {
	mut scanner := textscanner.new('abc')
	assert scanner.next() == `a`
	assert scanner.next() == `b`
	assert scanner.next() == `c`
	// the end of the input is reported repeatedly
	assert scanner.next() == -1
	assert scanner.next() == -1
	assert scanner.next() == -1
}
// test_skip checks that skip() jumps over exactly one character.
fn test_skip() {
	mut scanner := textscanner.new('abc')
	assert scanner.next() == `a`
	// `b` is skipped here
	scanner.skip()
	assert scanner.next() == `c`
	assert scanner.next() == -1
}
// test_skip_n checks that skip_n(2) jumps over the first two characters.
fn test_skip_n() {
	mut scanner := textscanner.new('abc')
	scanner.skip_n(2)
	assert scanner.next() == `c`
	assert scanner.next() == -1
}
// test_peek checks that peek() returns the upcoming character,
// without consuming it.
fn test_peek() {
	mut scanner := textscanner.new('abc')
	assert scanner.peek() == `a`
	assert scanner.peek() == `a`
	assert scanner.peek() == `a`
	// the repeated peeks above should not have moved the scanner
	assert scanner.next() == `a`
	assert scanner.next() == `b`
	assert scanner.next() == `c`
	assert scanner.next() == -1
}
// test_peek_n checks that peek_n() can look ahead at an offset,
// that it returns -1 past the end, and that it does not consume input.
fn test_peek_n() {
	mut scanner := textscanner.new('abc')
	assert scanner.peek_n(0) == `a`
	assert scanner.peek_n(1) == `b`
	assert scanner.peek_n(2) == `c`
	assert scanner.peek_n(3) == -1
	assert scanner.peek_n(4) == -1
	// the look-aheads above should not have moved the scanner
	assert scanner.next() == `a`
	assert scanner.next() == `b`
	assert scanner.next() == `c`
	assert scanner.next() == -1
}
// test_back checks that back() rewinds the scanner by one character,
// so that the following next() returns the same character again.
fn test_back() {
	mut scanner := textscanner.new('abc')
	assert scanner.next() == `a`
	scanner.back()
	assert scanner.next() == `a`
	assert scanner.next() == `b`
	scanner.back()
	assert scanner.next() == `b`
	assert scanner.next() == `c`
	assert scanner.next() == -1
}
// test_back_n checks that back_n() rewinds by several characters,
// and that rewinding too far stops at the start of the input.
fn test_back_n() {
	mut scanner := textscanner.new('abc')
	assert scanner.next() == `a`
	// going back more than what was read, lands on the start
	scanner.back_n(10)
	assert scanner.next() == `a`
	assert scanner.next() == `b`
	assert scanner.next() == `c`
	scanner.back_n(2)
	assert scanner.next() == `b`
}
// test_reset checks that reset() rewinds an exhausted scanner
// back to the first character.
fn test_reset() {
	mut scanner := textscanner.new('abc')
	assert scanner.next() == `a`
	scanner.next()
	scanner.next()
	assert scanner.next() == -1
	scanner.reset()
	assert scanner.next() == `a`
}