toml: streamline scanner.at() return type, fixes #12344 (#12370)

pull/12378/head
Larpon 2021-11-03 09:18:09 +01:00 committed by GitHub
parent bd5e2db460
commit 45c938bdec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 12 deletions

View File

@ -11,6 +11,8 @@ import toml.util
pub const digit_extras = [`_`, `.`, `x`, `o`, `b`, `e`, `E`] pub const digit_extras = [`_`, `.`, `x`, `o`, `b`, `e`, `E`]
const end_of_text = -1
// Scanner contains the necessary fields for the state of the scan process. // Scanner contains the necessary fields for the state of the scan process.
// the task the scanner does is also referred to as "lexing" or "tokenizing". // the task the scanner does is also referred to as "lexing" or "tokenizing".
// The Scanner methods are based on much of the work in `vlib/strings/textscanner`. // The Scanner methods are based on much of the work in `vlib/strings/textscanner`.
@ -79,7 +81,7 @@ pub fn (mut s Scanner) scan() ?token.Token {
for { for {
c := s.next() c := s.next()
byte_c := byte(c) byte_c := byte(c)
if c == -1 { if c == scanner.end_of_text {
s.inc_line_number() s.inc_line_number()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'reached EOF') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'reached EOF')
return s.new_token(.eof, '', 1) return s.new_token(.eof, '', 1)
@ -266,11 +268,11 @@ pub fn (mut s Scanner) skip_n(n int) {
// at returns `-1` if it can't get the current character. // at returns `-1` if it can't get the current character.
// unlike `next()`, `at()` does not change the state of the scanner. // unlike `next()`, `at()` does not change the state of the scanner.
[direct_array_access; inline] [direct_array_access; inline]
pub fn (s &Scanner) at() byte { pub fn (s &Scanner) at() int {
if s.pos < s.text.len { if s.pos < s.text.len {
return s.text[s.pos] return s.text[s.pos]
} }
return byte(-1) return -1
} }
// at_crlf returns `true` if the scanner is at a `\r` character // at_crlf returns `true` if the scanner is at a `\r` character
@ -325,9 +327,9 @@ fn (mut s Scanner) new_token(kind token.Kind, lit string, len int) token.Token {
fn (mut s Scanner) ignore_line() ?string { fn (mut s Scanner) ignore_line() ?string {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' ignoring until EOL...') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, ' ignoring until EOL...')
start := s.pos start := s.pos
for c := s.at(); c != -1 && c != `\n`; c = s.at() { for c := s.at(); c != scanner.end_of_text && c != `\n`; c = s.at() {
s.next() s.next()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()}"') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "${byte(c).ascii_str()} / $c"')
if s.at_crlf() { if s.at_crlf() {
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'letting `\\r\\n` slip through') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'letting `\\r\\n` slip through')
return s.text[start..s.pos] return s.text[start..s.pos]
@ -350,7 +352,7 @@ fn (mut s Scanner) extract_key() string {
s.col-- s.col--
start := s.pos start := s.pos
for s.pos < s.text.len { for s.pos < s.text.len {
c := s.at() c := byte(s.at())
if !(util.is_key_char(c) || c.is_digit() || c in [`_`, `-`]) { if !(util.is_key_char(c) || c.is_digit() || c in [`_`, `-`]) {
break break
} }
@ -370,7 +372,7 @@ fn (mut s Scanner) extract_string() ?string {
// a byte that is the start of a string so we rewind it to start at the correct // a byte that is the start of a string so we rewind it to start at the correct
s.pos-- s.pos--
s.col-- s.col--
quote := s.at() quote := byte(s.at())
start := s.pos start := s.pos
mut lit := quote.ascii_str() mut lit := quote.ascii_str()
@ -390,7 +392,7 @@ fn (mut s Scanner) extract_string() ?string {
' unfinished single-line string literal `$quote.ascii_str()` started at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...') ' unfinished single-line string literal `$quote.ascii_str()` started at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...')
} }
c := s.at() c := byte(s.at())
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c (quote type: $quote/$quote.ascii_str())') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c (quote type: $quote/$quote.ascii_str())')
// Check for escaped chars // Check for escaped chars
@ -433,7 +435,7 @@ fn (mut s Scanner) extract_string() ?string {
fn (mut s Scanner) extract_multiline_string() ?string { fn (mut s Scanner) extract_multiline_string() ?string {
// extract_multiline_string is called from extract_string so we know the first 3 // extract_multiline_string is called from extract_string so we know the first 3
// characters are the quotes // characters are the quotes
quote := s.at() quote := byte(s.at())
start := s.pos start := s.pos
mut lit := quote.ascii_str() + quote.ascii_str() + quote.ascii_str() mut lit := quote.ascii_str() + quote.ascii_str() + quote.ascii_str()
@ -452,7 +454,7 @@ fn (mut s Scanner) extract_multiline_string() ?string {
' unfinished multi-line string literal ($quote.ascii_str()$quote.ascii_str()$quote.ascii_str()) started at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...') ' unfinished multi-line string literal ($quote.ascii_str()$quote.ascii_str()$quote.ascii_str()) started at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...')
} }
c := s.at() c := byte(s.at())
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c (quote type: $quote/$quote.ascii_str())') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c (quote type: $quote/$quote.ascii_str())')
if c == `\n` { if c == `\n` {
@ -479,7 +481,7 @@ fn (mut s Scanner) extract_multiline_string() ?string {
if c == quote { if c == quote {
if s.peek(1) == quote && s.peek(2) == quote { if s.peek(1) == quote && s.peek(2) == quote {
if s.peek(3) == -1 { if s.peek(3) == scanner.end_of_text {
s.pos += 3 s.pos += 3
s.col += 3 s.col += 3
lit += quote.ascii_str() + quote.ascii_str() + quote.ascii_str() lit += quote.ascii_str() + quote.ascii_str() + quote.ascii_str()
@ -504,7 +506,7 @@ fn (mut s Scanner) extract_multiline_string() ?string {
// handle_escapes returns any escape character sequence. // handle_escapes returns any escape character sequence.
// For escape sequence validation see `Checker.check_quoted_escapes`. // For escape sequence validation see `Checker.check_quoted_escapes`.
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) { fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
c := s.at() c := byte(s.at())
mut lit := c.ascii_str() mut lit := c.ascii_str()
is_literal_string := quote == `'` is_literal_string := quote == `'`
if !is_literal_string { if !is_literal_string {

View File

@ -79,3 +79,14 @@ fn test_hex_values() {
assert value as i64 == 11 assert value as i64 == 11
assert value.i64() == 11 assert value.i64() == 11
} }
// test_comment_as_last_value parses a document whose final line is a comment
// that is not followed by a newline, and checks that the `test` key defined
// on the line before it still reads back as the integer 42.
fn test_comment_as_last_value() {
	source := '
test = 42
# this line has comment as last thing'
	doc := toml.parse(source) or { panic(err) }
	parsed := doc.value('test')
	// Check both the sum-type cast and the accessor method.
	assert parsed as i64 == 42
	assert parsed.i64() == 42
}