toml: add quote details to ast.Quoted (#11997)

pull/12000/head
Larpon 2021-09-28 16:40:03 +02:00 committed by GitHub
parent a03693e881
commit bc4aad5fb4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 14 deletions

View File

@ -142,6 +142,8 @@ pub struct Quoted {
pub: pub:
text string text string
pos token.Position pos token.Position
is_multiline bool
quote byte
} }
// str returns the `string` representation of the `Quoted` type. // str returns the `string` representation of the `Quoted` type.
@ -149,6 +151,8 @@ pub fn (q Quoted) str() string {
mut str := typeof(q).name + '{\n' mut str := typeof(q).name + '{\n'
str += ' text: \'$q.text\'\n' str += ' text: \'$q.text\'\n'
str += ' pos: $q.pos\n' str += ' pos: $q.pos\n'
str += ' is_multiline: $q.is_multiline\n'
str += ' quote: \'$q.quote\'\n'
str += '}' str += '}'
return str return str
} }

View File

@ -691,9 +691,19 @@ pub fn (mut p Parser) bare() ast.Bare {
// quoted parses and returns an `ast.Quoted` type. // quoted parses and returns an `ast.Quoted` type.
pub fn (mut p Parser) quoted() ast.Quoted { pub fn (mut p Parser) quoted() ast.Quoted {
// To get more info about the quote type and enable better checking,
// the scanner is returning the literal *with* single- or double-quotes.
mut quote := p.tok.lit[0]
is_multiline := p.tok.lit.len >= 6 && p.tok.lit[1] == quote && p.tok.lit[2] == quote
mut lit := p.tok.lit[1..p.tok.lit.len - 1]
if is_multiline {
lit = p.tok.lit[3..p.tok.lit.len - 3]
}
return ast.Quoted{ return ast.Quoted{
text: p.tok.lit text: lit
pos: p.tok.position() pos: p.tok.position()
quote: quote
is_multiline: is_multiline
} }
} }

View File

@ -145,10 +145,9 @@ pub fn (mut s Scanner) scan() ?token.Token {
return s.new_token(.assign, ascii, ascii.len) return s.new_token(.assign, ascii, ascii.len)
} }
`"`, `'` { // ... some string "/' `"`, `'` { // ... some string "/'
ident_string, is_multiline := s.extract_string() ? ident_string := s.extract_string() ?
token_length := if is_multiline { 2 * 3 } else { 2 } util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified quoted string `$ident_string`')
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'identified quoted string (multiline: $is_multiline) `$ident_string`') return s.new_token(.quoted, ident_string, ident_string.len)
return s.new_token(.quoted, ident_string, ident_string.len + token_length) // + quote length
} }
`#` { `#` {
start := s.pos //+ 1 start := s.pos //+ 1
@ -333,20 +332,20 @@ fn (mut s Scanner) extract_key() string {
// any bytes recognized as a TOML string. // any bytes recognized as a TOML string.
// TOML strings are everything found between two double or single quotation marks (`"`/`'`). // TOML strings are everything found between two double or single quotation marks (`"`/`'`).
[direct_array_access; inline] [direct_array_access; inline]
fn (mut s Scanner) extract_string() ?(string, bool) { fn (mut s Scanner) extract_string() ?string {
// extract_string is called when the scanner has already reached // extract_string is called when the scanner has already reached
// a byte that is the start of a string, so we rewind it to start at the correct position. // a byte that is the start of a string, so we rewind it to start at the correct position.
s.pos-- s.pos--
s.col-- s.col--
quote := s.at() quote := s.at()
start := s.pos start := s.pos
mut lit := '' mut lit := quote.ascii_str()
is_multiline := s.text[s.pos + 1] == quote && s.text[s.pos + 2] == quote is_multiline := s.text[s.pos + 1] == quote && s.text[s.pos + 2] == quote
// Check for escaped multiline quote // Check for escaped multiline quote
if is_multiline { if is_multiline {
mls := s.extract_multiline_string() ? mls := s.extract_multiline_string() ?
return mls, is_multiline return mls
} }
for { for {
@ -375,7 +374,7 @@ fn (mut s Scanner) extract_string() ?(string, bool) {
if c == quote { if c == quote {
s.pos++ s.pos++
s.col++ s.col++
return lit, is_multiline return lit + quote.ascii_str()
} }
lit += c.ascii_str() lit += c.ascii_str()
@ -386,8 +385,7 @@ fn (mut s Scanner) extract_string() ?(string, bool) {
' unfinished single-line string literal `$quote.ascii_str()` started at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...') ' unfinished single-line string literal `$quote.ascii_str()` started at $start ($s.line_nr,$s.col) "${byte(s.at()).ascii_str()}" near ...${s.excerpt(s.pos, 5)}...')
} }
} }
return lit
return lit, is_multiline
} }
// extract_multiline_string collects and returns a string containing // extract_multiline_string collects and returns a string containing
@ -399,7 +397,7 @@ fn (mut s Scanner) extract_multiline_string() ?string {
// characters is the quotes // characters is the quotes
quote := s.at() quote := s.at()
start := s.pos start := s.pos
mut lit := '' mut lit := quote.ascii_str() + quote.ascii_str() + quote.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'multi-line `$quote.ascii_str()${s.text[s.pos + util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'multi-line `$quote.ascii_str()${s.text[s.pos +
1].ascii_str()}${s.text[s.pos + 2].ascii_str()}` string started at pos $start ($s.line_nr,$s.col) (quote type: $quote.ascii_str() / $quote)') 1].ascii_str()}${s.text[s.pos + 2].ascii_str()}` string started at pos $start ($s.line_nr,$s.col) (quote type: $quote.ascii_str() / $quote)')
@ -441,6 +439,7 @@ fn (mut s Scanner) extract_multiline_string() ?string {
if s.peek(3) == -1 { if s.peek(3) == -1 {
s.pos += 3 s.pos += 3
s.col += 3 s.col += 3
lit += quote.ascii_str() + quote.ascii_str() + quote.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning at $c.ascii_str() `$lit`') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning at $c.ascii_str() `$lit`')
return lit return lit
} else if s.peek(3) != quote { } else if s.peek(3) != quote {
@ -448,6 +447,7 @@ fn (mut s Scanner) extract_multiline_string() ?string {
// lit += quote.ascii_str() // lit += quote.ascii_str()
s.pos += 3 s.pos += 3
s.col += 3 s.col += 3
lit += quote.ascii_str() + quote.ascii_str() + quote.ascii_str()
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning at $c.ascii_str() `$lit`') util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'returning at $c.ascii_str() `$lit`')
return lit return lit
} }