v.vmod: make the v.mod parser more robust, show line numbers in errors, add tests

pull/13776/head
Delyan Angelov 2022-03-18 21:50:34 +02:00
parent 5f79fa8a30
commit 1734e72638
No known key found for this signature in database
GPG Key ID: 66886C0F12D595ED
3 changed files with 74 additions and 17 deletions

View File

@ -203,7 +203,7 @@ fn cgen_attrs(attrs []ast.Attr) []string {
fn (mut g Gen) comptime_at(node ast.AtExpr) { fn (mut g Gen) comptime_at(node ast.AtExpr) {
if node.kind == .vmod_file { if node.kind == .vmod_file {
val := cnewlines(node.val.replace('\r', '')) val := cescape_nonascii(util.smart_quote(node.val, false))
g.write('_SLIT("$val")') g.write('_SLIT("$val")')
} else { } else {
val := node.val.replace('\\', '\\\\') val := node.val.replace('\\', '\\\\')

View File

@ -2,6 +2,8 @@ module vmod
import os import os
const err_label = 'vmod:'
enum TokenKind { enum TokenKind {
module_keyword module_keyword
field_key field_key
@ -32,6 +34,7 @@ pub mut:
struct Scanner { struct Scanner {
mut: mut:
pos int pos int
line int = 1
text string text string
inside_text bool inside_text bool
tokens []Token tokens []Token
@ -44,8 +47,9 @@ mut:
} }
struct Token { struct Token {
typ TokenKind typ TokenKind
val string val string
line int
} }
pub fn from_file(vmod_path string) ?Manifest { pub fn from_file(vmod_path string) ?Manifest {
@ -67,7 +71,7 @@ pub fn decode(contents string) ?Manifest {
} }
fn (mut s Scanner) tokenize(t_type TokenKind, val string) { fn (mut s Scanner) tokenize(t_type TokenKind, val string) {
s.tokens << Token{t_type, val} s.tokens << Token{t_type, val, s.line}
} }
fn (mut s Scanner) skip_whitespace() { fn (mut s Scanner) skip_whitespace() {
@ -82,7 +86,7 @@ fn is_name_alpha(chr byte) bool {
fn (mut s Scanner) create_string(q byte) string { fn (mut s Scanner) create_string(q byte) string {
mut str := '' mut str := ''
for s.text[s.pos] != q { for s.pos < s.text.len && s.text[s.pos] != q {
if s.text[s.pos] == `\\` && s.text[s.pos + 1] == q { if s.text[s.pos] == `\\` && s.text[s.pos + 1] == q {
str += s.text[s.pos..s.pos + 1] str += s.text[s.pos..s.pos + 1]
s.pos += 2 s.pos += 2
@ -96,7 +100,7 @@ fn (mut s Scanner) create_string(q byte) string {
fn (mut s Scanner) create_ident() string { fn (mut s Scanner) create_ident() string {
mut text := '' mut text := ''
for is_name_alpha(s.text[s.pos]) { for s.pos < s.text.len && is_name_alpha(s.text[s.pos]) {
text += s.text[s.pos].ascii_str() text += s.text[s.pos].ascii_str()
s.pos++ s.pos++
} }
@ -112,6 +116,9 @@ fn (mut s Scanner) scan_all() {
c := s.text[s.pos] c := s.text[s.pos]
if c.is_space() || c == `\\` { if c.is_space() || c == `\\` {
s.pos++ s.pos++
if c == `\n` {
s.line++
}
continue continue
} }
if is_name_alpha(c) { if is_name_alpha(c) {
@ -120,7 +127,7 @@ fn (mut s Scanner) scan_all() {
s.tokenize(.module_keyword, name) s.tokenize(.module_keyword, name)
s.pos++ s.pos++
continue continue
} else if s.text[s.pos] == `:` { } else if s.pos < s.text.len && s.text[s.pos] == `:` {
s.tokenize(.field_key, name + ':') s.tokenize(.field_key, name + ':')
s.pos += 2 s.pos += 2
continue continue
@ -155,7 +162,7 @@ fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
mut vals := []string{} mut vals := []string{}
mut idx := st_idx mut idx := st_idx
if tokens[idx].typ != .labr { if tokens[idx].typ != .labr {
return error('vmod: not a valid array') return error('$vmod.err_label not a valid array, at line ${tokens[idx].line}')
} }
idx++ idx++
for { for {
@ -164,7 +171,7 @@ fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
.str { .str {
vals << tok.val vals << tok.val
if tokens[idx + 1].typ !in [.comma, .rabr] { if tokens[idx + 1].typ !in [.comma, .rabr] {
return error('vmod: invalid separator "${tokens[idx + 1].val}"') return error('$vmod.err_label invalid separator "${tokens[idx + 1].val}", at line $tok.line')
} }
idx += if tokens[idx + 1].typ == .comma { 2 } else { 1 } idx += if tokens[idx + 1].typ == .comma { 2 } else { 1 }
} }
@ -173,7 +180,7 @@ fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
break break
} }
else { else {
return error('vmod: invalid token "$tok.val"') return error('$vmod.err_label invalid token "$tok.val", at line $tok.line')
} }
} }
} }
@ -181,15 +188,14 @@ fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
} }
fn (mut p Parser) parse() ?Manifest { fn (mut p Parser) parse() ?Manifest {
err_label := 'vmod:'
if p.scanner.text.len == 0 { if p.scanner.text.len == 0 {
return error('$err_label no content.') return error('$vmod.err_label no content.')
} }
p.scanner.scan_all() p.scanner.scan_all()
tokens := p.scanner.tokens tokens := p.scanner.tokens
mut mn := Manifest{} mut mn := Manifest{}
if tokens[0].typ != .module_keyword { if tokens[0].typ != .module_keyword {
return error('vmod: v.mod files should start with Module') return error('$vmod.err_label v.mod files should start with Module, at line ${tokens[0].line}')
} }
mut i := 1 mut i := 1
for i < tokens.len { for i < tokens.len {
@ -197,7 +203,7 @@ fn (mut p Parser) parse() ?Manifest {
match tok.typ { match tok.typ {
.lcbr { .lcbr {
if tokens[i + 1].typ !in [.field_key, .rcbr] { if tokens[i + 1].typ !in [.field_key, .rcbr] {
return error('$err_label invalid content after opening brace') return error('$vmod.err_label invalid content after opening brace, at line $tok.line')
} }
i++ i++
continue continue
@ -208,7 +214,7 @@ fn (mut p Parser) parse() ?Manifest {
.field_key { .field_key {
field_name := tok.val.trim_right(':') field_name := tok.val.trim_right(':')
if tokens[i + 1].typ !in [.str, .labr] { if tokens[i + 1].typ !in [.str, .labr] {
return error('$err_label value of field "$field_name" must be either string or an array of strings') return error('$vmod.err_label value of field "$field_name" must be either string or an array of strings, at line $tok.line')
} }
field_value := tokens[i + 1].val field_value := tokens[i + 1].val
match field_name { match field_name {
@ -251,13 +257,13 @@ fn (mut p Parser) parse() ?Manifest {
} }
.comma { .comma {
if tokens[i - 1].typ !in [.str, .rabr] || tokens[i + 1].typ != .field_key { if tokens[i - 1].typ !in [.str, .rabr] || tokens[i + 1].typ != .field_key {
return error('$err_label invalid comma placement') return error('$vmod.err_label invalid comma placement, at line $tok.line')
} }
i++ i++
continue continue
} }
else { else {
return error('$err_label invalid token "$tok.val"') return error('$vmod.err_label invalid token "$tok.val", at line $tok.line')
} }
} }
} }

View File

@ -0,0 +1,51 @@
import v.vmod
// quote is the double-quote character (ASCII 0x22), written as a hex escape
// so it can be spliced into single- or double-quoted test strings.
const quote = '\x22'
// apos is the apostrophe / single-quote character (ASCII 0x27).
const apos = '\x27'
// test_ok checks that vmod.decode accepts a well-formed manifest in several
// quoting / line-ending variants, and that an empty `Module{}` decodes to a
// manifest whose fields are all empty.
fn test_ok() ? {
// Reference manifest; the literal's exact bytes (including its line breaks)
// are the input under test.
ok_source := "Module {
name: 'V'
description: 'The V programming language.'
version: '0.2.4'
license: 'MIT'
repo_url: 'https://github.com/vlang/v'
dependencies: []
}"
// Variants decoded below: the source as-is, apostrophes swapped for double
// quotes, LF replaced by CRLF, CRLF followed by a space, and LF followed by
// a space. Every variant must yield the same field values.
for s in [ok_source, ok_source.replace(apos, quote), ok_source.replace('\n', '\r\n'),
ok_source.replace('\n', '\r\n '), ok_source.replace('\n', '\n ')] {
content := vmod.decode(s) ?
assert content.name == 'V'
assert content.description == 'The V programming language.'
assert content.version == '0.2.4'
assert content.license == 'MIT'
assert content.repo_url == 'https://github.com/vlang/v'
assert content.dependencies == []
assert content.unknown == {}
}
// A manifest with an empty body must decode successfully with empty fields.
e := vmod.decode('Module{}') ?
assert e.name == ''
assert e.description == ''
assert e.version == ''
assert e.license == ''
assert e.repo_url == ''
assert e.dependencies == []
assert e.unknown == {}
}
// test_invalid_start verifies that input which does not begin with the
// `Module` keyword is rejected, and that the error message carries the line
// number of the offending token.
fn test_invalid_start() ? {
	// Two leading newlines place the stray identifier on line 3.
	source := '\n\nXYZ'
	expected := 'vmod: v.mod files should start with Module, at line 3'
	vmod.decode(source) or {
		assert err.msg() == expected
		return
	}
	// Reaching this point means decode unexpectedly succeeded.
	assert false
}
// test_invalid_end verifies that a manifest whose last string value is never
// closed is rejected with an "invalid token" error that names the eof token
// and reports line 4.
fn test_invalid_end() ? {
	// The opening quote before `zzzz` has no matching closing quote.
	source := '\nModule{\n \nname: ${quote}zzzz}'
	expected := 'vmod: invalid token ${quote}eof$quote, at line 4'
	vmod.decode(source) or {
		assert err.msg() == expected
		return
	}
	// Reaching this point means decode unexpectedly succeeded.
	assert false
}