v.vmod: make the v.mod parser more robust, show line numbers in errors, add tests

2022-03-18 21:50:34 +02:00 · 2022-03-18 21:50:34 +02:00 · 1734e72638
parent 5f79fa8a30
commit 1734e72638
3 changed files with 74 additions and 17 deletions
--- a/vlib/v/gen/c/comptime.v
+++ b/vlib/v/gen/c/comptime.v
@ -203,7 +203,7 @@ fn cgen_attrs(attrs []ast.Attr) []string {

 fn (mut g Gen) comptime_at(node ast.AtExpr) {
 	if node.kind == .vmod_file {
-		val := cnewlines(node.val.replace('\r', ''))
+		val := cescape_nonascii(util.smart_quote(node.val, false))
 		g.write('_SLIT("$val")')
 	} else {
 		val := node.val.replace('\\', '\\\\')
--- a/vlib/v/vmod/parser.v
+++ b/vlib/v/vmod/parser.v
@ -2,6 +2,8 @@ module vmod

 import os

+const err_label = 'vmod:'
+
 enum TokenKind {
 	module_keyword
 	field_key
@ -32,6 +34,7 @@ pub mut:
 struct Scanner {
 mut:
 	pos         int
+	line        int = 1
 	text        string
 	inside_text bool
 	tokens      []Token
@ -44,8 +47,9 @@ mut:
 }

 struct Token {
-	typ TokenKind
-	val string
+	typ  TokenKind
+	val  string
+	line int
 }

 pub fn from_file(vmod_path string) ?Manifest {
@ -67,7 +71,7 @@ pub fn decode(contents string) ?Manifest {
 }

 fn (mut s Scanner) tokenize(t_type TokenKind, val string) {
-	s.tokens << Token{t_type, val}
+	s.tokens << Token{t_type, val, s.line}
 }

 fn (mut s Scanner) skip_whitespace() {
@ -82,7 +86,7 @@ fn is_name_alpha(chr byte) bool {

 fn (mut s Scanner) create_string(q byte) string {
 	mut str := ''
-	for s.text[s.pos] != q {
+	for s.pos < s.text.len && s.text[s.pos] != q {
 		if s.text[s.pos] == `\\` && s.text[s.pos + 1] == q {
 			str += s.text[s.pos..s.pos + 1]
 			s.pos += 2
@ -96,7 +100,7 @@ fn (mut s Scanner) create_string(q byte) string {

 fn (mut s Scanner) create_ident() string {
 	mut text := ''
-	for is_name_alpha(s.text[s.pos]) {
+	for s.pos < s.text.len && is_name_alpha(s.text[s.pos]) {
 		text += s.text[s.pos].ascii_str()
 		s.pos++
 	}
@ -112,6 +116,9 @@ fn (mut s Scanner) scan_all() {
 		c := s.text[s.pos]
 		if c.is_space() || c == `\\` {
 			s.pos++
+			if c == `\n` {
+				s.line++
+			}
 			continue
 		}
 		if is_name_alpha(c) {
@ -120,7 +127,7 @@ fn (mut s Scanner) scan_all() {
 				s.tokenize(.module_keyword, name)
 				s.pos++
 				continue
-			} else if s.text[s.pos] == `:` {
+			} else if s.pos < s.text.len && s.text[s.pos] == `:` {
 				s.tokenize(.field_key, name + ':')
 				s.pos += 2
 				continue
@ -155,7 +162,7 @@ fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
 	mut vals := []string{}
 	mut idx := st_idx
 	if tokens[idx].typ != .labr {
-		return error('vmod: not a valid array')
+		return error('$vmod.err_label not a valid array, at line ${tokens[idx].line}')
 	}
 	idx++
 	for {
@ -164,7 +171,7 @@ fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
 			.str {
 				vals << tok.val
 				if tokens[idx + 1].typ !in [.comma, .rabr] {
-					return error('vmod: invalid separator "${tokens[idx + 1].val}"')
+					return error('$vmod.err_label invalid separator "${tokens[idx + 1].val}", at line $tok.line')
 				}
 				idx += if tokens[idx + 1].typ == .comma { 2 } else { 1 }
 			}
@ -173,7 +180,7 @@ fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
 				break
 			}
 			else {
-				return error('vmod: invalid token "$tok.val"')
+				return error('$vmod.err_label invalid token "$tok.val", at line $tok.line')
 			}
 		}
 	}
@ -181,15 +188,14 @@ fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
 }

 fn (mut p Parser) parse() ?Manifest {
-	err_label := 'vmod:'
 	if p.scanner.text.len == 0 {
-		return error('$err_label no content.')
+		return error('$vmod.err_label no content.')
 	}
 	p.scanner.scan_all()
 	tokens := p.scanner.tokens
 	mut mn := Manifest{}
 	if tokens[0].typ != .module_keyword {
-		return error('vmod: v.mod files should start with Module')
+		return error('$vmod.err_label v.mod files should start with Module, at line ${tokens[0].line}')
 	}
 	mut i := 1
 	for i < tokens.len {
@ -197,7 +203,7 @@ fn (mut p Parser) parse() ?Manifest {
 		match tok.typ {
 			.lcbr {
 				if tokens[i + 1].typ !in [.field_key, .rcbr] {
-					return error('$err_label invalid content after opening brace')
+					return error('$vmod.err_label invalid content after opening brace, at line $tok.line')
 				}
 				i++
 				continue
@ -208,7 +214,7 @@ fn (mut p Parser) parse() ?Manifest {
 			.field_key {
 				field_name := tok.val.trim_right(':')
 				if tokens[i + 1].typ !in [.str, .labr] {
-					return error('$err_label value of field "$field_name" must be either string or an array of strings')
+					return error('$vmod.err_label value of field "$field_name" must be either string or an array of strings, at line $tok.line')
 				}
 				field_value := tokens[i + 1].val
 				match field_name {
@ -251,13 +257,13 @@ fn (mut p Parser) parse() ?Manifest {
 			}
 			.comma {
 				if tokens[i - 1].typ !in [.str, .rabr] || tokens[i + 1].typ != .field_key {
-					return error('$err_label invalid comma placement')
+					return error('$vmod.err_label invalid comma placement, at line $tok.line')
 				}
 				i++
 				continue
 			}
 			else {
-				return error('$err_label invalid token "$tok.val"')
+				return error('$vmod.err_label invalid token "$tok.val", at line $tok.line')
 			}
 		}
 	}
--- a/vlib/v/vmod/parser_test.v
+++ b/vlib/v/vmod/parser_test.v
@ -0,0 +1,51 @@
+import v.vmod
+
+const quote = '\x22'
+
+const apos = '\x27'
+
+fn test_ok() ? {
+	ok_source := "Module {
+	name: 'V'
+	description: 'The V programming language.'
+	version: '0.2.4'
+	license: 'MIT'
+	repo_url: 'https://github.com/vlang/v'
+	dependencies: []
+}"
+	for s in [ok_source, ok_source.replace(apos, quote), ok_source.replace('\n', '\r\n'),
+		ok_source.replace('\n', '\r\n '), ok_source.replace('\n', '\n ')] {
+		content := vmod.decode(s) ?
+		assert content.name == 'V'
+		assert content.description == 'The V programming language.'
+		assert content.version == '0.2.4'
+		assert content.license == 'MIT'
+		assert content.repo_url == 'https://github.com/vlang/v'
+		assert content.dependencies == []
+		assert content.unknown == {}
+	}
+	e := vmod.decode('Module{}') ?
+	assert e.name == ''
+	assert e.description == ''
+	assert e.version == ''
+	assert e.license == ''
+	assert e.repo_url == ''
+	assert e.dependencies == []
+	assert e.unknown == {}
+}
+
+fn test_invalid_start() ? {
+	vmod.decode('\n\nXYZ') or {
+		assert err.msg() == 'vmod: v.mod files should start with Module, at line 3'
+		return
+	}
+	assert false
+}
+
+fn test_invalid_end() ? {
+	vmod.decode('\nModule{\n \nname: ${quote}zzzz}') or {
+		assert err.msg() == 'vmod: invalid token ${quote}eof$quote, at line 4'
+		return
+	}
+	assert false
+}