From ae3df002a21a4e888bae92ba1deba1d4fa95c86a Mon Sep 17 00:00:00 2001
From: Ned Palacios <npdoesmc@gmail.com>
Date: Mon, 18 May 2020 20:35:28 +0800
Subject: [PATCH] vmod: add v.mod parser

---
 cmd/tools/vcreate.v                         |   1 -
 v.mod                                       |   2 -
 vlib/v/tests/project_with_c_code/mod1/v.mod |   2 -
 vlib/v/tests/vmod_parser_test.v             |  42 +++
 vlib/v/vmod/parser.v                        | 275 ++++++++++++++++++++
 5 files changed, 317 insertions(+), 5 deletions(-)
 create mode 100644 vlib/v/tests/vmod_parser_test.v
 create mode 100644 vlib/v/vmod/parser.v

diff --git a/cmd/tools/vcreate.v b/cmd/tools/vcreate.v
index 356f981d45..52cf7ee07c 100644
--- a/cmd/tools/vcreate.v
+++ b/cmd/tools/vcreate.v
@@ -22,7 +22,6 @@ fn cerror(e string){
 
 fn vmod_content(name, desc string) string {
 	return  [
-		'#V Project#\n',
 		'Module {',
 		'	name: \'${name}\',',
 		'	description: \'${desc}\',',
diff --git a/v.mod b/v.mod
index 5ded5195a1..11128c3532 100644
--- a/v.mod
+++ b/v.mod
@@ -1,5 +1,3 @@
-#V Project#
-
 Module {
 	name: 'V',
 	description: 'The V programming language.',
diff --git a/vlib/v/tests/project_with_c_code/mod1/v.mod b/vlib/v/tests/project_with_c_code/mod1/v.mod
index 9a9b5dc167..72ddcfdaf2 100644
--- a/vlib/v/tests/project_with_c_code/mod1/v.mod
+++ b/vlib/v/tests/project_with_c_code/mod1/v.mod
@@ -1,5 +1,3 @@
-#V Module#
-
 Module {
 	name: 'mod1',
 	description: 'A simple module, containing C code.',
diff --git a/vlib/v/tests/vmod_parser_test.v b/vlib/v/tests/vmod_parser_test.v
new file mode 100644
index 0000000000..3a4a85b846
--- /dev/null
+++ b/vlib/v/tests/vmod_parser_test.v
@@ -0,0 +1,42 @@
+import vmod
+
+fn test_from_file() { // decodes './v.mod'; the path is resolved against the current working directory
+	data := vmod.from_file('./v.mod') or {
+		panic(err)
+	}
+	assert data.name == 'V'
+	assert data.description == 'The V programming language.'
+	assert data.version == '0.1.27' // expects that v.mod to declare exactly this version
+	assert data.dependencies.len == 0
+}
+
+fn test_decode() { // decode() on an in-memory manifest, then on empty input
+	content := "
+	  Module {
+		name: \'foobar\',
+		description: \'Just a sample module\'
+		version: \'0.2.0\',
+		repo_url: \'https://gitlab.com\',
+		author: \'Fooz Bar\',
+		license: \'GPL-2.0\',
+		dependencies: [\'hello\'],
+		test: \'foo\'
+	  }
+	"
+	data := vmod.decode(content) or {
+		panic(err) // fail this test only, instead of exiting the whole test process
+	}
+	assert data.name == 'foobar'
+	assert data.version == '0.2.0'
+	assert data.description == 'Just a sample module'
+	assert data.repo_url == 'https://gitlab.com'
+	assert data.author == 'Fooz Bar'
+	assert data.license == 'GPL-2.0'
+	assert data.dependencies[0] == 'hello'
+	assert data.unknown['test'][0] == 'foo' // fields with unrecognised names land in `unknown`
+	_ := vmod.decode('') or {
+		assert err == 'vmod: no content.'
+		return // expected path: empty input is rejected
+	}
+	assert false // decode('') must not succeed
+}
diff --git a/vlib/v/vmod/parser.v b/vlib/v/vmod/parser.v
new file mode 100644
index 0000000000..40ab5c3206
--- /dev/null
+++ b/vlib/v/vmod/parser.v
@@ -0,0 +1,275 @@
+module vmod
+
+import os
+
+enum TokenKind { // kinds of tokens emitted by Scanner.scan_all
+	module_keyword // the identifier `Module`
+	field_key // an identifier immediately followed by `:`; the token value keeps the colon
+	lcbr // `{`
+	rcbr // `}`
+	labr // `[`
+	rabr // `]`
+	comma // `,`
+	colon // a `:` that is not attached to an identifier
+	eof // appended once, after the whole text has been scanned
+	str // contents of a quoted string, without the surrounding quotes
+	ident // any identifier that is neither `Module` nor a field key
+	unknown // any other character
+}
+
+pub struct Manifest { // decoded contents of a v.mod `Module { ... }` block
+pub mut:
+	name         string
+	version      string
+	description  string
+	dependencies []string // items of the `dependencies` array
+	license      string
+	repo_url     string
+	author       string
+	unknown      map[string][]string // fields with any other name; a plain string value is stored as a one-element array
+}
+
+struct Scanner { // turns the v.mod text into a flat list of tokens
+mut:
+	pos         int // index of the next byte of `text` to examine
+	text        string // the source being scanned
+	inside_text bool // NOTE(review): not read or written anywhere in this patch
+	tokens      []Token // tokens emitted so far, in source order
+}
+
+struct Parser { // builds a Manifest from the tokens of its scanner
+mut:
+	file_path string // NOTE(review): never assigned in this patch; from_file does not pass the path on
+	scanner   Scanner
+}
+
+struct Token { // one lexical unit produced by the scanner
+	typ TokenKind
+	val string // text carried by the token (string contents for .str, `name:` for .field_key, 'eof' for .eof)
+}
+
+pub fn from_file(vmod_path string) ?Manifest { // reads the file at `vmod_path` and decodes it; errors if it is missing, unreadable or invalid
+	if !os.exists(vmod_path) {
+		return error('v.mod: v.mod file not found.')
+	}
+	contents := os.read_file(vmod_path) or { // an unreadable file is reported to the caller, not a crash
+		return error('v.mod: cannot parse v.mod')
+	}
+	return decode(contents)
+}
+
+pub fn decode(contents string) ?Manifest { // parses v.mod source text into a Manifest
+	scanner := Scanner{
+		text: contents
+		pos: 0
+	}
+	mut parser := Parser{}
+	parser.scanner = scanner // scanning starts at the first byte of `contents`
+	return parser.parse()
+}
+
+fn (mut s Scanner) tokenize(t_type TokenKind, val string) { // appends a token of kind `t_type` carrying `val` to s.tokens
+	s.tokens << Token{t_type, val}
+}
+
+fn (mut s Scanner) skip_whitespace() { // advances pos past a run of whitespace, stopping at end of text
+	for s.pos < s.text.len && s.text[s.pos].is_space() {
+		s.pos++
+	}
+} // NOTE(review): not called within this patch; scan_all skips whitespace itself
+
+fn is_name_alpha(chr byte) bool { // true for bytes that may appear in an identifier: letters and `_`
+	return chr == `_` || chr.is_letter()
+}
+
+fn (mut s Scanner) create_string(q byte) string { // reads from pos up to (not including) the closing quote `q` and returns the contents
+	mut str := ''
+	for s.pos < s.text.len && s.text[s.pos] != q { // also stop at end of text so an unterminated string cannot index out of range
+		if s.text[s.pos] == `\\` && s.pos + 1 < s.text.len && s.text[s.pos + 1] == q {
+			str += q.str() // escaped quote: keep the quote itself, drop the backslash
+			s.pos += 2
+		} else {
+			str += s.text[s.pos].str()
+			s.pos++
+		}
+	}
+	return str // pos is left on the closing quote, or at text.len if there was none
+}
+
+fn (mut s Scanner) create_ident() string { // reads a run of identifier bytes starting at pos and returns it
+	mut text := ''
+	for s.pos < s.text.len && is_name_alpha(s.text[s.pos]) { // bounds check: the identifier may be the last thing in the text
+		text += s.text[s.pos].str()
+		s.pos++
+	}
+	return text // pos is left on the first byte after the identifier
+}
+
+fn (s Scanner) peek_char(c byte) bool { // true when the byte just before pos is `c` (it looks behind, despite the name)
+	return s.pos > 0 && s.pos - 1 < s.text.len && s.text[s.pos - 1] == c // at pos 0 there is no previous byte to inspect
+}
+
+fn (mut s Scanner) scan_all() { // tokenizes the whole text into s.tokens and finishes with an eof token
+	for s.pos < s.text.len {
+		c := s.text[s.pos]
+		if c.is_space() || c == `\\` { // whitespace and backslashes outside strings produce no token
+			s.pos++
+			continue
+		}
+		if is_name_alpha(c) {
+			name := s.create_ident()
+			// pos is now on the first byte after the name. Only a key's `:` is consumed here; any other
+			// byte is left for the next iteration, so `Module{`, `name:'x'` and `foo,` keep their next token.
+			if name == 'Module' {
+				s.tokenize(.module_keyword, name)
+				continue
+			} else if s.pos < s.text.len && s.text[s.pos] == `:` { // bounds check: the name may end the text
+				s.tokenize(.field_key, name + ':')
+				s.pos++
+				continue
+			} else {
+				s.tokenize(.ident, name)
+				continue
+			}
+		}
+		if c in [`\'`, `\"`] && !s.peek_char(`\\`) { // a quote not preceded by a backslash opens a string
+			s.pos++
+			str := s.create_string(c)
+			s.tokenize(.str, str)
+			s.pos++ // step over the closing quote
+			continue
+		}
+		match c {
+			`{` { s.tokenize(.lcbr, c.str()) }
+			`}` { s.tokenize(.rcbr, c.str()) }
+			`[` { s.tokenize(.labr, c.str()) }
+			`]` { s.tokenize(.rabr, c.str()) }
+			`:` { s.tokenize(.colon, c.str()) }
+			`,` { s.tokenize(.comma, c.str()) }
+			else { s.tokenize(.unknown, c.str()) }
+		}
+		s.pos++
+	}
+	s.tokenize(.eof, 'eof')
+}
+
+fn get_array_content(tokens []Token, st_idx int) ?([]string, int) { // returns the strings of the array opening at st_idx and the index just past its `]`
+	mut vals := []string{}
+	mut idx := st_idx
+	if tokens[idx].typ != .labr { // st_idx must point at the opening `[`
+		return error('vmod: not a valid array')
+	}
+	idx++
+	for {
+		tok := tokens[idx]
+		match tok.typ {
+			.str {
+				vals << tok.val
+				if tokens[idx + 1].typ !in [.comma, .rabr] { // an item must be followed by `,` or `]`
+					return error('vmod: invalid separator "${tokens[idx+1].val}"')
+				}
+				idx += if tokens[idx + 1].typ == .comma {
+					2 // step over the item and its comma
+				} else {
+					1 // stop on `]`; the next iteration consumes it
+				}
+			}
+			.rabr {
+				idx++
+				break
+			}
+			else { // anything else, including the eof token of an unclosed array
+				return error('vmod: invalid token "$tok.val"')
+			}
+		}
+	}
+	return vals, idx
+}
+
+fn (mut p Parser) parse() ?Manifest { // scans p.scanner.text and builds a Manifest; every malformed input is returned as an error
+	err_label := 'vmod:'
+	if p.scanner.text.len == 0 {
+		return error('$err_label no content.')
+	}
+	p.scanner.scan_all()
+	tokens := p.scanner.tokens // always ends with an eof token, so tokens[0] exists
+	mut mn := Manifest{}
+	if tokens[0].typ != .module_keyword { // reported as an error like every other failure, instead of panicking
+		return error('$err_label not a valid v.mod')
+	}
+	mut i := 1
+	for i < tokens.len {
+		tok := tokens[i]
+		match tok.typ {
+			.lcbr {
+				if tokens[i + 1].typ !in [.field_key, .rcbr] {
+					return error('$err_label invalid content after opening brace')
+				}
+				i++
+				continue
+			}
+			.rcbr { // closing brace ends the manifest; anything after it is ignored
+				break
+			}
+			.field_key {
+				field_name := tok.val.trim_right(':') // the scanner stores keys as `name:`
+				if tokens[i + 1].typ !in [.str, .labr] {
+					return error('$err_label value of field "$field_name" must be either string or an array of strings')
+				}
+				field_value := tokens[i + 1].val
+				match field_name {
+					'name' {
+						mn.name = field_value
+					}
+					'version' {
+						mn.version = field_value
+					}
+					'license' {
+						mn.license = field_value
+					}
+					'repo_url' {
+						mn.repo_url = field_value
+					}
+					'description' {
+						mn.description = field_value
+					}
+					'author' {
+						mn.author = field_value
+					}
+					'dependencies' {
+						deps, idx := get_array_content(tokens, i + 1) or {
+							return error(err)
+						}
+						mn.dependencies = deps
+						i = idx // idx is already past the closing `]`
+						continue
+					}
+					else { // unrecognised field names are kept in mn.unknown
+						if tokens[i + 1].typ == .labr {
+							vals, idx := get_array_content(tokens, i + 1) or {
+								return error(err)
+							}
+							mn.unknown[field_name] = vals
+							i = idx
+							continue
+						}
+						mn.unknown[field_name] = [field_value]
+					}
+				}
+				i += 2 // step over the key and its string value
+				continue
+			}
+			.comma { // a comma must sit between a value and the next key
+				if tokens[i - 1].typ !in [.str, .rabr] || tokens[i + 1].typ != .field_key {
+					return error('$err_label invalid comma placement')
+				}
+				i++
+				continue
+			}
+			else { // includes eof when the closing brace is missing
+				return error('$err_label invalid token "$tok.val"')
+			}
+		}
+	}
+	return mn
+}