From ae3df002a21a4e888bae92ba1deba1d4fa95c86a Mon Sep 17 00:00:00 2001
From: Ned Palacios
Date: Mon, 18 May 2020 20:35:28 +0800
Subject: [PATCH] vmod: add v.mod parser

---
 cmd/tools/vcreate.v                         |   1 -
 v.mod                                       |   2 -
 vlib/v/tests/project_with_c_code/mod1/v.mod |   2 -
 vlib/v/tests/vmod_parser_test.v             |  42 +++
 vlib/v/vmod/parser.v                        | 308 ++++++++++++++++++++
 5 files changed, 350 insertions(+), 5 deletions(-)
 create mode 100644 vlib/v/tests/vmod_parser_test.v
 create mode 100644 vlib/v/vmod/parser.v

diff --git a/cmd/tools/vcreate.v b/cmd/tools/vcreate.v
index 356f981d45..52cf7ee07c 100644
--- a/cmd/tools/vcreate.v
+++ b/cmd/tools/vcreate.v
@@ -22,7 +22,6 @@ fn cerror(e string){
 
 fn vmod_content(name, desc string) string {
 	return [
-		'#V Project#\n',
 		'Module {',
 		'	name: \'${name}\',',
 		'	description: \'${desc}\',',
diff --git a/v.mod b/v.mod
index 5ded5195a1..11128c3532 100644
--- a/v.mod
+++ b/v.mod
@@ -1,5 +1,3 @@
-#V Project#
-
 Module {
 	name: 'V',
 	description: 'The V programming language.',
diff --git a/vlib/v/tests/project_with_c_code/mod1/v.mod b/vlib/v/tests/project_with_c_code/mod1/v.mod
index 9a9b5dc167..72ddcfdaf2 100644
--- a/vlib/v/tests/project_with_c_code/mod1/v.mod
+++ b/vlib/v/tests/project_with_c_code/mod1/v.mod
@@ -1,5 +1,3 @@
-#V Module#
-
 Module {
 	name: 'mod1',
 	description: 'A simple module, containing C code.',
diff --git a/vlib/v/tests/vmod_parser_test.v b/vlib/v/tests/vmod_parser_test.v
new file mode 100644
index 0000000000..3a4a85b846
--- /dev/null
+++ b/vlib/v/tests/vmod_parser_test.v
@@ -0,0 +1,42 @@
+import vmod
+
+fn test_from_file() {
+	data := vmod.from_file('./v.mod') or {
+		panic(err)
+	}
+	assert data.name == 'V'
+	assert data.description == 'The V programming language.'
+	assert data.version == '0.1.27'
+	assert data.dependencies.len == 0
+}
+
+fn test_decode() {
+	content := "
+	Module {
+		name: \'foobar\',
+		description: \'Just a sample module\'
+		version: \'0.2.0\',
+		repo_url: \'https://gitlab.com\',
+		author: \'Fooz Bar\',
+		license: \'GPL-2.0\',
+		dependencies: [\'hello\'],
+		test: \'foo\'
+	}
+	"
+	data := vmod.decode(content) or {
+		println(err)
+		exit(1)
+	}
+	assert data.name == 'foobar'
+	assert data.version == '0.2.0'
+	assert data.description == 'Just a sample module'
+	assert data.repo_url == 'https://gitlab.com'
+	assert data.author == 'Fooz Bar'
+	assert data.license == 'GPL-2.0'
+	assert data.dependencies[0] == 'hello'
+	assert data.unknown['test'][0] == 'foo'
+	_ := vmod.decode('') or {
+		assert err == 'vmod: no content.'
+		exit(0)
+	}
+}
diff --git a/vlib/v/vmod/parser.v b/vlib/v/vmod/parser.v
new file mode 100644
index 0000000000..40ab5c3206
--- /dev/null
+++ b/vlib/v/vmod/parser.v
@@ -0,0 +1,308 @@
+module vmod
+
+import os
+
+// TokenKind enumerates the lexical categories produced by the v.mod scanner.
+enum TokenKind {
+	module_keyword
+	field_key
+	lcbr
+	rcbr
+	labr
+	rabr
+	comma
+	colon
+	eof
+	str
+	ident
+	unknown
+}
+
+// Manifest is the decoded content of a v.mod file. Fields that are not
+// recognised by the parser are collected in `unknown`, keyed by field name.
+pub struct Manifest {
+pub mut:
+	name string
+	version string
+	description string
+	dependencies []string
+	license string
+	repo_url string
+	author string
+	unknown map[string][]string
+}
+
+// Scanner turns the raw v.mod text into a flat list of tokens.
+struct Scanner {
+mut:
+	pos int
+	text string
+	inside_text bool
+	tokens []Token
+}
+
+// Parser owns the scanner and builds a Manifest from its tokens.
+struct Parser {
+mut:
+	file_path string
+	scanner Scanner
+}
+
+// Token is a single lexical unit: its kind plus the text it was built from.
+struct Token {
+	typ TokenKind
+	val string
+}
+
+// from_file reads the v.mod file at `vmod_path` and decodes it.
+// It returns an error when the file is missing, cannot be read,
+// or does not contain a valid manifest.
+pub fn from_file(vmod_path string) ?Manifest {
+	if !os.exists(vmod_path) {
+		return error('v.mod: v.mod file not found.')
+	}
+	contents := os.read_file(vmod_path) or {
+		// a read failure is recoverable for the caller, so report it
+		// through the optional instead of aborting the whole program
+		return error('v.mod: cannot parse v.mod')
+	}
+	return decode(contents)
+}
+
+// decode parses the text of a v.mod file and returns the resulting Manifest,
+// or an error describing the first problem found.
+pub fn decode(contents string) ?Manifest {
+	mut parser := Parser{
+		scanner: Scanner{
+			pos: 0
+			text: contents
+		}
+	}
+	return parser.parse()
+}
+
+// tokenize appends a token of kind `t_type` with text `val` to the token list.
+fn (mut s Scanner) tokenize(t_type TokenKind, val string) {
+	s.tokens << Token{t_type, val}
+}
+
+// skip_whitespace advances the cursor past any run of whitespace.
+fn (mut s Scanner) skip_whitespace() {
+	for s.pos < s.text.len && s.text[s.pos].is_space() {
+		s.pos++
+	}
+}
+
+// is_name_alpha reports whether `chr` may appear in an identifier
+// (a letter or an underscore).
+fn is_name_alpha(chr byte) bool {
+	return chr.is_letter() || chr == `_`
+}
+
+// create_string collects the characters up to (not including) the closing
+// quote `q`, or up to the end of the text when the string is unterminated.
+// The cursor is left on the closing quote; the caller steps over it.
+fn (mut s Scanner) create_string(q byte) string {
+	mut str := ''
+	// the length checks keep malformed input (no closing quote, or a
+	// trailing backslash) from indexing past the end of the text
+	for s.pos < s.text.len && s.text[s.pos] != q {
+		if s.text[s.pos] == `\\` && s.pos + 1 < s.text.len && s.text[s.pos + 1] == q {
+			str += s.text[s.pos..s.pos + 1]
+			s.pos += 2
+		} else {
+			str += s.text[s.pos].str()
+			s.pos++
+		}
+	}
+	return str
+}
+
+// create_ident collects a run of identifier characters starting at the cursor.
+fn (mut s Scanner) create_ident() string {
+	mut text := ''
+	// stop at the end of the text so a trailing identifier cannot overrun it
+	for s.pos < s.text.len && is_name_alpha(s.text[s.pos]) {
+		text += s.text[s.pos].str()
+		s.pos++
+	}
+	return text
+}
+
+// peek_char reports whether the character just before the cursor is `c`.
+fn (s Scanner) peek_char(c byte) bool {
+	// at position 0 there is no previous character to look at
+	return s.pos > 0 && s.pos - 1 < s.text.len && s.text[s.pos - 1] == c
+}
+
+// scan_all tokenizes the whole text and terminates the list with an eof token.
+fn (mut s Scanner) scan_all() {
+	for s.pos < s.text.len {
+		c := s.text[s.pos]
+		if c.is_space() || c == `\\` {
+			s.pos++
+			continue
+		}
+		if is_name_alpha(c) {
+			// create_ident leaves the cursor on the first character after
+			// the name; nothing more is skipped here, so that punctuation
+			// written right after a name (`Module{`, `name:'x'`) is kept
+			name := s.create_ident()
+			if name == 'Module' {
+				s.tokenize(.module_keyword, name)
+			} else if s.pos < s.text.len && s.text[s.pos] == `:` {
+				s.tokenize(.field_key, name + ':')
+				// the colon is part of the field key token
+				s.pos++
+			} else {
+				s.tokenize(.ident, name)
+			}
+			continue
+		}
+		if c in [`\'`, `\"`] && !s.peek_char(`\\`) {
+			s.pos++
+			str := s.create_string(c)
+			s.tokenize(.str, str)
+			s.pos++
+			continue
+		}
+		match c {
+			`{` { s.tokenize(.lcbr, c.str()) }
+			`}` { s.tokenize(.rcbr, c.str()) }
+			`[` { s.tokenize(.labr, c.str()) }
+			`]` { s.tokenize(.rabr, c.str()) }
+			`:` { s.tokenize(.colon, c.str()) }
+			`,` { s.tokenize(.comma, c.str()) }
+			else { s.tokenize(.unknown, c.str()) }
+		}
+		s.pos++
+	}
+	s.tokenize(.eof, 'eof')
+}
+
+// get_array_content reads a bracketed, comma separated list of strings that
+// starts at `tokens[st_idx]`. It returns the collected values together with
+// the index of the first token after the closing bracket.
+fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
+	mut vals := []string{}
+	mut idx := st_idx
+	if tokens[idx].typ != .labr {
+		return error('vmod: not a valid array')
+	}
+	idx++
+	for {
+		tok := tokens[idx]
+		match tok.typ {
+			.str {
+				vals << tok.val
+				if tokens[idx + 1].typ !in [.comma, .rabr] {
+					return error('vmod: invalid separator "${tokens[idx+1].val}"')
+				}
+				idx += if tokens[idx + 1].typ == .comma {
+					2
+				} else {
+					1
+				}
+			}
+			.rabr {
+				idx++
+				break
+			}
+			else {
+				return error('vmod: invalid token "$tok.val"')
+			}
+		}
+	}
+	return vals, idx
+}
+
+// parse scans the text and builds a Manifest from the resulting tokens.
+// Malformed input is reported as an error instead of a panic.
+fn (mut p Parser) parse() ?Manifest {
+	err_label := 'vmod:'
+	if p.scanner.text.len == 0 {
+		return error('$err_label no content.')
+	}
+	p.scanner.scan_all()
+	tokens := p.scanner.tokens
+	mut mn := Manifest{}
+	// scan_all always emits at least the eof token, so tokens[0] exists
+	if tokens[0].typ != .module_keyword {
+		return error('$err_label not a valid v.mod')
+	}
+	mut i := 1
+	for i < tokens.len {
+		tok := tokens[i]
+		match tok.typ {
+			.lcbr {
+				if tokens[i + 1].typ !in [.field_key, .rcbr] {
+					return error('$err_label invalid content after opening brace')
+				}
+				i++
+				continue
+			}
+			.rcbr {
+				break
+			}
+			.field_key {
+				field_name := tok.val.trim_right(':')
+				if tokens[i + 1].typ !in [.str, .labr] {
+					return error('$err_label value of field "$field_name" must be either string or an array of strings')
+				}
+				field_value := tokens[i + 1].val
+				match field_name {
+					'name' {
+						mn.name = field_value
+					}
+					'version' {
+						mn.version = field_value
+					}
+					'license' {
+						mn.license = field_value
+					}
+					'repo_url' {
+						mn.repo_url = field_value
+					}
+					'description' {
+						mn.description = field_value
+					}
+					'author' {
+						mn.author = field_value
+					}
+					'dependencies' {
+						deps, idx := get_array_content(tokens, i + 1) or {
+							return error(err)
+						}
+						mn.dependencies = deps
+						i = idx
+						continue
+					}
+					else {
+						if tokens[i + 1].typ == .labr {
+							vals, idx := get_array_content(tokens, i + 1) or {
+								return error(err)
+							}
+							mn.unknown[field_name] = vals
+							i = idx
+							continue
+						}
+						mn.unknown[field_name] = [field_value]
+					}
+				}
+				i += 2
+				continue
+			}
+			.comma {
+				if tokens[i - 1].typ !in [.str, .rabr] || tokens[i + 1].typ != .field_key {
+					return error('$err_label invalid comma placement')
+				}
+				i++
+				continue
+			}
+			else {
+				return error('$err_label invalid token "$tok.val"')
+			}
+		}
+	}
+	return mn
+}