From da43267e09e749e02dfd1578f6f29123629f24fa Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Thu, 26 Sep 2019 05:28:43 +0300 Subject: [PATCH] parser: cached tokens (second step) --- compiler/comptime.v | 53 +++++++++++++++++++++++++-------- compiler/fn.v | 32 +++++++++----------- compiler/main.v | 54 ++++++++++++++++++++++++--------- compiler/parser.v | 35 ++++++++++++++++++---- compiler/scanner.v | 56 ++++------------------------------- compiler/table.v | 15 ++++++++++ compiler/tests/str_gen_test.v | 6 ++-- september.plan | 2 +- 8 files changed, 149 insertions(+), 104 deletions(-) diff --git a/compiler/comptime.v b/compiler/comptime.v index e6b9fad910..d0dd9e2b25 100644 --- a/compiler/comptime.v +++ b/compiler/comptime.v @@ -7,6 +7,7 @@ module main import ( vweb.tmpl // for `$vweb_html()` os + strings ) fn (p mut Parser) comp_time() { @@ -244,21 +245,47 @@ fn (p mut Parser) gen_array_str(typ Type) { !p.table.type_has_method(elm_type2, 'str') { p.error('cant print ${elm_type}[], unhandled print of ${elm_type}') } - p.cgen.fns << ' -string ${typ.name}_str($typ.name a) { - strings__Builder sb = strings__new_builder(a.len * 3); - strings__Builder_write(&sb, tos2("[")) ; - for (int i = 0; i < a.len; i++) { - strings__Builder_write(&sb, ${elm_type}_str( (($elm_type *) a.data)[i])); - - if (i < a.len - 1) { - strings__Builder_write(&sb, tos2(", ")) ; - + p.v.vgen_file.writeln(' +fn (a $typ.name) str() string { + mut sb := strings.new_builder(a.len * 3) + sb.write("[") + for i, elm in a { + sb.write(elm.str()) + if i < a.len - 1 { + sb.write(", ") + } } + sb.write("]") + return sb.str() } -strings__Builder_write(&sb, tos2("]")) ; -return strings__Builder_str(sb); -} ' + ') + p.cgen.fns << 'string ${typ.name}_str();' +} + +// `Foo { bar: 3, baz: 'hi' }` => '{ bar: 3, baz: "hi" }' +fn (p mut Parser) gen_struct_str(typ Type) { + p.add_method(typ.name, Fn{ + name: 'str' + typ: 'string' + args: [Var{typ: typ.name, is_arg:true}] + is_method: true + is_public: true + 
receiver_typ: typ.name + }) + + mut sb := strings.new_builder(typ.fields.len * 20) + sb.writeln('fn (a $typ.name) str() string {\nreturn') + sb.writeln("'{") + for field in typ.fields { + sb.writeln('\t$field.name: \$a.${field.name}') + } + sb.writeln("\n}'") + sb.writeln('}') + p.v.vgen_file.writeln(sb.str()) + // Need to manually add the definition to `fns` so that it stays + // at the top of the file. + // This function will get parsed by V after the main pass. + p.cgen.fns << 'string ${typ.name}_str();' } diff --git a/compiler/fn.v b/compiler/fn.v index 8b0bb41b86..a1debd4b28 100644 --- a/compiler/fn.v +++ b/compiler/fn.v @@ -127,8 +127,12 @@ fn (p mut Parser) register_var(v Var) { fn (p mut Parser) clear_vars() { // shared a := [1, 2, 3] p.var_idx = 0 - p.local_vars.free() - p.local_vars = []Var + if p.local_vars.len > 0 { + if p.pref.autofree { + p.local_vars.free() + } + p.local_vars = []Var + } } // vlib header file? @@ -173,7 +177,8 @@ fn (p mut Parser) fn_decl() { p.error('invalid receiver type `$receiver_typ` (`$receiver_typ` is an interface)') } // Don't allow modifying types from a different module - if !p.first_pass() && !p.builtin_mod && T.mod != p.mod { + if !p.first_pass() && !p.builtin_mod && T.mod != p.mod && + !p.fileis(vgen_file_name) { // allow .str() on builtin arrays println('T.mod=$T.mod') println('p.mod=$p.mod') p.error('cannot define new methods on non-local type `$receiver_typ`') @@ -918,21 +923,13 @@ fn (p mut Parser) fn_call_args(f mut Fn) &Fn { p.cgen.set_placeholder(ph, '${typ}_str(') p.gen(')') continue - } + } else if T.cat == .struct_ { + p.gen_struct_str(T) + p.cgen.set_placeholder(ph, '${typ}_str(') + p.gen(')') + continue + } error_msg := ('`$typ` needs to have method `str() string` to be printable') - if T.fields.len > 0 { - mut index := p.cgen.cur_line.len - 1 - for index > 0 && p.cgen.cur_line[index - 1] != `(` { index-- } - name := p.cgen.cur_line.right(index + 1) - if name == '}' { - p.error(error_msg) - } - 
p.cgen.resetln(p.cgen.cur_line.left(index)) - p.scanner.create_type_string(T, name) - p.cgen.cur_line.replace(typ, '') - p.next() - return p.fn_call_args(mut f) - } p.error(error_msg) } p.cgen.set_placeholder(ph, '${typ}_str(') @@ -1029,7 +1026,6 @@ fn (p mut Parser) fn_call_args(f mut Fn) &Fn { p.error('wrong number of arguments for fn `$f.name`: expected $f.args.len, but got more') } p.check(.rpar) - // p.gen(')') return f // TODO is return f right? } diff --git a/compiler/main.v b/compiler/main.v index 952a82b57a..8115510789 100644 --- a/compiler/main.v +++ b/compiler/main.v @@ -74,6 +74,7 @@ mut: vroot string mod string // module being built with -lib parsers []Parser + vgen_file os.File } struct Preferences { @@ -153,6 +154,8 @@ fn main() { vfmt(args) return } + + // Construct the V object from command line arguments mut v := new_v(args) if args.join(' ').contains(' test v') { @@ -223,13 +226,9 @@ fn (v mut V) compile() { if os.user_os() != 'windows' && v.os == .msvc { verror('Cannot build with msvc on ${os.user_os()}') } - + mut cgen := v.cgen cgen.genln('// Generated by V') - // Add builtin parsers - for i, file in v.files { - // v.parsers << v.new_parser(file) - } if v.pref.is_verbose { println('all .v files before:') println(v.files) @@ -239,11 +238,24 @@ fn (v mut V) compile() { println('all .v files:') println(v.files) } + if v.pref.is_debug { + println('\nparsers:') + for q in v.parsers { + println(q.file_name) + } + println('\nfiles:') + for q in v.files { + println(q) + } + } // First pass (declarations) for file in v.files { - mut p := v.new_parser(file) - p.parse(.decl) - //if p.pref.autofree { p.scanner.text.free() free(p.scanner) } + for i, p in v.parsers { + if p.file_path == file { + v.parsers[i].parse(.decl) + break + } + } } // Main pass cgen.pass = Pass.main @@ -279,13 +291,15 @@ fn (v mut V) compile() { v.dir.contains('/ui'))) { cgen.genln('id defaultFont = 0; // main.v') } - // We need the cjson header for all the json decoding user will do 
in default mode + // We need the cjson header for all the json decoding that will be done in + // default mode if v.pref.build_mode == .default_mode { if imports_json { cgen.genln('#include "cJSON.h"') } } if v.pref.build_mode == .embed_vlib || v.pref.build_mode == .default_mode { + //if v.pref.build_mode in [.embed_vlib, .default_mode] { // If we declare these for all modes, then when running `v a.v` we'll get // `/usr/bin/ld: multiple definition of 'total_m'` // TODO @@ -294,7 +308,7 @@ fn (v mut V) compile() { $if !js { cgen.genln('int g_test_ok = 1; ') } - if 'json' in v.table.imports { + if imports_json { cgen.genln(' #define js_get(object, key) cJSON_GetObjectItemCaseSensitive((object), (key)) ') @@ -307,8 +321,12 @@ fn (v mut V) compile() { cgen.genln('this line will be replaced with definitions') defs_pos := cgen.lines.len - 1 for file in v.files { - mut p := v.new_parser(file) - p.parse(.main) + for i, p in v.parsers { + if p.file_path == file { + v.parsers[i].parse(.main) + break + } + } //if p.pref.autofree { p.scanner.text.free() free(p.scanner) } // p.g.gen_x64() // Format all files (don't format automatically generated vlib headers) @@ -316,6 +334,10 @@ fn (v mut V) compile() { // new vfmt is not ready yet } } + // Close the file with generated V code (str() methods etc) and parse it + v.vgen_file.close() + mut vgen_parser := v.new_parser(vgen_file_name) + vgen_parser.parse(.main) v.log('Done parsing.') // Write everything mut d := strings.new_builder(10000)// Avoid unnecessary allocations @@ -335,8 +357,7 @@ fn (v mut V) compile() { d.writeln('; // Prof counters:') d.writeln(v.prof_counters()) } - dd := d.str() - cgen.lines[defs_pos] = dd// TODO `def.str()` doesn't compile + cgen.lines[defs_pos] = d.str() v.generate_main() v.generate_hot_reload_code() if v.pref.is_verbose { @@ -716,6 +737,10 @@ fn (v &V) log(s string) { } fn new_v(args[]string) &V { + os.rm(vgen_file_name) + vgen_file := os.open_append(vgen_file_name) or { panic(err) } + 
vgen_file.writeln('module main\nimport strings') + joined_args := args.join(' ') target_os := get_arg(joined_args, 'os', '') mut out_name := get_arg(joined_args, 'o', 'a.out') @@ -907,6 +932,7 @@ fn new_v(args[]string) &V { vroot: vroot pref: pref mod: mod + vgen_file: vgen_file } } diff --git a/compiler/parser.v b/compiler/parser.v index da61c9c93b..cf9547097d 100644 --- a/compiler/parser.v +++ b/compiler/parser.v @@ -9,12 +9,17 @@ import ( strings ) +const ( + vgen_file_name = 'vgen.tmp' +) + // TODO rename to Token // TODO rename enum Token to TokenType struct Tok { - tok Token - lit string - line_nr int + tok Token + lit string + line_nr int + name_idx int // name table index for O(1) lookup // col int } @@ -103,6 +108,8 @@ fn (v mut V) new_parser(path string) Parser { break } } + + //vgen_file := os.open_append(vgen_file_name) or { panic(err) } mut p := Parser { v: v @@ -120,7 +127,6 @@ fn (v mut V) new_parser(path string) Parser { os: v.os vroot: v.vroot local_vars: [Var{}].repeat(MaxLocalVars) - } $if js { p.is_js = true @@ -144,7 +150,15 @@ fn (v mut V) new_parser(path string) Parser { break } } - + + v.add_parser(p) + /* + if !(p in v.parsers) { + v.parsers << p + + } + */ + //p.next() //p.scanner.debug_tokens() return p @@ -202,6 +216,7 @@ fn (p & Parser) peek() Token { +/* fn (p mut Parser) next_old() { p.prev_tok2 = p.prev_tok p.prev_tok = p.tok @@ -210,6 +225,7 @@ fn (p mut Parser) next_old() { p.tok = res.tok p.lit = res.lit } +*/ fn (p &Parser) log(s string) { /* @@ -423,7 +439,7 @@ fn (p mut Parser) import_statement() { if p.tok != .name { p.error('bad import format') } - if p.peek() == .number && p.scanner.text[p.scanner.pos + 1] == `.` { + if p.peek() == .number { // && p.scanner.text[p.scanner.pos + 1] == `.` { p.error('bad import format. 
module/submodule names cannot begin with a number') } mut mod := p.check_name().trim_space() @@ -842,15 +858,18 @@ fn (p mut Parser) check(expected Token) { print_backtrace() p.error(s) } + /* if expected == .rcbr { p.fmt_dec() } p.fgen(p.strtok()) // vfmt: increase indentation on `{` unless it's `{}` + // TODO if expected == .lcbr && p.scanner.pos + 1 < p.scanner.text.len && p.scanner.text[p.scanner.pos + 1] != `}` { p.fgenln('') p.fmt_inc() } + */ p.next() if p.scanner.line_comment != '' { @@ -3896,6 +3915,10 @@ fn (p mut Parser) check_and_register_used_imported_type(typ_name string) { } fn (p mut Parser) check_unused_imports() { + // Don't run in the generated V file with `.str()` + if p.fileis(vgen_file_name) { + return + } mut output := '' for alias, mod in p.import_table.imports { if !p.import_table.is_used_import(alias) { diff --git a/compiler/scanner.v b/compiler/scanner.v index a790d2409d..dae1b67b5b 100644 --- a/compiler/scanner.v +++ b/compiler/scanner.v @@ -38,11 +38,11 @@ mut: fn new_scanner(file_path string) &Scanner { if !os.file_exists(file_path) { - verror('"$file_path" doesn\'t exist') + verror("$file_path doesn't exist") } mut raw_text := os.read_file(file_path) or { - verror('scanner: failed to open "$file_path"') + verror('scanner: failed to open $file_path') return 0 } @@ -660,14 +660,15 @@ fn (s &Scanner) error(msg string) { println(pointerline) } fullpath := os.realpath( s.file_path ) + _ = fullpath // The filepath:line:col: format is the default C compiler // error output format. It allows editors and IDE's like // emacs to quickly find the errors in the output // and jump to their source with a keyboard shortcut. // Using only the filename leads to inability of IDE/editors // to find the source file, when it is in another folder. 
- //println('${s.file_path}:${s.line_nr + 1}:${column+1}: $msg') - println('${fullpath}:${s.line_nr + 1}:${column+1}: $msg') + println('${s.file_path}:${s.line_nr + 1}:${column+1}: $msg') + //println('${fullpath}:${s.line_nr + 1}:${column+1}: $msg') exit(1) } @@ -834,53 +835,8 @@ fn is_nl(c byte) bool { return c == `\r` || c == `\n` } -fn (s &Scanner) get_opening_bracket() int { - mut pos := s.pos - mut parentheses := 0 - mut inside_string := false - - for pos > 0 && s.text[pos] != `\n` { - if s.text[pos] == `)` && !inside_string { - parentheses++ - } - if s.text[pos] == `(` && !inside_string { - parentheses-- - } - if s.text[pos] == `\'` && s.text[pos - 1] != `\\` && s.text[pos - 1] != `\`` { // ` // apostrophe balance comment. do not remove - inside_string = !inside_string - } - if parentheses == 0 { - break - } - pos-- - } - return pos -} - -// Foo { bar: 3, baz: 'hi' } => '{ bar: 3, baz: "hi" }' -fn (s mut Scanner) create_type_string(T Type, name string) { - line := s.line_nr - inside_string := s.inside_string - mut newtext := '\'{ ' - start := s.get_opening_bracket() + 1 - end := s.pos - for i, field in T.fields { - if i != 0 { - newtext += ', ' - } - newtext += '$field.name: ' + '$${name}.${field.name}' - } - newtext += ' }\'' - s.text = s.text.substr(0, start) + newtext + s.text.substr(end, s.text.len) - s.pos = start - 2 - s.line_nr = line - s.inside_string = inside_string -} - fn contains_capital(s string) bool { - // for c in s { - for i := 0; i < s.len; i++ { - c := s[i] + for c in s { if c >= `A` && c <= `Z` { return true } diff --git a/compiler/table.v b/compiler/table.v index 172ec57329..409ab31e40 100644 --- a/compiler/table.v +++ b/compiler/table.v @@ -20,8 +20,23 @@ mut: cflags []CFlag // ['-framework Cocoa', '-lglfw3'] fn_cnt int //atomic obfuscate bool + //names []Name } + +/* +enum NameCategory { + constant + mod + var + typ +} + +struct Name { + cat NameCategory +} +*/ + struct GenTable { fn_name string mut: diff --git 
a/compiler/tests/str_gen_test.v b/compiler/tests/str_gen_test.v index 200796a9ed..3bb31ef8ae 100644 --- a/compiler/tests/str_gen_test.v +++ b/compiler/tests/str_gen_test.v @@ -1,9 +1,11 @@ struct Foo { - a int + number int + str string + f f64 } fn test_array_str() { - f := Foo{34} + f := Foo{34, 'hello', 1.2} println(f) //s := f.str() //println(s) diff --git a/september.plan b/september.plan index c7cb654d22..69d5efeed3 100644 --- a/september.plan +++ b/september.plan @@ -7,7 +7,7 @@ - remove all compiler memory leaks - fix child function calls + fix non-ascii rendering in gg (ä, å, etc) -- cache all tokens once ++ cache all tokens once - enable vfmt - bring back vdoc and regenerate all module docs - optimize the parser (reduce map lookups)