From 9712213f50e34e411490eac30284b7b35ccdc85f Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Tue, 19 Nov 2019 09:53:52 +0300 Subject: [PATCH] x64 machine code generation (ELF) --- examples/x64/hello_world.v | 6 + v.v | 11 +- vlib/builtin/bare/string_bare.v | 4 + vlib/compiler/fn.v | 20 +++- vlib/compiler/for.v | 13 +++ vlib/compiler/gen_x64.v | 25 ++++ vlib/compiler/main.v | 12 +- vlib/compiler/parser.v | 7 ++ vlib/compiler/struct.v | 4 +- vlib/compiler/x64/elf.v | 96 +++++++++++++++ vlib/compiler/x64/elf_obj.v | 161 ++++++++++++++++++++++++++ vlib/compiler/x64/gen.v | 199 ++++++++++++++++++++++++++++++++ vlib/os/os.v | 21 ++++ 13 files changed, 564 insertions(+), 15 deletions(-) create mode 100644 examples/x64/hello_world.v create mode 100644 vlib/compiler/gen_x64.v create mode 100644 vlib/compiler/x64/elf.v create mode 100644 vlib/compiler/x64/elf_obj.v create mode 100644 vlib/compiler/x64/gen.v diff --git a/examples/x64/hello_world.v b/examples/x64/hello_world.v new file mode 100644 index 0000000000..5c77122f7f --- /dev/null +++ b/examples/x64/hello_world.v @@ -0,0 +1,6 @@ +fn main() { + for _ in 0..5 { + println('Hello world from V x64 machine code generator!') + } + println('Hello again!') +} diff --git a/v.v b/v.v index bdf975d695..b34832ab11 100755 --- a/v.v +++ b/v.v @@ -95,31 +95,30 @@ fn main() { //println('unknown command/argument\n') //println(compiler.help_text) } - // Construct the V object from command line arguments mut v := compiler.new_v(args) if v.pref.is_verbose { println(args) } - if 'run' in args { // always recompile for now, too error prone to skip recompilation otherwise // for example for -repl usage, especially when piping lines to v v.compile() v.run_compiled_executable_and_exit() } - mut tmark := benchmark.new_benchmark() - v.compile() + if v.pref.x64 { + v.compile_x64() + } else { + v.compile() + } if v.pref.is_stats { tmark.stop() println( 'compilation took: ' + tmark.total_duration().str() + 'ms') } - if v.pref.is_test { v.run_compiled_executable_and_exit() } - v.finalize_compilation() } diff --git a/vlib/builtin/bare/string_bare.v b/vlib/builtin/bare/string_bare.v index 32a03aa33d..f323bdcc72 100644 --- a/vlib/builtin/bare/string_bare.v +++ b/vlib/builtin/bare/string_bare.v @@ -53,6 +53,10 @@ pub fn tos3(s *C.char) string { } } +pub fn println(s string) { + +} + /* pub fn (a string) clone() string { mut b := string { diff --git a/vlib/compiler/fn.v b/vlib/compiler/fn.v index 1d608b0b5a..80a0adbbd9 100644 --- a/vlib/compiler/fn.v +++ b/vlib/compiler/fn.v @@ -283,9 +283,10 @@ fn (p mut Parser) fn_decl() { // C function header def? (fn C.NSMakeRect(int,int,int,int)) is_c := f.name == 'C' && p.tok == .dot // Just fn signature? only builtin.v + default build mode - if p.is_vh { - //println('\n\nfn_decl() name=$f.name receiver_typ=$receiver_typ nogen=$p.cgen.nogen') - } + //if p.is_vh { + //if f.name == 'main' { + //println('\n\nfn_decl() name=$f.name pass=$p.pass $p.file_name receiver_typ=$receiver_typ nogen=$p.cgen.nogen') + //} if is_c { p.check(.dot) f.name = p.check_name() @@ -369,7 +370,7 @@ fn (p mut Parser) fn_decl() { //p.fgen_nl() } // Register ?option type for return value and args - if typ.starts_with('Option_') { + if typ.starts_with('Option_') { p.cgen.typedefs << 'typedef Option $typ;' } for arg in f.args { @@ -989,6 +990,12 @@ fn (p mut Parser) fn_call_args(f mut Fn) { if clone { p.gen('/*YY f=$f.name arg=$arg.name is_moved=$arg.is_moved*/string_clone(') } + + // x64 println gen + if p.pref.x64 && i == 0 && f.name == 'println' && p.tok == .str && p.peek() == .rpar { + p.x64.gen_print(p.lit) + } + mut typ := p.bool_expression() // Register an interface type usage: // fn run(r Animal) { ... } @@ -1008,6 +1015,8 @@ fn (p mut Parser) fn_call_args(f mut Fn) { if clone { p.gen(')') } + + // Optimize `println`: replace it with `printf` to avoid extra allocations and // function calls. // `println(777)` => `printf("%d\n", 777)` @@ -1021,6 +1030,7 @@ fn (p mut Parser) fn_call_args(f mut Fn) { if i == 0 && (f.name == 'println' || f.name == 'print') && !(typ in ['string', 'ustring', 'void' ]) { + // T := p.table.find_type(typ) $if !windows { $if !js { @@ -1543,7 +1553,7 @@ fn (p &Parser) fn_signature_v(f &Fn) string { if f.is_method { receiver_arg := f.args[0] receiver_type := receiver_arg.typ.trim('*') - f_name = f_name.all_after('${receiver_type}_') + f_name = f_name.all_after('${receiver_type}_') mut rcv_typ := receiver_arg.typ.replace('array_', '[]').replace('map_', 'map[string]') if receiver_arg.is_mut { rcv_typ = 'mut '+rcv_typ.trim('*') } else if rcv_typ.ends_with('*') || receiver_arg.ptr { rcv_typ = '&'+rcv_typ.trim_right('&*') } diff --git a/vlib/compiler/for.v b/vlib/compiler/for.v index c9b9848a79..24e807e120 100644 --- a/vlib/compiler/for.v +++ b/vlib/compiler/for.v @@ -11,6 +11,8 @@ fn (p mut Parser) for_st() { next_tok := p.peek() //debug := p.scanner.file_path.contains('r_draw') p.open_scope() + mut label := 0 + mut to := 0 if p.tok == .lcbr { // Infinite loop p.gen('while (1) {') @@ -130,9 +132,17 @@ fn (p mut Parser) for_st() { if is_range { p.check_types(typ, 'int') p.check_space(.dotdot) + if p.pref.x64 { + to = p.lit.int() + } range_typ, range_expr := p.tmp_expr() p.check_types(range_typ, 'int') range_end = range_expr + if p.pref.x64 { + label = p.x64.gen_loop_start(expr.int()) + //to = range_expr.int() // TODO why empty? + } + } is_arr := typ.contains('array') is_str := typ == 'string' @@ -189,5 +199,8 @@ fn (p mut Parser) for_st() { p.close_scope() p.for_expr_cnt-- p.returns = false // TODO handle loops that are guaranteed to return + if label > 0 { + p.x64.gen_loop_end(to, label) + } } diff --git a/vlib/compiler/gen_x64.v b/vlib/compiler/gen_x64.v new file mode 100644 index 0000000000..8638ea4830 --- /dev/null +++ b/vlib/compiler/gen_x64.v @@ -0,0 +1,25 @@ +module compiler + +import filepath + +//import compiler.x64 + +pub fn (v mut V) compile_x64() { + $if !linux { + println('v -x64 can only generate Linux binaries for now') + println('You are not on a Linux system, so you will not ' + + 'be able to run the resulting executable') + } + + v.files << v.v_files_from_dir(filepath.join(v.pref.vlib_path, 'builtin', 'bare')) + v.files << v.dir + v.x64.generate_elf_header() + for f in v.files { + v.parse(f, .decl) + } + for f in v.files { + v.parse(f, .main) + } + v.x64.generate_elf_footer() + +} diff --git a/vlib/compiler/main.v b/vlib/compiler/main.v index 1d93bff8ce..9e6eac667e 100644 --- a/vlib/compiler/main.v +++ b/vlib/compiler/main.v @@ -8,6 +8,7 @@ import ( os strings filepath + compiler.x64 ) pub const ( @@ -63,6 +64,7 @@ pub mut: dir string // directory (or file) being compiled (TODO rename to path?) table &Table // table with types, vars, functions etc cgen &CGen // C code generator + x64 &x64.Gen pref &Preferences // all the preferences and settings extracted to a struct for reusability lang_dir string // "~/code/v" out_name string // "program.exe" @@ -123,6 +125,7 @@ pub mut: vlib_path string vpath string + x64 bool } // Should be called by main at the end of the compilation process, to cleanup @@ -968,9 +971,12 @@ pub fn new_v(args[]string) &V { mut out_name_c := get_vtmp_filename(out_name, '.tmp.c') cflags := get_cmdline_cflags(args) - rdir := os.realpath(dir) rdir_name := os.filename(rdir) + + if '-bare' in args { + verror('use -freestanding instead of -bare') + } obfuscate := '-obf' in args is_repl := '-repl' in args @@ -998,7 +1004,8 @@ pub fn new_v(args[]string) &V { compress: '-compress' in args enable_globals: '--enable-globals' in args fast: '-fast' in args - is_bare: '-bare' in args + is_bare: '-freestanding' in args + x64: '-x64' in args is_repl: is_repl build_mode: build_mode cflags: cflags @@ -1028,6 +1035,7 @@ pub fn new_v(args[]string) &V { table: new_table(obfuscate) out_name_c: out_name_c cgen: new_cgen(out_name_c) + x64: x64.new_gen(out_name) vroot: vroot pref: pref mod: mod diff --git a/vlib/compiler/parser.v b/vlib/compiler/parser.v index 87916c8fc9..be6cbb0e3c 100644 --- a/vlib/compiler/parser.v +++ b/vlib/compiler/parser.v @@ -7,6 +7,7 @@ module compiler import ( os strings + compiler.x64 ) struct Parser { @@ -28,6 +29,7 @@ mut: prev_tok2 TokenKind // TODO remove these once the tokens are cached lit string cgen &CGen + x64 &x64.Gen table &Table import_table ImportTable // Holds imports for just the file being parsed pass Pass @@ -161,6 +163,7 @@ fn (v mut V) new_parser(scanner &Scanner) Parser { table: v.table cur_fn: EmptyFn cgen: v.cgen + x64: v.x64 pref: v.pref os: v.os vroot: v.vroot @@ -2967,3 +2970,7 @@ fn (p mut Parser) is_expr_fn_call(start_tok_idx int) (bool, string) { } return is_fn_call, expr } + +fn todo_remove() { + x64.new_gen('f') +} diff --git a/vlib/compiler/struct.v b/vlib/compiler/struct.v index c7b4a3b5a9..3d03c6ba90 100644 --- a/vlib/compiler/struct.v +++ b/vlib/compiler/struct.v @@ -299,8 +299,8 @@ fn (p mut Parser) struct_init(typ string) string { continue } field_typ := field.typ - if !p.builtin_mod && field_typ.ends_with('*') && field_typ.contains('Cfg') { - p.error('pointer field `${typ}.${field.name}` must be initialized') + if !p.builtin_mod && field_typ.ends_with('*') && p.mod != 'os' { //&& + p.warn('pointer field `${typ}.${field.name}` must be initialized') } // init map fields if field_typ.starts_with('map_') { diff --git a/vlib/compiler/x64/elf.v b/vlib/compiler/x64/elf.v new file mode 100644 index 0000000000..e79260a2dd --- /dev/null +++ b/vlib/compiler/x64/elf.v @@ -0,0 +1,96 @@ +// Copyright (c) 2019 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +module x64 + +import os + +const ( + mag0 = 0x7f + mag1 = `E` + mag2 = `L` + mag3 = `F` + ei_class = 4 + elfclass64 = 2 + elfdata2lsb = 1 + + ev_current = 1 + elf_osabi = 0 + + // ELF file types + et_rel = 1 + et_exec = 2 + et_dyn = 3 + + e_machine = 0x3e + + shn_xindex = 0xffff + + sht_null = 0 +) + +const ( + segment_start = 0x400000 +) + + +pub fn (g mut Gen) generate_elf_header() { + g.buf << [byte(mag0), mag1, mag2, mag3] + g.buf << elfclass64 // file class + g.buf << elfdata2lsb // data encoding + g.buf << ev_current // file version + g.buf << 1//elf_osabi + g.write64(0)//et_rel) // et_rel for .o + g.write16(2) // e_type + g.write16(e_machine) // + g.write32(ev_current) // e_version + eh_size := 0x40 + phent_size := 0x38 + g.write64(segment_start + eh_size + phent_size) // e_entry + g.write64(0x40) // e_phoff + g.write64(0) // e_shoff + g.write32(0) // e_flags + g.write16(eh_size) // e_ehsize + g.write16(phent_size) // e_phentsize + g.write16(1) // e_phnum + g.write16(0) // e_shentsize + g.write16(0) // e_shnum (number of sections) + g.write16(0) // e_shstrndx + // Elf64_Phdr + g.write32(1) // p_type + g.write32(5) // p_flags + g.write64(0) // p_offset + g.write64(segment_start) // p_vaddr addr:050 + g.write64(segment_start) // + g.file_size_pos = g.buf.len + g.write64(0) // p_filesz PLACEHOLDER, set to file_size later // addr: 060 + g.write64(0) // p_memsz + g.write64(0x1000) // p_align + // user code starts here at + // address: 00070 and a half +} + +pub fn (g mut Gen) generate_elf_footer() { + // Return 0 + g.mov(.edi, 0) // ret value + g.mov(.eax, 60) + g.syscall() + // Strings table + // Loop thru all strings and set the right addresses + for i, s in g.strings { + g.write64_at(segment_start + g.buf.len, int(g.str_pos[i])) + g.write_string(s) + g.write8(6) + } + // Now we know the file size, set it + file_size := g.buf.len + g.write64_at(file_size, g.file_size_pos) // set file size 64 bit value + g.write64_at(file_size, g.file_size_pos+8) + // Create the binary + f := os.create('out.bin') or { panic(err) } + f.write_bytes(g.buf.data, g.buf.len) + f.close() + println('x64 elf binary has been successfully generated') +} + diff --git a/vlib/compiler/x64/elf_obj.v b/vlib/compiler/x64/elf_obj.v new file mode 100644 index 0000000000..0491deeedb --- /dev/null +++ b/vlib/compiler/x64/elf_obj.v @@ -0,0 +1,161 @@ +// Copyright (c) 2019 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +module x64 + +/* +This file is unused right now, since binaries without sections +are generated. + +But it will be necessary once we have dynamic linking. +*/ + +enum SectionType { + null = 0 + progbits = 1 + symtab = 2 + strtab = 3 + rela = 4 +} + +struct SectionConfig { + name string + typ SectionType + flags i64 + data voidptr + is_saa bool + datalen i64 + link int + info int + align i64 + entsize i64 +} + +fn (g mut Gen) section_header(c SectionConfig) { + g.write32(g.sect_header_name_pos) + g.sect_header_name_pos += c.name.len + 1 + g.write32(int(c.typ)) + g.write64(c.flags) + g.write64(0) // sh_addr + g.write64(g.offset)// offset + g.offset += c.datalen+1 + g.write64(c.datalen) + g.write32(c.link) + g.write32(c.info) + g.write64(c.align) + g.write64(c.entsize) +} + + +fn genobj() { + /* + // SHN_UNDEF + mut g := Gen{} + nr_sections := 7 + g.section_header(SectionConfig{ + name: '' + typ: .null + flags:0 + data: 0 + is_saa: false + link: 0 + info:0 + align:0 + entsize: 0 + }) + + /* + for sect in sections { + g.section_header(SectionConfig{ + name:0 + typ: sect.typ + flags: sect.flags + data: sect.data + is_saa: true + datalen: sect.len + link: 0 + info: 0 + align: sect.align + entsize: sect.entsize + }) + + } + */ + + g.section_header(SectionConfig{ + name: '.DATA' + typ: .progbits + flags: 0x2 + //data: sect.data + is_saa: true + datalen: 0xd + link: 0 + info: 0 + align: 1 + entsize: 0 + }) + + g.section_header(SectionConfig{ + name: '.TEXT' + typ: .progbits + flags: 0x2 + //data: sect.data + is_saa: true + datalen: 0xd + link: 0 + info: 0 + align: 1 + entsize: 0 + }) + g.section_header(SectionConfig{ + name: '.shstrtab' + typ: .strtab + flags: 0x2 + //data: sect.data + is_saa: true + datalen: 0x22 + link: 0 + info: 0 + align: 1 + entsize: 0 + }) + g.section_header(SectionConfig{ + name: '.symtab' + typ: .symtab + flags: 0x2 + //data: sect.data + is_saa: true + datalen: 0xd + link: 0 + info: 0 + align: 1 + entsize: 0 + }) + g.section_header(SectionConfig{ + name: '.strtab' + typ: .symtab + flags: 0x2 + //data: sect.data + is_saa: true + datalen: 0xd + link: 0 + info: 0 + align: 1 + entsize: 0 + }) + g.section_header(SectionConfig{ + name: '.rela.TEXT' + typ: .rela + flags: 0x0 + //data: sect.data + is_saa: true + datalen: 0x18 + link: 4 + info: 2 + align: 8 + entsize: 0x18 + }) + */ +} + diff --git a/vlib/compiler/x64/gen.v b/vlib/compiler/x64/gen.v new file mode 100644 index 0000000000..602a01f95f --- /dev/null +++ b/vlib/compiler/x64/gen.v @@ -0,0 +1,199 @@ +// Copyright (c) 2019 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. + +module x64 + +pub struct Gen { + out_name string +mut: + buf []byte + sect_header_name_pos int + offset i64 + str_pos []i64 + strings []string // TODO use a map and don't duplicate strings + //str string + file_size_pos i64 + //string_addr map[string]i64 +} + +enum Register { + eax + edi + rax + rdi + rsi + edx + rdx + r12 +} + +enum Size { + _8 + _16 + _32 + _64 +} + +pub fn new_gen(out_name string) &Gen { + return &Gen{ + sect_header_name_pos : 0 + buf: [] + out_name: out_name + } +} + + +fn (g mut Gen) write8(n int) { + // write 1 byte + g.buf << byte(n) +} + +fn (g mut Gen) write16(n int) { + // write 2 bytes + g.buf << byte(n) + g.buf << byte(n >> 8) +} + +fn (g mut Gen) write32(n int) { + // write 4 bytes + g.buf << byte(n) + g.buf << byte(n >> 8) + g.buf << byte(n >> 16) + g.buf << byte(n >> 24) +} + +fn (g mut Gen) write64(n i64) { + // write 8 bytes + g.buf << byte(n) + g.buf << byte(n >> 8) + g.buf << byte(n >> 16) + g.buf << byte(n >> 24) + g.buf << byte(n >> 32) + g.buf << byte(n >> 40) + g.buf << byte(n >> 48) + g.buf << byte(n >> 56) +} + +fn (g mut Gen) write64_at(n i64, at i64) { + // write 8 bytes + g.buf[at] = byte(n) + g.buf[at+1] = byte(n >> 8) + g.buf[at+2] = byte(n >> 16) + g.buf[at+3] = byte(n >> 24) + g.buf[at+4] = byte(n >> 32) + g.buf[at+5] = byte(n >> 40) + g.buf[at+6] = byte(n >> 48) + g.buf[at+7] = byte(n >> 56) +} + +fn (g mut Gen) write_string(s string) { + for c in s { + g.write8(int(c)) + } +} + +fn (g mut Gen) inc(reg Register) { + g.write16(0xff49) + match reg { + .r12 { g.write8(0xc4) } + else { panic('unhandled inc $reg') } + } +} + +fn (g mut Gen) cmp(reg Register, size Size, val i64) { + g.write8(0x49) + // Second byte depends on the size of the value + match size { + ._8 { g.write8(0x83) } + ._32 { g.write8(0x81) } + else { panic('unhandled cmp') } + } + // Third byte depends on the register being compared to + match reg { + .r12 { g.write8(0xfc) } + else { panic('unhandled cmp') } + } + g.write8(int(val)) +} + +fn abs(a i64) i64 { return if a < 0 { -a } else { a } } + +fn (g mut Gen) jle(addr i64) { + offset := 0xff - int(abs(addr - g.buf.len))-1 + g.write8(0x7e) + g.write8(offset) +} + +fn (g mut Gen) mov64(reg Register, val i64) { + match reg { + .rsi { + g.write8(0x48) + g.write8(0xbe) + } + else { println('unhandled mov $reg') } + } + g.write64(val) +} + +fn (g mut Gen) call(val int) { + g.write8(0xe8) +} + +fn (g mut Gen) syscall() { + // g.write(0x050f) + g.write8(0x0f) + g.write8(0x05) +} + +fn (g mut Gen) ret() { + g.write8(0xc3) +} + +// returns label's relative address +pub fn (g mut Gen) gen_loop_start(from int) int { + g.mov(.r12, from) + label := g.buf.len + g.inc(.r12) + return label +} + +pub fn (g mut Gen) gen_loop_end(to int, label int) { + g.cmp(.r12, ._8, to) + g.jle(label) +} + +pub fn (g mut Gen) gen_print(s string) { + g.strings << s + '\n' + //g.string_addr[s] = str_pos + g.mov(.eax, 1) + g.mov(.edi, 1) + str_pos := g.buf.len + 2 + g.str_pos << str_pos + g.mov64(.rsi, 0) //segment_start + 0x9f) // str pos // PLACEHOLDER + g.mov(.edx, s.len+1) // len + g.syscall() +} + +fn (g mut Gen) mov(reg Register, val int) { + match reg { + .eax { g.write8(0xb8) } + .edi { g.write8(0xbf) } + .edx { g.write8(0xba) } + .rsi { + g.write8(0x48) + g.write8(0xbe) + } + .r12 { + g.write8(0x41) + g.write8(0xbc) // r11 is 0xbb etc + } + else { + panic('unhandled mov $reg') + } + + } + g.write32(val) +} + + diff --git a/vlib/os/os.v b/vlib/os/os.v index e9e18e1ecf..7959c18aa1 100644 --- a/vlib/os/os.v +++ b/vlib/os/os.v @@ -87,6 +87,27 @@ pub fn (f File) read_bytes_at(size, pos int) []byte { return arr } +pub fn read_bytes(path string) ?[]byte { + mut fp := vfopen(path, 'rb') + if isnil(fp) { + return error('failed to open file "$path"') + } + C.fseek(fp, 0, C.SEEK_END) + fsize := C.ftell(fp) + C.rewind(fp) + println('fsize=$fsize') + mut data := malloc(fsize) + C.fread(data, fsize, 1, fp) + mut res := [`0`].repeat(fsize) + for i in 0..fsize { + res[i] = data[i] + } + C.fclose(fp) + //res := []byte(data, 10) // TODO can't `return []byte(data)` + //println('res0 = ' + res[0].str()) + return res +} + // read_file reads the file in `path` and returns the contents. pub fn read_file(path string) ?string { mode := 'rb'