x64 machine code generation (ELF)

pull/2845/head
Alexander Medvednikov 2019-11-19 09:53:52 +03:00
parent ab91733a28
commit 9712213f50
13 changed files with 564 additions and 15 deletions

View File

@ -0,0 +1,6 @@
// Demo program for the x64 backend.
// It uses only a constant integer range loop and `println` calls whose
// single argument is a string literal.
fn main() {
// Range loop with literal bounds; the loop variable itself is not used.
for _ in 0..5 {
println('Hello world from V x64 machine code generator!')
}
println('Hello again!')
}

11
v.v
View File

@ -95,31 +95,30 @@ fn main() {
//println('unknown command/argument\n')
//println(compiler.help_text)
}
// Construct the V object from command line arguments
mut v := compiler.new_v(args)
if v.pref.is_verbose {
println(args)
}
if 'run' in args {
// always recompile for now, too error prone to skip recompilation otherwise
// for example for -repl usage, especially when piping lines to v
v.compile()
v.run_compiled_executable_and_exit()
}
mut tmark := benchmark.new_benchmark()
v.compile()
if v.pref.x64 {
v.compile_x64()
} else {
v.compile()
}
if v.pref.is_stats {
tmark.stop()
println( 'compilation took: ' + tmark.total_duration().str() + 'ms')
}
if v.pref.is_test {
v.run_compiled_executable_and_exit()
}
v.finalize_compilation()
}

View File

@ -53,6 +53,10 @@ pub fn tos3(s *C.char) string {
}
}
// println is an empty stub here: calling it does nothing.
// NOTE(review): presumably this exists so that `println` resolves as a known
// function while a backend emits the real output code at the call site - confirm.
pub fn println(s string) {
}
/*
pub fn (a string) clone() string {
mut b := string {

View File

@ -283,9 +283,10 @@ fn (p mut Parser) fn_decl() {
// C function header def? (fn C.NSMakeRect(int,int,int,int))
is_c := f.name == 'C' && p.tok == .dot
// Just fn signature? only builtin.v + default build mode
if p.is_vh {
//println('\n\nfn_decl() name=$f.name receiver_typ=$receiver_typ nogen=$p.cgen.nogen')
}
//if p.is_vh {
//if f.name == 'main' {
//println('\n\nfn_decl() name=$f.name pass=$p.pass $p.file_name receiver_typ=$receiver_typ nogen=$p.cgen.nogen')
//}
if is_c {
p.check(.dot)
f.name = p.check_name()
@ -369,7 +370,7 @@ fn (p mut Parser) fn_decl() {
//p.fgen_nl()
}
// Register ?option type for return value and args
if typ.starts_with('Option_') {
if typ.starts_with('Option_') {
p.cgen.typedefs << 'typedef Option $typ;'
}
for arg in f.args {
@ -989,6 +990,12 @@ fn (p mut Parser) fn_call_args(f mut Fn) {
if clone {
p.gen('/*YY f=$f.name arg=$arg.name is_moved=$arg.is_moved*/string_clone(')
}
// x64 println gen
if p.pref.x64 && i == 0 && f.name == 'println' && p.tok == .str && p.peek() == .rpar {
p.x64.gen_print(p.lit)
}
mut typ := p.bool_expression()
// Register an interface type usage:
// fn run(r Animal) { ... }
@ -1008,6 +1015,8 @@ fn (p mut Parser) fn_call_args(f mut Fn) {
if clone {
p.gen(')')
}
// Optimize `println`: replace it with `printf` to avoid extra allocations and
// function calls.
// `println(777)` => `printf("%d\n", 777)`
@ -1021,6 +1030,7 @@ fn (p mut Parser) fn_call_args(f mut Fn) {
if i == 0 && (f.name == 'println' || f.name == 'print') &&
!(typ in ['string', 'ustring', 'void' ])
{
//
T := p.table.find_type(typ)
$if !windows {
$if !js {
@ -1543,7 +1553,7 @@ fn (p &Parser) fn_signature_v(f &Fn) string {
if f.is_method {
receiver_arg := f.args[0]
receiver_type := receiver_arg.typ.trim('*')
f_name = f_name.all_after('${receiver_type}_')
f_name = f_name.all_after('${receiver_type}_')
mut rcv_typ := receiver_arg.typ.replace('array_', '[]').replace('map_', 'map[string]')
if receiver_arg.is_mut { rcv_typ = 'mut '+rcv_typ.trim('*') }
else if rcv_typ.ends_with('*') || receiver_arg.ptr { rcv_typ = '&'+rcv_typ.trim_right('&*') }

View File

@ -11,6 +11,8 @@ fn (p mut Parser) for_st() {
next_tok := p.peek()
//debug := p.scanner.file_path.contains('r_draw')
p.open_scope()
mut label := 0
mut to := 0
if p.tok == .lcbr {
// Infinite loop
p.gen('while (1) {')
@ -130,9 +132,17 @@ fn (p mut Parser) for_st() {
if is_range {
p.check_types(typ, 'int')
p.check_space(.dotdot)
if p.pref.x64 {
to = p.lit.int()
}
range_typ, range_expr := p.tmp_expr()
p.check_types(range_typ, 'int')
range_end = range_expr
if p.pref.x64 {
label = p.x64.gen_loop_start(expr.int())
//to = range_expr.int() // TODO why empty?
}
}
is_arr := typ.contains('array')
is_str := typ == 'string'
@ -189,5 +199,8 @@ fn (p mut Parser) for_st() {
p.close_scope()
p.for_expr_cnt--
p.returns = false // TODO handle loops that are guaranteed to return
if label > 0 {
p.x64.gen_loop_end(to, label)
}
}

View File

@ -0,0 +1,25 @@
module compiler
import filepath
//import compiler.x64
// compile_x64 builds the program with the native x64 backend instead of
// going through C: it queues the `builtin/bare` files plus the user path,
// emits the ELF header, runs the declaration pass and then the main pass
// over every queued file, and finally emits the ELF footer.
pub fn (v mut V) compile_x64() {
	$if !linux {
		println('v -x64 can only generate Linux binaries for now')
		println('You are not on a Linux system, so you will not ' +
			'be able to run the resulting executable')
	}
	// Files to parse: the bare builtin module first, then the user's path.
	bare_builtin_dir := filepath.join(v.pref.vlib_path, 'builtin', 'bare')
	v.files << v.v_files_from_dir(bare_builtin_dir)
	v.files << v.dir
	v.x64.generate_elf_header()
	// Pass 1: declarations for all files.
	for path in v.files {
		v.parse(path, .decl)
	}
	// Pass 2: main pass for all files (this is where machine code is emitted).
	for path in v.files {
		v.parse(path, .main)
	}
	v.x64.generate_elf_footer()
}

View File

@ -8,6 +8,7 @@ import (
os
strings
filepath
compiler.x64
)
pub const (
@ -63,6 +64,7 @@ pub mut:
dir string // directory (or file) being compiled (TODO rename to path?)
table &Table // table with types, vars, functions etc
cgen &CGen // C code generator
x64 &x64.Gen
pref &Preferences // all the preferences and settings extracted to a struct for reusability
lang_dir string // "~/code/v"
out_name string // "program.exe"
@ -123,6 +125,7 @@ pub mut:
vlib_path string
vpath string
x64 bool
}
// Should be called by main at the end of the compilation process, to cleanup
@ -968,9 +971,12 @@ pub fn new_v(args[]string) &V {
mut out_name_c := get_vtmp_filename(out_name, '.tmp.c')
cflags := get_cmdline_cflags(args)
rdir := os.realpath(dir)
rdir_name := os.filename(rdir)
if '-bare' in args {
verror('use -freestanding instead of -bare')
}
obfuscate := '-obf' in args
is_repl := '-repl' in args
@ -998,7 +1004,8 @@ pub fn new_v(args[]string) &V {
compress: '-compress' in args
enable_globals: '--enable-globals' in args
fast: '-fast' in args
is_bare: '-bare' in args
is_bare: '-freestanding' in args
x64: '-x64' in args
is_repl: is_repl
build_mode: build_mode
cflags: cflags
@ -1028,6 +1035,7 @@ pub fn new_v(args[]string) &V {
table: new_table(obfuscate)
out_name_c: out_name_c
cgen: new_cgen(out_name_c)
x64: x64.new_gen(out_name)
vroot: vroot
pref: pref
mod: mod

View File

@ -7,6 +7,7 @@ module compiler
import (
os
strings
compiler.x64
)
struct Parser {
@ -28,6 +29,7 @@ mut:
prev_tok2 TokenKind // TODO remove these once the tokens are cached
lit string
cgen &CGen
x64 &x64.Gen
table &Table
import_table ImportTable // Holds imports for just the file being parsed
pass Pass
@ -161,6 +163,7 @@ fn (v mut V) new_parser(scanner &Scanner) Parser {
table: v.table
cur_fn: EmptyFn
cgen: v.cgen
x64: v.x64
pref: v.pref
os: v.os
vroot: v.vroot
@ -2967,3 +2970,7 @@ fn (p mut Parser) is_expr_fn_call(start_tok_idx int) (bool, string) {
}
return is_fn_call, expr
}
// todo_remove creates a throwaway x64 generator and discards it.
// NOTE(review): presumably this only exists to keep the `compiler.x64`
// import referenced from this file - confirm, then delete as the name suggests.
fn todo_remove() {
x64.new_gen('f')
}

View File

@ -299,8 +299,8 @@ fn (p mut Parser) struct_init(typ string) string {
continue
}
field_typ := field.typ
if !p.builtin_mod && field_typ.ends_with('*') && field_typ.contains('Cfg') {
p.error('pointer field `${typ}.${field.name}` must be initialized')
if !p.builtin_mod && field_typ.ends_with('*') && p.mod != 'os' { //&&
p.warn('pointer field `${typ}.${field.name}` must be initialized')
}
// init map fields
if field_typ.starts_with('map_') {

View File

@ -0,0 +1,96 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module x64
import os
// ELF identification and header constants.
// generate_elf_header uses mag0..mag3, elfclass64, elfdata2lsb, ev_current
// and e_machine; the remaining names are not referenced by the code in this file.
const (
// the four magic bytes that open the file
mag0 = 0x7f
mag1 = `E`
mag2 = `L`
mag3 = `F`
ei_class = 4
elfclass64 = 2
elfdata2lsb = 1
ev_current = 1
elf_osabi = 0
// ELF file types
et_rel = 1
et_exec = 2
et_dyn = 3
// value written into the e_machine header field
e_machine = 0x3e
shn_xindex = 0xffff
sht_null = 0
)
const (
// Virtual address of the single program segment. It is written as p_vaddr,
// and is the base added to buffer offsets for e_entry and for the string
// addresses patched in by generate_elf_footer.
segment_start = 0x400000
)
// generate_elf_header appends a 64-byte ELF64 file header followed by one
// 56-byte program header to the output buffer. The position of the p_filesz
// field is remembered in g.file_size_pos so that generate_elf_footer can
// patch in the final size. Generated code starts right after these headers.
pub fn (g mut Gen) generate_elf_header() {
	eh_size := 0x40 // size of the ELF file header
	phent_size := 0x38 // size of one program header entry
	// --- e_ident (16 bytes) ---
	g.buf << [byte(mag0), mag1, mag2, mag3]
	g.buf << elfclass64 // file class
	g.buf << elfdata2lsb // data encoding
	g.buf << ev_current // file version
	g.buf << 1 // OS/ABI byte (the elf_osabi constant is not used here)
	g.write64(0) // remaining 8 bytes of e_ident
	// --- rest of the file header ---
	g.write16(et_exec) // e_type
	g.write16(e_machine) // e_machine
	g.write32(ev_current) // e_version
	g.write64(segment_start + eh_size + phent_size) // e_entry: first byte after both headers
	g.write64(eh_size) // e_phoff: program header follows the file header
	g.write64(0) // e_shoff
	g.write32(0) // e_flags
	g.write16(eh_size) // e_ehsize
	g.write16(phent_size) // e_phentsize
	g.write16(1) // e_phnum
	g.write16(0) // e_shentsize
	g.write16(0) // e_shnum (number of sections)
	g.write16(0) // e_shstrndx
	// --- program header (Elf64_Phdr) ---
	g.write32(1) // p_type
	g.write32(5) // p_flags
	g.write64(0) // p_offset
	g.write64(segment_start) // p_vaddr
	g.write64(segment_start) // p_paddr
	// p_filesz and p_memsz are placeholders, patched by generate_elf_footer
	g.file_size_pos = g.buf.len
	g.write64(0) // p_filesz
	g.write64(0) // p_memsz
	g.write64(0x1000) // p_align
	// user code starts here, at file offset 0x78
}
// generate_elf_footer finishes the binary:
//   1. emits an exit(0) system call after the user code,
//   2. appends every collected string and patches its absolute address into
//      the placeholder recorded for it by gen_print,
//   3. patches the segment's file and memory size into the program header,
//   4. writes the buffer to disk.
// The file is written to the output name the generator was created with;
// 'out.bin' is used only when that name is empty. Panics if the file cannot
// be created.
pub fn (g mut Gen) generate_elf_footer() {
	// Return 0
	g.mov(.edi, 0) // ret value
	g.mov(.eax, 60)
	g.syscall()
	// Strings table
	// Loop thru all strings and set the right addresses
	for i, s in g.strings {
		// The string is about to be appended at g.buf.len, so that is its address
		// (relative to the segment start); store it into the matching placeholder.
		g.write64_at(segment_start + g.buf.len, int(g.str_pos[i]))
		g.write_string(s)
		g.write8(6)
	}
	// Now we know the file size, set it
	file_size := g.buf.len
	g.write64_at(file_size, g.file_size_pos) // p_filesz
	g.write64_at(file_size, g.file_size_pos+8) // p_memsz
	// Create the binary
	mut out_path := g.out_name
	if out_path == '' {
		out_path = 'out.bin'
	}
	f := os.create(out_path) or { panic(err) }
	f.write_bytes(g.buf.data, g.buf.len)
	f.close()
	println('x64 elf binary has been successfully generated')
}

View File

@ -0,0 +1,161 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module x64
/*
This file is unused right now, since binaries without sections
are generated.
But it will be necessary once we have dynamic linking.
*/
// SectionType is the section kind stored in a section header.
// section_header writes the numeric value of the member as a 32-bit field,
// so the explicit values below must not be changed or reordered.
enum SectionType {
null = 0
progbits = 1
symtab = 2
strtab = 3
rela = 4
}
// SectionConfig describes one section header to be emitted by section_header.
// section_header reads name (only its length), typ, flags, datalen, link,
// info, align and entsize; `data` and `is_saa` are not read by it.
struct SectionConfig {
name string // section name; its length + 1 advances the name position
typ SectionType
flags i64
data voidptr
is_saa bool
datalen i64 // written as the section size; also advances the running data offset
link int
info int
align i64
entsize i64
}
// section_header appends one 64-byte section header built from `c` to the
// buffer, then advances the running name position by the name length + 1 and
// the running data offset by the data length + 1.
fn (g mut Gen) section_header(c SectionConfig) {
	// Snapshot both running counters; the header stores their current values.
	name_pos := g.sect_header_name_pos
	data_pos := g.offset
	g.write32(name_pos) // name position
	g.write32(int(c.typ)) // section type
	g.write64(c.flags)
	g.write64(0) // sh_addr
	g.write64(data_pos) // offset
	g.write64(c.datalen) // size
	g.write32(c.link)
	g.write32(c.info)
	g.write64(c.align)
	g.write64(c.entsize)
	// Advance the counters for the next section.
	g.sect_header_name_pos = name_pos + c.name.len + 1
	g.offset = data_pos + c.datalen + 1
}
// genobj currently does nothing: its whole body is commented out.
// The disabled code sketches how a sequence of section headers would be
// emitted through section_header.
// NOTE(review): in the disabled code the '.strtab' entry is given
// `typ: .symtab`; presumably it should be `.strtab` - confirm before enabling.
fn genobj() {
/*
// SHN_UNDEF
mut g := Gen{}
nr_sections := 7
g.section_header(SectionConfig{
name: ''
typ: .null
flags:0
data: 0
is_saa: false
link: 0
info:0
align:0
entsize: 0
})
/*
for sect in sections {
g.section_header(SectionConfig{
name:0
typ: sect.typ
flags: sect.flags
data: sect.data
is_saa: true
datalen: sect.len
link: 0
info: 0
align: sect.align
entsize: sect.entsize
})
}
*/
g.section_header(SectionConfig{
name: '.DATA'
typ: .progbits
flags: 0x2
//data: sect.data
is_saa: true
datalen: 0xd
link: 0
info: 0
align: 1
entsize: 0
})
g.section_header(SectionConfig{
name: '.TEXT'
typ: .progbits
flags: 0x2
//data: sect.data
is_saa: true
datalen: 0xd
link: 0
info: 0
align: 1
entsize: 0
})
g.section_header(SectionConfig{
name: '.shstrtab'
typ: .strtab
flags: 0x2
//data: sect.data
is_saa: true
datalen: 0x22
link: 0
info: 0
align: 1
entsize: 0
})
g.section_header(SectionConfig{
name: '.symtab'
typ: .symtab
flags: 0x2
//data: sect.data
is_saa: true
datalen: 0xd
link: 0
info: 0
align: 1
entsize: 0
})
g.section_header(SectionConfig{
name: '.strtab'
typ: .symtab
flags: 0x2
//data: sect.data
is_saa: true
datalen: 0xd
link: 0
info: 0
align: 1
entsize: 0
})
g.section_header(SectionConfig{
name: '.rela.TEXT'
typ: .rela
flags: 0x0
//data: sect.data
is_saa: true
datalen: 0x18
link: 4
info: 2
align: 8
entsize: 0x18
})
*/
}

View File

@ -0,0 +1,199 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module x64
// Gen is the x64 machine code generator. All output (ELF headers, machine
// code, string data) is appended to `buf`.
pub struct Gen {
out_name string // output name given to new_gen
mut:
buf []byte // the bytes of the binary being built
sect_header_name_pos int // running name position used by section_header
offset i64 // running data offset used by section_header
str_pos []i64 // buffer positions of the 8-byte address placeholders, one per entry in `strings`
strings []string // TODO use a map and don't duplicate strings
//str string
file_size_pos i64 // buffer position of the p_filesz placeholder (p_memsz follows 8 bytes later)
//string_addr map[string]i64
}
// Register names the registers the emitters can be asked about.
// Not every emitter handles every member: in this file `mov` handles eax,
// edi, edx, rsi and r12, `mov64` handles rsi, and `inc`/`cmp` handle r12.
enum Register {
eax
edi
rax
rdi
rsi
edx
rdx
r12
}
// Size is an operand size selector in bits. `cmp` uses it to pick the
// opcode byte and accepts only _8 and _32.
enum Size {
_8
_16
_32
_64
}
// new_gen returns a fresh generator with an empty output buffer that
// remembers `out_name` as its output name.
pub fn new_gen(out_name string) &Gen {
	gen := &Gen{
		out_name: out_name
		buf: []
		sect_header_name_pos: 0
	}
	return gen
}
// write8 appends the lowest byte of `n` to the buffer.
fn (g mut Gen) write8(n int) {
// write 1 byte
g.buf << byte(n)
}
// write16 appends the low 16 bits of `n` to the buffer, lowest byte first.
fn (g mut Gen) write16(n int) {
	g.write8(n) // bits 0..7
	g.write8(n >> 8) // bits 8..15
}
// write32 appends the 4 bytes of `n` to the buffer, lowest byte first.
fn (g mut Gen) write32(n int) {
	g.write16(n) // low half: bits 0..15
	g.write16(n >> 16) // high half: bits 16..31
}
// write64 appends the 8 bytes of `n` to the buffer, lowest byte first.
fn (g mut Gen) write64(n i64) {
	mut rest := n
	for _ in 0..8 {
		// emit the current lowest byte, then move the next one into place
		g.buf << byte(rest)
		rest = rest >> 8
	}
}
// write64_at overwrites the 8 bytes starting at buffer position `at` with
// `n`, lowest byte first. The buffer must already contain those 8 bytes.
fn (g mut Gen) write64_at(n i64, at i64) {
	mut pos := at
	mut rest := n
	for _ in 0..8 {
		g.buf[pos] = byte(rest)
		pos++
		rest = rest >> 8
	}
}
// write_string appends the bytes of `s` to the buffer. No terminator is added.
fn (g mut Gen) write_string(s string) {
	for i := 0; i < s.len; i++ {
		g.buf << s[i]
	}
}
// inc emits an instruction that increments `reg` by one.
// Only r12 is supported; any other register panics.
fn (g mut Gen) inc(reg Register) {
	// prefix byte and opcode byte
	g.write8(0x49)
	g.write8(0xff)
	// last byte selects the register
	if reg == .r12 {
		g.write8(0xc4)
	} else {
		panic('unhandled inc $reg')
	}
}
// cmp emits `cmp reg, val` with an immediate operand.
// `size` selects the width of the encoded immediate: ._8 emits opcode 0x83
// followed by one immediate byte, ._32 emits opcode 0x81 followed by four
// immediate bytes. Only r12 is supported as `reg`; unsupported sizes or
// registers panic.
fn (g mut Gen) cmp(reg Register, size Size, val i64) {
	g.write8(0x49)
	// Second byte depends on the size of the value
	match size {
		._8 { g.write8(0x83) }
		._32 { g.write8(0x81) }
		else { panic('unhandled cmp') }
	}
	// Third byte depends on the register being compared to
	match reg {
		.r12 { g.write8(0xfc) }
		else { panic('unhandled cmp') }
	}
	// The immediate must be exactly as wide as the opcode announces,
	// otherwise the following instruction bytes are decoded as part of it.
	if size == ._8 {
		g.write8(int(val))
	} else {
		g.write32(int(val))
	}
}
// abs returns the absolute value of `a`.
fn abs(a i64) i64 {
	if a < 0 {
		return -a
	}
	return a
}
// jle emits a "jump if less or equal" to buffer position `addr`.
// The displacement is relative to the end of the jump instruction. When it
// fits in a signed byte the 2-byte short form (0x7e rel8) is emitted;
// otherwise the 6-byte near form (0x0f 0x8e rel32) is used, so distant
// targets no longer wrap around into a wrong short displacement.
fn (g mut Gen) jle(addr i64) {
	// short form is 2 bytes long
	short_rel := addr - (g.buf.len + 2)
	if short_rel >= -128 && short_rel <= 127 {
		g.write8(0x7e)
		g.write8(int(short_rel)) // the low byte is the two's complement rel8
		return
	}
	// near form is 6 bytes long
	near_rel := addr - (g.buf.len + 6)
	g.write8(0x0f)
	g.write8(0x8e)
	g.write32(int(near_rel))
}
// mov64 emits a move of the 64-bit immediate `val` into `reg`: two opcode
// bytes followed by the 8 immediate bytes. Only rsi is supported; any other
// register panics instead of appending an immediate with no opcode in front
// of it.
fn (g mut Gen) mov64(reg Register, val i64) {
	match reg {
		.rsi {
			g.write8(0x48)
			g.write8(0xbe)
		}
		else { panic('unhandled mov $reg') }
	}
	g.write64(val)
}
// call appends the single opcode byte 0xe8.
// NOTE(review): `val` is not used and no operand bytes are written after the
// opcode; presumably the call target still has to be encoded here - confirm
// before using this function.
fn (g mut Gen) call(val int) {
g.write8(0xe8)
}
// syscall emits the two-byte `syscall` instruction (0x0f 0x05).
fn (g mut Gen) syscall() {
	// write16 stores the low byte first, so this appends 0x0f then 0x05
	g.write16(0x050f)
}
// ret appends the single byte 0xc3 (the `ret` instruction).
fn (g mut Gen) ret() {
g.write8(0xc3)
}
// gen_loop_start emits the head of a counted loop: r12 is loaded with `from`,
// and the loop body begins with an increment of r12.
// Returns the buffer position of that increment, which is the address the
// loop end has to jump back to.
pub fn (g mut Gen) gen_loop_start(from int) int {
	g.mov(.r12, from)
	// everything emitted from here on is repeated on each iteration
	loop_top := g.buf.len
	g.inc(.r12)
	return loop_top
}
// gen_loop_end emits the tail of a counted loop: compare the counter in r12
// with the bound and jump back to `label` while more iterations remain.
// `to` is the exclusive upper bound of the range. Because the counter is
// incremented at the top of the body, it is compared against `to - 1` with a
// "less or equal" jump, so a range from..to runs the body to - from times
// rather than one time too many.
// The bound is encoded as an 8-bit immediate.
pub fn (g mut Gen) gen_loop_end(to int, label int) {
	g.cmp(.r12, ._8, to - 1)
	g.jle(label)
}
// gen_print emits code that writes `s` followed by a newline via system
// call number 1 with 1 as its first argument.
// The text itself is only queued in g.strings; its address is not known yet,
// so a zero placeholder is emitted and the placeholder's buffer position is
// recorded in g.str_pos for generate_elf_footer to patch.
pub fn (g mut Gen) gen_print(s string) {
	line := s + '\n'
	g.strings << line
	//g.string_addr[s] = str_pos
	g.mov(.eax, 1)
	g.mov(.edi, 1)
	// mov64 writes 2 opcode bytes first; the 8-byte immediate starts right after them
	imm_pos := g.buf.len + 2
	g.str_pos << imm_pos
	g.mov64(.rsi, 0) // str pos // PLACEHOLDER
	g.mov(.edx, line.len) // len, including the newline
	g.syscall()
}
// mov emits a move of the 32-bit immediate `val` into `reg`: the opcode
// byte(s) for the register followed by the 4 immediate bytes.
// Supported registers: eax, edi, edx, rsi and r12; anything else panics.
fn (g mut Gen) mov(reg Register, val int) {
	match reg {
		.eax { g.write8(0xb8) }
		.edi { g.write8(0xbf) }
		.edx { g.write8(0xba) }
		.rsi {
			// 48 c7 c6 takes a 4-byte immediate (sign-extended into rsi).
			// The 48 be form expects an 8-byte immediate and would swallow
			// the next instruction bytes when followed by only 4.
			g.write8(0x48)
			g.write8(0xc7)
			g.write8(0xc6)
		}
		.r12 {
			g.write8(0x41)
			g.write8(0xbc) // r11 is 0xbb etc
		}
		else {
			panic('unhandled mov $reg')
		}
	}
	g.write32(val)
}

View File

@ -87,6 +87,27 @@ pub fn (f File) read_bytes_at(size, pos int) []byte {
return arr
}
// read_bytes reads the whole file at `path` and returns its contents as a
// byte array.
// Returns an error if the file cannot be opened, if its size cannot be
// determined, or if its contents cannot be read completely.
pub fn read_bytes(path string) ?[]byte {
	mut fp := vfopen(path, 'rb')
	if isnil(fp) {
		return error('failed to open file "$path"')
	}
	// Determine the file size by seeking to the end.
	C.fseek(fp, 0, C.SEEK_END)
	fsize := C.ftell(fp)
	if fsize < 0 {
		C.fclose(fp)
		return error('failed to get the size of file "$path"')
	}
	C.rewind(fp)
	// Read straight into the result array: no temporary buffer to copy from
	// (and to free afterwards).
	mut res := [`0`].repeat(fsize)
	nr_items := C.fread(res.data, fsize, 1, fp)
	C.fclose(fp)
	// One item of `fsize` bytes was requested; an empty file yields 0 items.
	if fsize > 0 && nr_items != 1 {
		return error('failed to read file "$path"')
	}
	return res
}
// read_file reads the file in `path` and returns the contents.
pub fn read_file(path string) ?string {
mode := 'rb'