native: generate relocatable ELFs and support relative strings (#13671)

pull/13685/head
pancake 2022-03-07 17:24:01 +01:00 committed by GitHub
parent 0fd4c699dd
commit b20c911d3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 171 additions and 94 deletions

View File

@ -458,11 +458,9 @@ pub fn (mut g Gen) gen_loop_end(to int, label int) {
g.jl(label) g.jl(label)
} }
pub fn (mut g Gen) allocate_string(s string, opsize int) int { pub fn (mut g Gen) allocate_string(s string, opsize int, typ RelocType) int {
g.strings << s
str_pos := g.buf.len + opsize str_pos := g.buf.len + opsize
g.str_pos << str_pos g.strs << String{s, str_pos, typ}
g.strs << String{s, str_pos}
return str_pos return str_pos
} }
@ -507,7 +505,7 @@ pub fn (mut g Gen) inline_strlen(r Register) {
// TODO: strlen of string at runtime // TODO: strlen of string at runtime
pub fn (mut g Gen) gen_print_reg(r Register, n int, fd int) { pub fn (mut g Gen) gen_print_reg(r Register, n int, fd int) {
mystrlen := true mystrlen := true // if n < 0 maybe?
g.mov_reg(.rsi, r) g.mov_reg(.rsi, r)
if mystrlen { if mystrlen {
g.inline_strlen(.rsi) g.inline_strlen(.rsi)
@ -550,25 +548,21 @@ pub fn (mut g Gen) gen_print(s string, fd int) {
g.apicall('GetStdHandle') g.apicall('GetStdHandle')
g.mov_reg(.rcx, .rax) g.mov_reg(.rcx, .rax)
// g.mov64(.rdx, g.allocate_string(s, 3)) // g.mov64(.rdx, g.allocate_string(s, 3))
g.lea(.rdx, g.allocate_string(s, 3)) g.lea(.rdx, g.allocate_string(s, 3, .abs64))
g.mov(.r8, s.len) // string length g.mov(.r8, s.len) // string length
g.write([byte(0x4c), 0x8d, 0x4c, 0x24, 0x20]) // lea r9, [rsp+0x20] g.write([byte(0x4c), 0x8d, 0x4c, 0x24, 0x20]) // lea r9, [rsp+0x20]
g.write([byte(0x48), 0xc7, 0x44, 0x24, 0x20]) g.write([byte(0x48), 0xc7, 0x44, 0x24, 0x20])
g.write32(0) // mov qword[rsp+0x20], 0 g.write32(0) // mov qword[rsp+0x20], 0
// g.mov(.r9, rsp+0x20) // g.mov(.r9, rsp+0x20)
g.apicall('WriteFile') g.apicall('WriteFile')
return } else {
}
//
// qq := s + '\n'
//
g.mov(.eax, g.nsyscall_write()) g.mov(.eax, g.nsyscall_write())
g.mov(.edi, fd) g.mov(.edi, fd)
// segment_start + 0x9f) // str pos // placeholder g.learel(.rsi, g.allocate_string(s, 3, .rel32)) // for rsi its 2
g.mov64(.rsi, g.allocate_string(s, 2)) // for rsi its 2
g.mov(.edx, s.len) // len g.mov(.edx, s.len) // len
g.syscall() g.syscall()
} }
}
fn (mut g Gen) nsyscall_write() int { fn (mut g Gen) nsyscall_write() int {
match g.pref.os { match g.pref.os {
@ -638,6 +632,24 @@ pub fn (mut g Gen) gen_amd64_exit(expr ast.Expr) {
g.trap() // should never be reached, just in case g.trap() // should never be reached, just in case
} }
// learel emits a RIP-relative `lea` that loads an address into `reg`, using `val`
// as the 32-bit displacement. Only .rax and .rsi are encoded; any other register
// is reported through g.n_error (after the first two instruction bytes were written).
fn (mut g Gen) learel(reg Register, val int) {
g.write8(0x48) // REX.W prefix: 64-bit operand size
g.write8(0x8d) // LEA opcode
match reg {
.rax {
g.write8(0x05) // ModRM: destination rax, [rip + disp32]
}
.rsi {
g.write8(0x35) // ModRM: destination rsi, [rip + disp32]
}
else {
g.n_error('learel must use rsi or rax')
}
}
g.write32(val) // the 4-byte displacement that follows the 3 opcode bytes
g.println('lea $reg, rip + $val')
}
fn (mut g Gen) lea(reg Register, val int) { fn (mut g Gen) lea(reg Register, val int) {
g.write8(0x48) g.write8(0x48)
g.write8(0x8d) g.write8(0x8d)
@ -1109,7 +1121,7 @@ g.v_error('oops', node.pos)
pos += 8 pos += 8
} }
ast.StringLiteral { ast.StringLiteral {
g.mov64(.rsi, g.allocate_string('$e.val', 2)) // for rsi its 2 g.mov64(.rsi, g.allocate_string('$e.val', 2, .abs64)) // for rsi its 2
g.mov_reg_to_var(pos, .rsi) g.mov_reg_to_var(pos, .rsi)
pos += 8 pos += 8
} }
@ -1148,7 +1160,7 @@ g.v_error('oops', node.pos)
ast.StringLiteral { ast.StringLiteral {
dest := g.allocate_var(name, 4, 0) dest := g.allocate_var(name, 4, 0)
ie := node.right[i] as ast.StringLiteral ie := node.right[i] as ast.StringLiteral
g.mov64(.rsi, g.allocate_string(ie.str(), 2)) // for rsi its 2 g.mov64(.rsi, g.allocate_string(ie.str(), 2, .abs64)) // for rsi its 2
g.mov_reg_to_var(dest, .rsi) g.mov_reg_to_var(dest, .rsi)
} }
ast.CallExpr { ast.CallExpr {
@ -1254,6 +1266,10 @@ fn (mut g Gen) gen_asm_stmt_amd64(asm_node ast.AsmStmt) {
line += a.val.str() line += a.val.str()
imm = if a.val { 1 } else { 0 } imm = if a.val { 1 } else { 0 }
} }
ast.CharLiteral {
line += a.val.str()
imm = a.val.int()
}
/* /*
ast.AsmAddressing { ast.AsmAddressing {
} }
@ -1261,8 +1277,6 @@ fn (mut g Gen) gen_asm_stmt_amd64(asm_node ast.AsmStmt) {
} }
ast.AsmDisp { ast.AsmDisp {
} }
ast.CharLiteral {
}
ast.FloatLiteral { ast.FloatLiteral {
} }
*/ */
@ -1489,7 +1503,6 @@ fn (mut g Gen) for_stmt(node ast.ForStmt) {
return return
} }
infix_expr := node.cond as ast.InfixExpr infix_expr := node.cond as ast.InfixExpr
// g.mov(.eax, 0x77777777)
mut jump_addr := 0 // location of `jne *00 00 00 00*` mut jump_addr := 0 // location of `jne *00 00 00 00*`
start := g.pos() start := g.pos()
match mut infix_expr.left { match mut infix_expr.left {

View File

@ -4,58 +4,82 @@
module native module native
const ( const (
mag0 = byte(0x7f) elf_class32 = 1
mag1 = `E` elf_class64 = 2
mag2 = `L`
mag3 = `F`
ei_class = 4
elfclass64 = 2
elfdata2lsb = 1
ev_current = 1
)
// ELF file types elf_data_le = 1
const ( elf_data_be = 2
elf_osabi = 0
et_rel = 1 elf_version = 1
et_exec = 2 elf_abiversion = 0
et_dyn = 3
e_machine_amd64 = 0x3e elf_type_rel = 1
e_machine_arm64 = 0xb7 elf_type_exec = 2
shn_xindex = 0xffff elf_type_dyn = 3
sht_null = 0
elf_amd64 = 0x3e
elf_arm64 = 0xb7
elf_osabi_none = 0
elf_osabi_hpux = 1
elf_osabi_netbsd = 2
elf_osabi_linux = 3
elf_osabi_freebsd = 9
elf_header_size = 0x40
elf_phentry_size = 0x38
) )
const ( const (
segment_start = 0x400000 segment_start = 0x400000
placeholder = 0 placeholder = 0
sevens = 0x77777777
) )
pub fn (mut g Gen) generate_elf_header() { /*
g.buf << [byte(native.mag0), native.mag1, native.mag2, native.mag3] struct Header64 {
g.buf << native.elfclass64 // file class ident u8 // File identification.
g.buf << native.elfdata2lsb // data encoding @type u16 // File type.
g.buf << native.ev_current // file version machine u16 // Machine architecture.
g.buf << native.elf_osabi version u32 // ELF format version.
g.write64(0) // et_rel) // et_rel for .o entry u64 // Entry point.
g.write16(2) // e_type phoff u64 // Program header file offset.
if g.pref.arch == .arm64 { shoff u64 // Section header file offset.
g.write16(native.e_machine_arm64) flags u32 // Architecture-specific flags.
} else { ehsize u16 // Size of ELF header in bytes.
g.write16(native.e_machine_amd64) phentsize u16 // Size of program header entry.
phnum u16 // Number of program header entries.
shentsize u16 // Size of section header entry.
shnum u16 // Number of section header entries.
shstrndx u16 // Section name strings section.
} }
g.write32(native.ev_current) // e_version */
eh_size := 0x40
phent_size := 0x38 pub fn (mut g Gen) generate_elf_header() {
g.write64(native.segment_start + eh_size + phent_size) // e_entry elf_type := native.elf_type_dyn // PIE (use _exec for non-relocatable executables)
g.write64(0x40) // e_phoff
g.buf << '\x7fELF'.bytes()
g.buf << native.elf_class64
g.buf << native.elf_data_le
g.buf << native.elf_version
g.buf << native.elf_osabi_none
g.write64(0) // abiversion(1)+pad(6)+nident(1)
g.write16(elf_type)
if g.pref.arch == .arm64 {
g.write16(native.elf_arm64)
} else {
g.write16(native.elf_amd64)
}
g.write32(native.elf_version)
g.write64(native.segment_start + native.elf_header_size + native.elf_phentry_size) // e_entry
g.write64(native.elf_header_size) // e_phoff
g.write64(0) // e_shoff g.write64(0) // e_shoff
g.write32(0) // e_flags g.write32(0) // e_flags
g.write16(eh_size) // e_ehsize g.write16(native.elf_header_size)
g.write16(phent_size) // e_phentsize g.write16(native.elf_phentry_size)
g.write16(1) // e_phnum g.write16(1) // e_phnum
g.write16(0) // e_shentsize g.write16(0) // e_shentsize
// e_shnum := g.buf.len
g.write16(0) // e_shnum (number of sections) g.write16(0) // e_shnum (number of sections)
g.write16(0) // e_shstrndx g.write16(0) // e_shstrndx
// Elf64_Phdr // Elf64_Phdr
@ -68,6 +92,16 @@ pub fn (mut g Gen) generate_elf_header() {
g.write64(0) // p_filesz PLACEHOLDER, set to file_size later // addr: 060 g.write64(0) // p_filesz PLACEHOLDER, set to file_size later // addr: 060
g.write64(0) // p_memsz g.write64(0) // p_memsz
g.write64(0x1000) // p_align g.write64(0x1000) // p_align
// write sections
/*
sections := []string{}
g.write16_at(e_shnum, sections.len) // e_shnum (number of sections)
for section in sections {
// write section data
println('section $section')
}
*/
// user code starts here at // user code starts here at
// address: 00070 and a half // address: 00070 and a half
if g.pref.is_verbose { if g.pref.is_verbose {
@ -79,24 +113,30 @@ pub fn (mut g Gen) generate_elf_header() {
g.println('; call fn main') g.println('; call fn main')
} }
pub fn (mut g Gen) generate_elf_footer() { fn (mut g Gen) elf_string_table() {
// Return 0 for _, s in g.strs {
/* match s.typ {
g.mov(.edi, 0) // ret value .abs64 {
g.mov(.eax, 60) // g.write64_at(native.segment_start + g.buf.len, int(g.str_pos[i]))
g.syscall() g.write64_at(s.pos, g.buf.len)
*/
// Strings table
// Loop thru all strings and set the right addresses
for i, s in g.strings {
g.write64_at(native.segment_start + g.buf.len, int(g.str_pos[i]))
g.write_string(s)
g.write8(0)
} }
// Now we know the file size, set it .rel32 {
g.write32_at(s.pos, g.buf.len - s.pos - 4)
}
else {
g.n_error('unsupported string reloc type')
}
}
g.write_string(s.str)
}
}
pub fn (mut g Gen) generate_elf_footer() {
g.elf_string_table()
// file_size holds the address at the end of the code and const strings table
file_size := g.buf.len file_size := g.buf.len
g.write64_at(file_size, g.file_size_pos) // set file size 64 bit value g.write64_at(g.file_size_pos, file_size) // set file size 64 bit value
g.write64_at(file_size, g.file_size_pos + 8) g.write64_at(g.file_size_pos + 8, file_size)
if g.pref.arch == .arm64 { if g.pref.arch == .arm64 {
bl_next := u32(0x94000001) bl_next := u32(0x94000001)
g.write32_at(g.code_start_pos, int(bl_next)) g.write32_at(g.code_start_pos, int(bl_next))

View File

@ -31,9 +31,7 @@ mut:
buf []byte buf []byte
sect_header_name_pos int sect_header_name_pos int
offset i64 offset i64
str_pos []i64
stackframe_size int stackframe_size int
strings []string // TODO use a map and don't duplicate strings
file_size_pos i64 file_size_pos i64
main_fn_addr i64 main_fn_addr i64
code_start_pos i64 // location of the start of the assembly instructions code_start_pos i64 // location of the start of the assembly instructions
@ -50,9 +48,18 @@ mut:
strs []String strs []String
} }
// RelocType tags how the placeholder recorded for an allocated string is patched
// once the string's final position in the output buffer is known.
// NOTE(review): only rel32 and abs64 get dedicated handling in the string-table
// writers; rel8, rel16 and rel64 look reserved for later use - confirm.
enum RelocType {
rel8
rel16
rel32 // 32-bit offset measured from the end of the 4-byte field being patched
rel64
abs64 // 64-bit value stored with write64_at
}
struct String { struct String {
str string str string
pos int pos int
typ RelocType
} }
struct CallPatch { struct CallPatch {
@ -168,7 +175,10 @@ pub fn (mut g Gen) generate_footer() {
.raw { .raw {
g.create_executable() g.create_executable()
} }
else {} else {
eprintln('Unsupported target file format')
exit(1)
}
} }
} }
@ -224,7 +234,7 @@ fn (mut g Gen) write64(n i64) {
g.buf << byte(n >> 56) g.buf << byte(n >> 56)
} }
fn (mut g Gen) write64_at(n i64, at i64) { fn (mut g Gen) write64_at(at i64, n i64) {
// write 8 bytes // write 8 bytes
g.buf[at] = byte(n) g.buf[at] = byte(n)
g.buf[at + 1] = byte(n >> 8) g.buf[at + 1] = byte(n >> 8)
@ -244,11 +254,17 @@ fn (mut g Gen) write32_at(at i64, n int) {
g.buf[at + 3] = byte(n >> 24) g.buf[at + 3] = byte(n >> 24)
} }
// write16_at overwrites two existing bytes of g.buf, starting at index `at`,
// with the low 16 bits of `n`, least-significant byte first.
// Assumes both `at` and `at + 1` are already valid indices into g.buf.
fn (mut g Gen) write16_at(at i64, n int) {
// write 2 bytes
g.buf[at] = byte(n)
g.buf[at + 1] = byte(n >> 8)
}
fn (mut g Gen) write_string(s string) { fn (mut g Gen) write_string(s string) {
for c in s { for c in s {
g.write8(int(c)) g.write8(int(c))
} }
// g.write8(0) // null terminated strings g.zeroes(1)
} }
fn (mut g Gen) write_string_with_padding(s string, max int) { fn (mut g Gen) write_string_with_padding(s string, max int) {
@ -288,15 +304,16 @@ pub fn (mut g Gen) gen_print_from_expr(expr ast.Expr, name string) {
g.gen_print_reg(.rax, 3, fd) g.gen_print_reg(.rax, 3, fd)
} }
ast.IntegerLiteral { ast.IntegerLiteral {
g.mov64(.rax, g.allocate_string('$expr.val\n', 2)) g.learel(.rax, g.allocate_string('$expr.val\n', 3, .rel32))
g.gen_print_reg(.rax, 3, fd) g.gen_print_reg(.rax, 3, fd)
} }
ast.BoolLiteral { ast.BoolLiteral {
// register 'true' and 'false' strings // g.expr(expr) // register 'true' and 'false' strings // g.expr(expr)
// XXX mov64 shouldn't be used for addressing
if expr.val { if expr.val {
g.mov64(.rax, g.allocate_string('true', 2)) g.learel(.rax, g.allocate_string('true', 3, .rel32))
} else { } else {
g.mov64(.rax, g.allocate_string('false', 2)) g.learel(.rax, g.allocate_string('false', 3, .rel32))
} }
g.gen_print_reg(.rax, 3, fd) g.gen_print_reg(.rax, 3, fd)
} }
@ -310,7 +327,7 @@ pub fn (mut g Gen) gen_print_from_expr(expr ast.Expr, name string) {
mut off := 0 mut off := 0
for f in s.fields { for f in s.fields {
if f.name == field_name { if f.name == field_name {
g.mov64(.rax, g.allocate_string('$off\n', 2)) g.learel(.rax, g.allocate_string('$off\n', 3, .rel32))
g.gen_print_reg(.rax, 3, fd) g.gen_print_reg(.rax, 3, fd)
break break
} }
@ -591,7 +608,7 @@ fn (mut g Gen) stmt(node ast.Stmt) {
ast.StringLiteral { ast.StringLiteral {
s = e0.val.str() s = e0.val.str()
g.expr(node.exprs[0]) g.expr(node.exprs[0])
g.mov64(.rax, g.allocate_string(s, 2)) g.mov64(.rax, g.allocate_string(s, 2, .abs64))
} }
ast.Ident { ast.Ident {
g.expr(e0) g.expr(e0)
@ -643,7 +660,7 @@ fn (mut g Gen) gen_syscall(node ast.CallExpr) {
match expr.expr { match expr.expr {
ast.StringLiteral { ast.StringLiteral {
s := expr.expr.val.replace('\\n', '\n') s := expr.expr.val.replace('\\n', '\n')
g.allocate_string(s, 2) g.allocate_string(s, 2, .abs64)
g.mov64(ra[i], 1) g.mov64(ra[i], 1)
done = true done = true
} }
@ -660,7 +677,7 @@ fn (mut g Gen) gen_syscall(node ast.CallExpr) {
node.pos) node.pos)
} }
s := expr.val.replace('\\n', '\n') s := expr.val.replace('\\n', '\n')
g.allocate_string(s, 2) g.allocate_string(s, 2, .abs64)
g.mov64(ra[i], 1) g.mov64(ra[i], 1)
} }
else { else {

View File

@ -395,16 +395,23 @@ fn (mut g Gen) write_symbol(s Symbol) {
fn (mut g Gen) sym_string_table() int { fn (mut g Gen) sym_string_table() int {
begin := g.buf.len begin := g.buf.len
g.zeroes(1) g.zeroes(1)
for i, s in g.strings { for _, s in g.strs {
pos := g.buf.len - int(g.str_pos[i]) pos := g.buf.len - s.pos - 4
match s.typ {
.rel32 {
g.write32_at(s.pos, pos)
}
else {
if g.pref.os == .windows { if g.pref.os == .windows {
g.write32_at(int(g.str_pos[i]), pos - 4) // 0x402028 + pos) // that should be .rel32, not windows-specific
g.write32_at(s.pos, pos)
} else { } else {
baddr := i64(0x100000000) baddr := i64(0x100000000)
g.write64_at(g.buf.len + baddr, int(g.str_pos[i])) g.write64_at(s.pos, g.buf.len + baddr)
} }
g.write_string(s) }
g.write8(0) }
g.write_string(s.str)
} }
return g.buf.len - begin return g.buf.len - begin
} }