cgen: support closures with any number of parameters of any size on amd64 (#12891)

pull/12895/head
spaceface 2021-12-18 16:33:24 +01:00 committed by GitHub
parent 3a504480d9
commit de92f819f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 189 additions and 28 deletions

View File

@ -109,6 +109,7 @@ const (
'vlib/orm/orm_test.v',
'vlib/v/tests/orm_sub_struct_test.v',
'vlib/v/tests/closure_test.v',
'vlib/v/tests/closure_generator_test.v',
'vlib/net/websocket/ws_test.v',
'vlib/net/unix/unix_test.v',
'vlib/net/websocket/websocket_test.v',
@ -132,6 +133,7 @@ const (
'do_not_remove',
]
skip_on_arm64 = [
'vlib/v/tests/closure_generator_test.v',
'do_not_remove',
]
skip_on_non_amd64_or_arm64 = [

View File

@ -76,6 +76,49 @@ fn arm32_bytes(nargs int) string {
return bytes.replace('<REG>', nargs.str())
}
// gen_amd64_bytecode generates the amd64 bytecode a closure with `nargs` parameters.
// NB: `nargs` includes the last `userdata` parameter that will be passed to the original
// function, and as such nargs must always be > 0
fn amd64_bytes(nargs int) string {
match nargs {
1 {
return '0x48, 0x8b, 0x3d, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
}
2 {
return '0x48, 0x8b, 0x35, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
}
3 {
return '0x48, 0x8b, 0x15, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
}
4 {
return '0x48, 0x8b, 0x0d, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
}
5 {
return '0x4C, 0x8b, 0x05, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
}
6 {
return '0x4C, 0x8b, 0x0d, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
}
else {
// see https://godbolt.org/z/64e5TEf5n for similar assembly
mut sb := strings.new_builder(256)
s := (((byte(nargs) & 1) + 1) << 3).hex()
sb.write_string('0x48, 0x83, 0xec, 0x$s, ') // sub rsp,0x8 <OR> sub rsp,0x10
sb.write_string('0xff, 0x35, 0xe6, 0xff, 0xff, 0xff, ') // push QWORD PTR [rip+0xffffffffffffffe6]
rsp_offset := byte(0x18 + ((byte(nargs - 7) >> 1) << 4)).hex()
for _ in 0 .. nargs - 7 {
sb.write_string('0xff, 0xb4, 0x24, 0x$rsp_offset, 0x00, 0x00, 0x00, ') // push QWORD PTR [rsp+$rsp_offset]
}
sb.write_string('0xff, 0x15, 0x${byte(256 - sb.len / 6 - 6 - 8).hex()}, 0xff, 0xff, 0xff, ') // call QWORD PTR [rip+OFFSET]
sb.write_string('0x48, 0x81, 0xc4, 0x$rsp_offset, 0x00, 0x00, 0x00, ') // add rsp,$rsp_offset
sb.write_string('0xc3') // ret
return sb.str()
}
}
}
// Heavily based on Chris Wellons's work
// https://nullprogram.com/blog/2017/01/08/
@ -90,33 +133,50 @@ fn c_closure_helpers(pref &pref.Preferences) string {
if pref.os != .windows {
builder.writeln('#include <sys/mman.h>')
}
// TODO: support additional arguments by pushing them onto the stack
// https://en.wikipedia.org/wiki/Calling_convention
if pref.arch == .amd64 {
// TODO: the `amd64_bytes()` function above should work for an arbitrary* number of arguments,
// so we should just remove the table and call the function directly at runtime
builder.write_string('
static unsigned char __closure_thunk[6][13] = {
{
0x48, 0x8b, 0x3d, 0xe9, 0xff, 0xff, 0xff,
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
}, {
0x48, 0x8b, 0x35, 0xe9, 0xff, 0xff, 0xff,
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
}, {
0x48, 0x8b, 0x15, 0xe9, 0xff, 0xff, 0xff,
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
}, {
0x48, 0x8b, 0x0d, 0xe9, 0xff, 0xff, 0xff,
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
}, {
0x4C, 0x8b, 0x05, 0xe9, 0xff, 0xff, 0xff,
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
}, {
0x4C, 0x8b, 0x0d, 0xe9, 0xff, 0xff, 0xff,
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
},
static unsigned char __closure_thunk[32][${amd64_bytes(31).len / 6 +
2}] = {
{ ${amd64_bytes(1)} },
{ ${amd64_bytes(2)} },
{ ${amd64_bytes(3)} },
{ ${amd64_bytes(4)} },
{ ${amd64_bytes(5)} },
{ ${amd64_bytes(6)} },
{ ${amd64_bytes(7)} },
{ ${amd64_bytes(8)} },
{ ${amd64_bytes(9)} },
{ ${amd64_bytes(10)} },
{ ${amd64_bytes(11)} },
{ ${amd64_bytes(12)} },
{ ${amd64_bytes(13)} },
{ ${amd64_bytes(14)} },
{ ${amd64_bytes(15)} },
{ ${amd64_bytes(16)} },
{ ${amd64_bytes(17)} },
{ ${amd64_bytes(18)} },
{ ${amd64_bytes(19)} },
{ ${amd64_bytes(20)} },
{ ${amd64_bytes(21)} },
{ ${amd64_bytes(22)} },
{ ${amd64_bytes(23)} },
{ ${amd64_bytes(24)} },
{ ${amd64_bytes(25)} },
{ ${amd64_bytes(26)} },
{ ${amd64_bytes(27)} },
{ ${amd64_bytes(28)} },
{ ${amd64_bytes(29)} },
{ ${amd64_bytes(30)} },
{ ${amd64_bytes(31)} },
};
')
} else if pref.arch == .arm64 {
builder.write_string('
static unsigned char __closure_thunk[6][12] = {
static unsigned char __closure_thunk[8][12] = {
{
${arm64_bytes(0)}
}, {
@ -129,12 +189,16 @@ static unsigned char __closure_thunk[6][12] = {
${arm64_bytes(4)}
}, {
${arm64_bytes(5)}
}, {
${arm64_bytes(6)}
}, {
${arm64_bytes(7)}
},
};
')
} else if pref.arch == .arm32 {
builder.write_string('
static unsigned char __closure_thunk[6][12] = {
static unsigned char __closure_thunk[4][12] = {
{
${arm32_bytes(0)}
}, {
@ -143,10 +207,6 @@ static unsigned char __closure_thunk[6][12] = {
${arm32_bytes(2)}
}, {
${arm32_bytes(3)}
}, {
${arm32_bytes(4)}
}, {
${arm32_bytes(5)}
},
};
')
@ -161,6 +221,14 @@ static void __closure_set_function(void *closure, void *f) {
void **p = closure;
p[-1] = f;
}
static inline int __closure_check_nargs(int nargs) {
if (nargs > (int)_ARR_LEN(__closure_thunk)) {
_v_panic(_SLIT("Closure too large. Reduce the number of parameters, or pass the parameters by reference."));
VUNREACHABLE();
}
return nargs;
}
')
if pref.os != .windows {
builder.write_string('
@ -260,6 +328,9 @@ const c_common_macros = '
#define __offsetof(PTYPE,FIELDNAME) ((size_t)((char *)&((PTYPE *)0)->FIELDNAME - (char *)0))
#endif
// returns the number of CPU registers that TYPE takes up
#define _REG_WIDTH(T) (((sizeof(T) + sizeof(void*) - 1) & ~(sizeof(void*) - 1)) / sizeof(void*))
#define OPTION_CAST(x) (x)
#ifndef V64_PRINTFORMAT

View File

@ -452,8 +452,20 @@ fn (mut g Gen) gen_anon_fn(mut node ast.AnonFn) {
// it may be possible to optimize `memdup` out if the closure never leaves current scope
ctx_struct := closure_ctx_struct(node.decl)
// TODO in case of an assignment, this should only call "__closure_set_data" and "__closure_set_function" (and free the former data)
g.write('__closure_create($node.decl.name, ${node.decl.params.len + 1}, ')
g.writeln('($ctx_struct*) memdup(&($ctx_struct){')
mut size_sb := strings.new_builder(node.decl.params.len * 50)
for param in node.decl.params {
size_sb.write_string('_REG_WIDTH(${g.typ(param.typ)}) + ')
}
size_sb.write_string('1')
args_size := size_sb.str()
g.writeln('')
// ensure that nargs maps to a known entry in the __closure_thunk array
// TODO make it a compile-time error (you can't call `sizeof()` inside preprocessor `#if`s)
// NB: this won't be necessary when (if) we have functions that return the machine code for
// an arbitrary number of arguments
g.write('__closure_create($node.decl.name, __closure_check_nargs($args_size), ($ctx_struct*) memdup(&($ctx_struct){')
g.indent++
for var in node.inherited_vars {
g.writeln('.$var.name = $var.name,')

View File

@ -0,0 +1,76 @@
import strings
import os
const (
max_params = 16
all_param_names = []string{len: max_params, init: '${`a` + it}'}
all_param_values = []string{len: max_params, init: '${it + 1}'}
)
// test_closures_with_n_args generates a new V file containing closures of `i`
// and parameters of type `typ`, to makes sure that all combinations work correctly
fn test_closures_with_n_args() ? {
mut v_code := strings.new_builder(1024)
// NB: the type or value of the captured arg doesn't matter for this test,
// as the entire closure context is always passed as one pointer anyways
for typ in ['byte', 'u16', 'int', 'i64', 'voidptr', 'string'] {
for i in 0 .. max_params {
param_names := all_param_names[..i]
params := param_names.map('$it $typ')
mut values := all_param_values[..i]
if typ == 'string' {
values = values.map("'$it'")
}
values = values.map('${typ}($it)')
mut expected_val := if typ == 'string' {
s := all_param_values[..i].join('')
"'127' + '$s'"
} else {
'127 + ${i * (i + 1) / 2}'
}
init_val, return_type := if typ != 'string' {
'u64(127)', 'u64'
} else {
"'127'", 'string'
}
// NB: the captured arg doesn't matter for this test, as closures always receive
// a pointer to the entire closure context as their last argument anyways
v_code.writeln("
fn test_big_closure_${typ}_${i}() {
println('test_big_closure_${typ}_$i')
mut z := $init_val
c := fn [z] (${params.join(', ')}) $return_type {
mut sum := z")
for j in 0 .. i {
v_code.writeln('\t\tsum += ${return_type}(${param_names[j]})')
}
v_code.writeln('
return sum
}
assert c(${values.join(', ')}) == $expected_val
}')
}
}
code := v_code.str()
println('Compiling V code (${code.count('\n')} lines) ...')
wrkdir := os.join_path(os.temp_dir(), 'vtests', 'closures')
os.mkdir_all(wrkdir) ?
os.chdir(wrkdir) ?
os.write_file('closure_args_test.v', code) ?
vexe := os.getenv('VEXE')
res := os.execute('"$vexe" closure_args_test.v')
if res.exit_code != 0 {
eprintln(res.output)
assert false
}
println('Process exited with code $res.exit_code')
os.chdir(os.dir(vexe)) or {}
os.rmdir_all(wrkdir) or {}
}