cgen: support closures with any number of parameters of any size on amd64 (#12891)
parent
3a504480d9
commit
de92f819f0
|
@ -109,6 +109,7 @@ const (
|
||||||
'vlib/orm/orm_test.v',
|
'vlib/orm/orm_test.v',
|
||||||
'vlib/v/tests/orm_sub_struct_test.v',
|
'vlib/v/tests/orm_sub_struct_test.v',
|
||||||
'vlib/v/tests/closure_test.v',
|
'vlib/v/tests/closure_test.v',
|
||||||
|
'vlib/v/tests/closure_generator_test.v',
|
||||||
'vlib/net/websocket/ws_test.v',
|
'vlib/net/websocket/ws_test.v',
|
||||||
'vlib/net/unix/unix_test.v',
|
'vlib/net/unix/unix_test.v',
|
||||||
'vlib/net/websocket/websocket_test.v',
|
'vlib/net/websocket/websocket_test.v',
|
||||||
|
@ -132,6 +133,7 @@ const (
|
||||||
'do_not_remove',
|
'do_not_remove',
|
||||||
]
|
]
|
||||||
skip_on_arm64 = [
|
skip_on_arm64 = [
|
||||||
|
'vlib/v/tests/closure_generator_test.v',
|
||||||
'do_not_remove',
|
'do_not_remove',
|
||||||
]
|
]
|
||||||
skip_on_non_amd64_or_arm64 = [
|
skip_on_non_amd64_or_arm64 = [
|
||||||
|
|
|
@ -76,6 +76,49 @@ fn arm32_bytes(nargs int) string {
|
||||||
return bytes.replace('<REG>', nargs.str())
|
return bytes.replace('<REG>', nargs.str())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// gen_amd64_bytecode generates the amd64 bytecode a closure with `nargs` parameters.
|
||||||
|
// NB: `nargs` includes the last `userdata` parameter that will be passed to the original
|
||||||
|
// function, and as such nargs must always be > 0
|
||||||
|
fn amd64_bytes(nargs int) string {
|
||||||
|
match nargs {
|
||||||
|
1 {
|
||||||
|
return '0x48, 0x8b, 0x3d, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
|
||||||
|
}
|
||||||
|
2 {
|
||||||
|
return '0x48, 0x8b, 0x35, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
|
||||||
|
}
|
||||||
|
3 {
|
||||||
|
return '0x48, 0x8b, 0x15, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
|
||||||
|
}
|
||||||
|
4 {
|
||||||
|
return '0x48, 0x8b, 0x0d, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
|
||||||
|
}
|
||||||
|
5 {
|
||||||
|
return '0x4C, 0x8b, 0x05, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
|
||||||
|
}
|
||||||
|
6 {
|
||||||
|
return '0x4C, 0x8b, 0x0d, 0xe9, 0xff, 0xff, 0xff, 0xff, 0x25, 0xeb, 0xff, 0xff, 0xff'
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// see https://godbolt.org/z/64e5TEf5n for similar assembly
|
||||||
|
mut sb := strings.new_builder(256)
|
||||||
|
s := (((byte(nargs) & 1) + 1) << 3).hex()
|
||||||
|
sb.write_string('0x48, 0x83, 0xec, 0x$s, ') // sub rsp,0x8 <OR> sub rsp,0x10
|
||||||
|
sb.write_string('0xff, 0x35, 0xe6, 0xff, 0xff, 0xff, ') // push QWORD PTR [rip+0xffffffffffffffe6]
|
||||||
|
|
||||||
|
rsp_offset := byte(0x18 + ((byte(nargs - 7) >> 1) << 4)).hex()
|
||||||
|
for _ in 0 .. nargs - 7 {
|
||||||
|
sb.write_string('0xff, 0xb4, 0x24, 0x$rsp_offset, 0x00, 0x00, 0x00, ') // push QWORD PTR [rsp+$rsp_offset]
|
||||||
|
}
|
||||||
|
sb.write_string('0xff, 0x15, 0x${byte(256 - sb.len / 6 - 6 - 8).hex()}, 0xff, 0xff, 0xff, ') // call QWORD PTR [rip+OFFSET]
|
||||||
|
sb.write_string('0x48, 0x81, 0xc4, 0x$rsp_offset, 0x00, 0x00, 0x00, ') // add rsp,$rsp_offset
|
||||||
|
sb.write_string('0xc3') // ret
|
||||||
|
|
||||||
|
return sb.str()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Heavily based on Chris Wellons's work
|
// Heavily based on Chris Wellons's work
|
||||||
// https://nullprogram.com/blog/2017/01/08/
|
// https://nullprogram.com/blog/2017/01/08/
|
||||||
|
|
||||||
|
@ -90,33 +133,50 @@ fn c_closure_helpers(pref &pref.Preferences) string {
|
||||||
if pref.os != .windows {
|
if pref.os != .windows {
|
||||||
builder.writeln('#include <sys/mman.h>')
|
builder.writeln('#include <sys/mman.h>')
|
||||||
}
|
}
|
||||||
|
// TODO: support additional arguments by pushing them onto the stack
|
||||||
|
// https://en.wikipedia.org/wiki/Calling_convention
|
||||||
if pref.arch == .amd64 {
|
if pref.arch == .amd64 {
|
||||||
|
// TODO: the `amd64_bytes()` function above should work for an arbitrary* number of arguments,
|
||||||
|
// so we should just remove the table and call the function directly at runtime
|
||||||
builder.write_string('
|
builder.write_string('
|
||||||
static unsigned char __closure_thunk[6][13] = {
|
static unsigned char __closure_thunk[32][${amd64_bytes(31).len / 6 +
|
||||||
{
|
2}] = {
|
||||||
0x48, 0x8b, 0x3d, 0xe9, 0xff, 0xff, 0xff,
|
{ ${amd64_bytes(1)} },
|
||||||
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
|
{ ${amd64_bytes(2)} },
|
||||||
}, {
|
{ ${amd64_bytes(3)} },
|
||||||
0x48, 0x8b, 0x35, 0xe9, 0xff, 0xff, 0xff,
|
{ ${amd64_bytes(4)} },
|
||||||
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
|
{ ${amd64_bytes(5)} },
|
||||||
}, {
|
{ ${amd64_bytes(6)} },
|
||||||
0x48, 0x8b, 0x15, 0xe9, 0xff, 0xff, 0xff,
|
{ ${amd64_bytes(7)} },
|
||||||
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
|
{ ${amd64_bytes(8)} },
|
||||||
}, {
|
{ ${amd64_bytes(9)} },
|
||||||
0x48, 0x8b, 0x0d, 0xe9, 0xff, 0xff, 0xff,
|
{ ${amd64_bytes(10)} },
|
||||||
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
|
{ ${amd64_bytes(11)} },
|
||||||
}, {
|
{ ${amd64_bytes(12)} },
|
||||||
0x4C, 0x8b, 0x05, 0xe9, 0xff, 0xff, 0xff,
|
{ ${amd64_bytes(13)} },
|
||||||
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
|
{ ${amd64_bytes(14)} },
|
||||||
}, {
|
{ ${amd64_bytes(15)} },
|
||||||
0x4C, 0x8b, 0x0d, 0xe9, 0xff, 0xff, 0xff,
|
{ ${amd64_bytes(16)} },
|
||||||
0xff, 0x25, 0xeb, 0xff, 0xff, 0xff
|
{ ${amd64_bytes(17)} },
|
||||||
},
|
{ ${amd64_bytes(18)} },
|
||||||
|
{ ${amd64_bytes(19)} },
|
||||||
|
{ ${amd64_bytes(20)} },
|
||||||
|
{ ${amd64_bytes(21)} },
|
||||||
|
{ ${amd64_bytes(22)} },
|
||||||
|
{ ${amd64_bytes(23)} },
|
||||||
|
{ ${amd64_bytes(24)} },
|
||||||
|
{ ${amd64_bytes(25)} },
|
||||||
|
{ ${amd64_bytes(26)} },
|
||||||
|
{ ${amd64_bytes(27)} },
|
||||||
|
{ ${amd64_bytes(28)} },
|
||||||
|
{ ${amd64_bytes(29)} },
|
||||||
|
{ ${amd64_bytes(30)} },
|
||||||
|
{ ${amd64_bytes(31)} },
|
||||||
};
|
};
|
||||||
')
|
')
|
||||||
} else if pref.arch == .arm64 {
|
} else if pref.arch == .arm64 {
|
||||||
builder.write_string('
|
builder.write_string('
|
||||||
static unsigned char __closure_thunk[6][12] = {
|
static unsigned char __closure_thunk[8][12] = {
|
||||||
{
|
{
|
||||||
${arm64_bytes(0)}
|
${arm64_bytes(0)}
|
||||||
}, {
|
}, {
|
||||||
|
@ -129,12 +189,16 @@ static unsigned char __closure_thunk[6][12] = {
|
||||||
${arm64_bytes(4)}
|
${arm64_bytes(4)}
|
||||||
}, {
|
}, {
|
||||||
${arm64_bytes(5)}
|
${arm64_bytes(5)}
|
||||||
|
}, {
|
||||||
|
${arm64_bytes(6)}
|
||||||
|
}, {
|
||||||
|
${arm64_bytes(7)}
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
')
|
')
|
||||||
} else if pref.arch == .arm32 {
|
} else if pref.arch == .arm32 {
|
||||||
builder.write_string('
|
builder.write_string('
|
||||||
static unsigned char __closure_thunk[6][12] = {
|
static unsigned char __closure_thunk[4][12] = {
|
||||||
{
|
{
|
||||||
${arm32_bytes(0)}
|
${arm32_bytes(0)}
|
||||||
}, {
|
}, {
|
||||||
|
@ -143,10 +207,6 @@ static unsigned char __closure_thunk[6][12] = {
|
||||||
${arm32_bytes(2)}
|
${arm32_bytes(2)}
|
||||||
}, {
|
}, {
|
||||||
${arm32_bytes(3)}
|
${arm32_bytes(3)}
|
||||||
}, {
|
|
||||||
${arm32_bytes(4)}
|
|
||||||
}, {
|
|
||||||
${arm32_bytes(5)}
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
')
|
')
|
||||||
|
@ -161,6 +221,14 @@ static void __closure_set_function(void *closure, void *f) {
|
||||||
void **p = closure;
|
void **p = closure;
|
||||||
p[-1] = f;
|
p[-1] = f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int __closure_check_nargs(int nargs) {
|
||||||
|
if (nargs > (int)_ARR_LEN(__closure_thunk)) {
|
||||||
|
_v_panic(_SLIT("Closure too large. Reduce the number of parameters, or pass the parameters by reference."));
|
||||||
|
VUNREACHABLE();
|
||||||
|
}
|
||||||
|
return nargs;
|
||||||
|
}
|
||||||
')
|
')
|
||||||
if pref.os != .windows {
|
if pref.os != .windows {
|
||||||
builder.write_string('
|
builder.write_string('
|
||||||
|
@ -260,6 +328,9 @@ const c_common_macros = '
|
||||||
#define __offsetof(PTYPE,FIELDNAME) ((size_t)((char *)&((PTYPE *)0)->FIELDNAME - (char *)0))
|
#define __offsetof(PTYPE,FIELDNAME) ((size_t)((char *)&((PTYPE *)0)->FIELDNAME - (char *)0))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// returns the number of CPU registers that TYPE takes up
|
||||||
|
#define _REG_WIDTH(T) (((sizeof(T) + sizeof(void*) - 1) & ~(sizeof(void*) - 1)) / sizeof(void*))
|
||||||
|
|
||||||
#define OPTION_CAST(x) (x)
|
#define OPTION_CAST(x) (x)
|
||||||
|
|
||||||
#ifndef V64_PRINTFORMAT
|
#ifndef V64_PRINTFORMAT
|
||||||
|
|
|
@ -452,8 +452,20 @@ fn (mut g Gen) gen_anon_fn(mut node ast.AnonFn) {
|
||||||
// it may be possible to optimize `memdup` out if the closure never leaves current scope
|
// it may be possible to optimize `memdup` out if the closure never leaves current scope
|
||||||
ctx_struct := closure_ctx_struct(node.decl)
|
ctx_struct := closure_ctx_struct(node.decl)
|
||||||
// TODO in case of an assignment, this should only call "__closure_set_data" and "__closure_set_function" (and free the former data)
|
// TODO in case of an assignment, this should only call "__closure_set_data" and "__closure_set_function" (and free the former data)
|
||||||
g.write('__closure_create($node.decl.name, ${node.decl.params.len + 1}, ')
|
|
||||||
g.writeln('($ctx_struct*) memdup(&($ctx_struct){')
|
mut size_sb := strings.new_builder(node.decl.params.len * 50)
|
||||||
|
for param in node.decl.params {
|
||||||
|
size_sb.write_string('_REG_WIDTH(${g.typ(param.typ)}) + ')
|
||||||
|
}
|
||||||
|
size_sb.write_string('1')
|
||||||
|
args_size := size_sb.str()
|
||||||
|
g.writeln('')
|
||||||
|
|
||||||
|
// ensure that nargs maps to a known entry in the __closure_thunk array
|
||||||
|
// TODO make it a compile-time error (you can't call `sizeof()` inside preprocessor `#if`s)
|
||||||
|
// NB: this won't be necessary when (if) we have functions that return the machine code for
|
||||||
|
// an arbitrary number of arguments
|
||||||
|
g.write('__closure_create($node.decl.name, __closure_check_nargs($args_size), ($ctx_struct*) memdup(&($ctx_struct){')
|
||||||
g.indent++
|
g.indent++
|
||||||
for var in node.inherited_vars {
|
for var in node.inherited_vars {
|
||||||
g.writeln('.$var.name = $var.name,')
|
g.writeln('.$var.name = $var.name,')
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
import strings
|
||||||
|
import os
|
||||||
|
|
||||||
|
const (
|
||||||
|
max_params = 16
|
||||||
|
all_param_names = []string{len: max_params, init: '${`a` + it}'}
|
||||||
|
all_param_values = []string{len: max_params, init: '${it + 1}'}
|
||||||
|
)
|
||||||
|
|
||||||
|
// test_closures_with_n_args generates a new V file containing closures of `i`
|
||||||
|
// and parameters of type `typ`, to makes sure that all combinations work correctly
|
||||||
|
fn test_closures_with_n_args() ? {
|
||||||
|
mut v_code := strings.new_builder(1024)
|
||||||
|
// NB: the type or value of the captured arg doesn't matter for this test,
|
||||||
|
// as the entire closure context is always passed as one pointer anyways
|
||||||
|
|
||||||
|
for typ in ['byte', 'u16', 'int', 'i64', 'voidptr', 'string'] {
|
||||||
|
for i in 0 .. max_params {
|
||||||
|
param_names := all_param_names[..i]
|
||||||
|
params := param_names.map('$it $typ')
|
||||||
|
|
||||||
|
mut values := all_param_values[..i]
|
||||||
|
if typ == 'string' {
|
||||||
|
values = values.map("'$it'")
|
||||||
|
}
|
||||||
|
values = values.map('${typ}($it)')
|
||||||
|
|
||||||
|
mut expected_val := if typ == 'string' {
|
||||||
|
s := all_param_values[..i].join('')
|
||||||
|
"'127' + '$s'"
|
||||||
|
} else {
|
||||||
|
'127 + ${i * (i + 1) / 2}'
|
||||||
|
}
|
||||||
|
|
||||||
|
init_val, return_type := if typ != 'string' {
|
||||||
|
'u64(127)', 'u64'
|
||||||
|
} else {
|
||||||
|
"'127'", 'string'
|
||||||
|
}
|
||||||
|
|
||||||
|
// NB: the captured arg doesn't matter for this test, as closures always receive
|
||||||
|
// a pointer to the entire closure context as their last argument anyways
|
||||||
|
v_code.writeln("
|
||||||
|
fn test_big_closure_${typ}_${i}() {
|
||||||
|
println('test_big_closure_${typ}_$i')
|
||||||
|
mut z := $init_val
|
||||||
|
c := fn [z] (${params.join(', ')}) $return_type {
|
||||||
|
mut sum := z")
|
||||||
|
for j in 0 .. i {
|
||||||
|
v_code.writeln('\t\tsum += ${return_type}(${param_names[j]})')
|
||||||
|
}
|
||||||
|
v_code.writeln('
|
||||||
|
return sum
|
||||||
|
}
|
||||||
|
assert c(${values.join(', ')}) == $expected_val
|
||||||
|
}')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
code := v_code.str()
|
||||||
|
println('Compiling V code (${code.count('\n')} lines) ...')
|
||||||
|
wrkdir := os.join_path(os.temp_dir(), 'vtests', 'closures')
|
||||||
|
os.mkdir_all(wrkdir) ?
|
||||||
|
os.chdir(wrkdir) ?
|
||||||
|
os.write_file('closure_args_test.v', code) ?
|
||||||
|
vexe := os.getenv('VEXE')
|
||||||
|
res := os.execute('"$vexe" closure_args_test.v')
|
||||||
|
if res.exit_code != 0 {
|
||||||
|
eprintln(res.output)
|
||||||
|
assert false
|
||||||
|
}
|
||||||
|
println('Process exited with code $res.exit_code')
|
||||||
|
|
||||||
|
os.chdir(os.dir(vexe)) or {}
|
||||||
|
os.rmdir_all(wrkdir) or {}
|
||||||
|
}
|
Loading…
Reference in New Issue