gen: make the closure implementation more performant (#14352)

spaceface 2022-05-11 23:05:14 +02:00 committed by Jef Roosens
parent 50ab19d136
commit 1bde8611f0
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
3 changed files with 35 additions and 167 deletions

View File

@ -2422,8 +2422,6 @@ V supports closures too.
This means that anonymous functions can inherit variables from the scope they were created in. This means that anonymous functions can inherit variables from the scope they were created in.
They must do so explicitly by listing all variables that are inherited. They must do so explicitly by listing all variables that are inherited.
> Warning: currently works on x64 and arm64 architectures only.
```v oksyntax ```v oksyntax
my_int := 1 my_int := 1
my_closure := fn [my_int] () { my_closure := fn [my_int] () {

View File

@ -69,94 +69,51 @@ fn c_closure_helpers(pref &pref.Preferences) string {
builder.write_string(' builder.write_string('
#ifdef _MSC_VER #ifdef _MSC_VER
#define __RETURN_ADDRESS() _ReturnAddress() #define __RETURN_ADDRESS() ((char*)_ReturnAddress())
#elif defined(__TINYC__) && defined(_WIN32) #elif defined(__TINYC__) && defined(_WIN32)
#define __RETURN_ADDRESS() __builtin_return_address(0) #define __RETURN_ADDRESS() ((char*)__builtin_return_address(0))
#else #else
#define __RETURN_ADDRESS() __builtin_extract_return_addr(__builtin_return_address(0)) #define __RETURN_ADDRESS() ((char*)__builtin_extract_return_addr(__builtin_return_address(0)))
#endif #endif
#ifdef __V_amd64 #ifdef __V_amd64
#ifdef _WIN32
static const char __closure_thunk[] = { static const char __closure_thunk[] = {
0x48, 0x89, 0x0d, 0xc1, 0xff, 0xff, 0xff, // mov qword ptr [rip - 63], rcx # <_orig_rcx> 0x8f, 0x05, 0xda, 0xff, 0xff, 0xff, // pop QWORD PTR [rip - 0x26] # <_orig_rbp>
0x8f, 0x05, 0xc3, 0xff, 0xff, 0xff, // pop qword ptr [rip - 61] # <_orig_rbp> 0xff, 0x15, 0xe4, 0xff, 0xff, 0xff, // call QWORD PTR [rip - 0x1C] # <fn>
0xff, 0x15, 0xd5, 0xff, 0xff, 0xff, // call qword ptr [rip - 43] # <wrapper> 0xff, 0x25, 0xce, 0xff, 0xff, 0xff, // jmp QWORD PTR [rip - 0x32] # <_orig_rbp>
0x48, 0x8b, 0x0d, 0xae, 0xff, 0xff, 0xff, // mov rcx, qword ptr [rip - 82] # <_orig_rcx>
0xff, 0x15, 0xc0, 0xff, 0xff, 0xff, // call qword ptr [rip - 64] # <unwrapper>
0xff, 0x35, 0xaa, 0xff, 0xff, 0xff, // push qword ptr [rip - 86] # <_orig_rbp>
0xc3 // ret
}; };
#else #define __CLOSURE_DATA_OFFSET 20
static const char __closure_thunk[] = {
0x48, 0x89, 0x3d, 0xc1, 0xff, 0xff, 0xff, // mov qword ptr [rip - 63], rdi # <_orig_rdi>
0x8f, 0x05, 0xc3, 0xff, 0xff, 0xff, // pop qword ptr [rip - 61] # <_orig_rbp>
0xff, 0x15, 0xd5, 0xff, 0xff, 0xff, // call qword ptr [rip - 43] # <wrapper>
0x48, 0x8b, 0x3d, 0xae, 0xff, 0xff, 0xff, // mov rdi, qword ptr [rip - 82] # <_orig_rdi>
0xff, 0x15, 0xc0, 0xff, 0xff, 0xff, // call qword ptr [rip - 64] # <unwrapper>
0xff, 0x35, 0xaa, 0xff, 0xff, 0xff, // push qword ptr [rip - 86] # <_orig_rbp>
0xc3 // ret
};
#endif
#define __CLOSURE_WRAPPER_OFFSET 19
#define __CLOSURE_UNWRAPPER_OFFSET 32
#define __CLOSURE_WRAPPER_EXTRA_PARAM void* _t
#define __CLOSURE_WRAPPER_EXTRA_PARAM_COMMA ,
#elif defined(__V_x86) #elif defined(__V_x86)
static char __closure_thunk[] = { static char __closure_thunk[] = {
0xe8, 0x00, 0x00, 0x00, 0x00, // call 4 0xe8, 0x00, 0x00, 0x00, 0x00, // call 4
0x58, // pop eax 0x59, // pop ecx
0x8f, 0x40, 0xe3, // pop dword ptr [eax - 29] # <_orig_rbp> 0x8f, 0x41, 0xeb, // pop DWORD PTR [ecx - 21] # <_orig_rbp>
0xff, 0x50, 0xef, // call dword ptr [eax - 17] # <wrapper> 0xff, 0x51, 0xf3, // call DWORD PTR [ecx - 13] # <fn>
0xe8, 0x00, 0x00, 0x00, 0x00, // call 4 0xe8, 0x00, 0x00, 0x00, 0x00, // call 4
0x58, // pop eax 0x59, // pop ecx
0xff, 0x50, 0xdf, // call dword ptr [eax - 33] # <unwrapper> 0xff, 0x61, 0xdf, // jmp DWORD PTR [ecx - 33] # <_orig_rbp>
0xe8, 0x00, 0x00, 0x00, 0x00, // call 4
0x58, // pop eax
0xff, 0x70, 0xce, // push dword ptr [eax - 50] # <_orig_rbp>
0xc3 // ret
}; };
#define __CLOSURE_WRAPPER_OFFSET 12 #define __CLOSURE_DATA_OFFSET 16
#define __CLOSURE_UNWRAPPER_OFFSET 21
#define __CLOSURE_WRAPPER_EXTRA_PARAM void* _t
#define __CLOSURE_WRAPPER_EXTRA_PARAM_COMMA ,
#elif defined(__V_arm64) #elif defined(__V_arm64)
static char __closure_thunk[] = { static char __closure_thunk[] = {
0x10, 0x00, 0x00, 0x10, // adr x16, start 0x10, 0x00, 0x00, 0x10, // adr x16, start
0x08, 0x82, 0x1c, 0xf8, // str x8, _orig_x8 0x1e, 0x02, 0x1e, 0xf8, // str x30, _orig_x30
0x1e, 0x02, 0x1d, 0xf8, // str x30, _orig_x30 0x50, 0xff, 0xff, 0x58, // ldr x16, fn
0xf0, 0xfe, 0xff, 0x58, // ldr x16, wrapper
0x00, 0x02, 0x3f, 0xd6, // blr x16 0x00, 0x02, 0x3f, 0xd6, // blr x16
0x70, 0xff, 0xff, 0x10, // adr x16, start 0x9e, 0xfe, 0xff, 0x58, // ldr x30, _orig_x30
0x08, 0x82, 0x5c, 0xf8, // ldr x8, _orig_x8
0x30, 0xfe, 0xff, 0x58, // ldr x16, unwrapper
0x00, 0x02, 0x3f, 0xd6, // blr x16
0xf0, 0xfe, 0xff, 0x10, // adr x16, start
0x1e, 0x02, 0x5d, 0xf8, // ldr x30, _orig_x30
0xc0, 0x03, 0x5f, 0xd6 // ret 0xc0, 0x03, 0x5f, 0xd6 // ret
}; };
#define __CLOSURE_WRAPPER_OFFSET 20 #define __CLOSURE_DATA_OFFSET 24
#define __CLOSURE_UNWRAPPER_OFFSET 36
#define __CLOSURE_WRAPPER_EXTRA_PARAM
#define __CLOSURE_WRAPPER_EXTRA_PARAM_COMMA
#elif defined(__V_arm32) #elif defined(__V_arm32)
static char __closure_thunk[] = { static char __closure_thunk[] = {
0x24, 0x00, 0x0f, 0xe5, // str r0, orig_r0 0x18, 0xe0, 0x0f, 0xe5, // str lr, orig_lr
0x24, 0xe0, 0x0f, 0xe5, // str lr, orig_lr 0x14, 0xc0, 0x1f, 0xe5, // ldr ip, fn
0x1c, 0xc0, 0x1f, 0xe5, // ldr ip, wrapper 0x3c, 0xff, 0x2f, 0xe1, // blx ip
0x3c, 0xff, 0x2f, 0xe1, // blx ip 0x24, 0xe0, 0x1f, 0xe5, // ldr lr, orig_lr
0x34, 0x00, 0x1f, 0xe5, // ldr r0, orig_r0 0x1e, 0xff, 0x2f, 0xe1 // bx lr
0x2c, 0xc0, 0x1f, 0xe5, // ldr ip, unwrapper
0x3c, 0xff, 0x2f, 0xe1, // blx ip
0x3c, 0xe0, 0x1f, 0xe5, // ldr lr, orig_lr
0x1e, 0xff, 0x2f, 0xe1 // bx lr
}; };
#define __CLOSURE_WRAPPER_OFFSET 16 #define __CLOSURE_DATA_OFFSET 16
#define __CLOSURE_UNWRAPPER_OFFSET 28
#define __CLOSURE_WRAPPER_EXTRA_PARAM void* _t
#define __CLOSURE_WRAPPER_EXTRA_PARAM_COMMA ,
#endif #endif
static int _V_PAGE_SIZE = 4096; // pre-initialized to the most common value, in case _vinit is not called (in a DLL, for example) static int _V_PAGE_SIZE = 4096; // pre-initialized to the most common value, in case _vinit is not called (in a DLL, for example)
@ -171,22 +128,7 @@ static inline void __closure_set_function(void* closure, void* f) {
p[-2] = f; p[-2] = f;
} }
static inline void __closure_set_wrapper(void* closure, void* f) { static void* __closure_create(void* fn, void* data) {
void** p = closure;
p[-3] = f;
}
static inline void __closure_set_unwrapper(void* closure, void* f) {
void** p = closure;
p[-4] = f;
}
static inline void __closure_set_base_ptr(void* closure, void* bp) {
void** p = closure;
p[-5] = bp;
}
static void* __closure_create(void* fn, void* wrapper, void* unwrapper, void* data) {
#ifdef _WIN32 #ifdef _WIN32
SYSTEM_INFO si; SYSTEM_INFO si;
GetNativeSystemInfo(&si); GetNativeSystemInfo(&si);
@ -213,9 +155,6 @@ static void* __closure_create(void* fn, void* wrapper, void* unwrapper, void* da
__closure_set_data(closure, data); __closure_set_data(closure, data);
__closure_set_function(closure, fn); __closure_set_function(closure, fn);
__closure_set_wrapper(closure, wrapper);
__closure_set_unwrapper(closure, unwrapper);
__closure_set_base_ptr(closure, p);
return closure; return closure;
} }
') ')

View File

@ -195,7 +195,7 @@ fn (mut g Gen) gen_fn_decl(node &ast.FnDecl, skip bool) {
is_closure := node.scope.has_inherited_vars() is_closure := node.scope.has_inherited_vars()
mut cur_closure_ctx := '' mut cur_closure_ctx := ''
if is_closure { if is_closure {
cur_closure_ctx, _ = closure_ctx(node) cur_closure_ctx = closure_ctx(node)
// declare the struct before its implementation // declare the struct before its implementation
g.definitions.write_string(cur_closure_ctx) g.definitions.write_string(cur_closure_ctx)
g.definitions.writeln(';') g.definitions.writeln(';')
@ -288,15 +288,6 @@ fn (mut g Gen) gen_fn_decl(node &ast.FnDecl, skip bool) {
arg_start_pos := g.out.len arg_start_pos := g.out.len
fargs, fargtypes, heap_promoted := g.fn_decl_params(node.params, node.scope, node.is_variadic) fargs, fargtypes, heap_promoted := g.fn_decl_params(node.params, node.scope, node.is_variadic)
if is_closure { if is_closure {
mut s := '$cur_closure_ctx *$c.closure_ctx'
if node.params.len > 0 {
s = ', ' + s
} else {
// remove generated `void`
g.out.cut_to(arg_start_pos)
}
g.definitions.write_string(s)
g.write(s)
g.nr_closures++ g.nr_closures++
} }
arg_str := g.out.after(arg_start_pos) arg_str := g.out.after(arg_start_pos)
@ -312,6 +303,9 @@ fn (mut g Gen) gen_fn_decl(node &ast.FnDecl, skip bool) {
} }
g.definitions.writeln(');') g.definitions.writeln(');')
g.writeln(') {') g.writeln(') {')
if is_closure {
g.writeln('$cur_closure_ctx* $c.closure_ctx = *(void**)(__RETURN_ADDRESS() - __CLOSURE_DATA_OFFSET);')
}
for i, is_promoted in heap_promoted { for i, is_promoted in heap_promoted {
if is_promoted { if is_promoted {
g.writeln('${fargtypes[i]}* ${fargs[i]} = HEAP(${fargtypes[i]}, _v_toheap_${fargs[i]});') g.writeln('${fargtypes[i]}* ${fargs[i]} = HEAP(${fargtypes[i]}, _v_toheap_${fargs[i]});')
@ -472,8 +466,8 @@ fn (mut g Gen) c_fn_name(node &ast.FnDecl) ?string {
const closure_ctx = '_V_closure_ctx' const closure_ctx = '_V_closure_ctx'
fn closure_ctx(node ast.FnDecl) (string, string) { fn closure_ctx(node ast.FnDecl) string {
return 'struct _V_${node.name}_Ctx', 'struct _V_${node.name}_Args' return 'struct _V_${node.name}_Ctx'
} }
fn (mut g Gen) gen_anon_fn(mut node ast.AnonFn) { fn (mut g Gen) gen_anon_fn(mut node ast.AnonFn) {
@ -482,73 +476,17 @@ fn (mut g Gen) gen_anon_fn(mut node ast.AnonFn) {
g.write(node.decl.name) g.write(node.decl.name)
return return
} }
ctx_struct, arg_struct := closure_ctx(node.decl) ctx_struct := closure_ctx(node.decl)
// it may be possible to optimize `memdup` out if the closure never leaves current scope // it may be possible to optimize `memdup` out if the closure never leaves current scope
// TODO in case of an assignment, this should only call "__closure_set_data" and "__closure_set_function" (and free the former data) // TODO in case of an assignment, this should only call "__closure_set_data" and "__closure_set_function" (and free the former data)
g.write('__closure_create($node.decl.name, ${node.decl.name}_wrapper, ${node.decl.name}_unwrapper, ($ctx_struct*) memdup(&($ctx_struct){') g.write('__closure_create($node.decl.name, ($ctx_struct*) memdup(&($ctx_struct){')
g.indent++ g.indent++
for var in node.inherited_vars { for var in node.inherited_vars {
g.writeln('.$var.name = $var.name,') g.writeln('.$var.name = $var.name,')
} }
g.indent-- g.indent--
ps := g.table.pointer_size
is_big_cutoff := if g.pref.os == .windows || g.pref.arch == .arm32 { ps } else { ps * 2 }
rt_size, _ := g.table.type_size(node.decl.return_type)
is_big := rt_size > is_big_cutoff
g.write('}, sizeof($ctx_struct)))') g.write('}, sizeof($ctx_struct)))')
mut sb := strings.new_builder(512)
ret_styp := g.typ(node.decl.return_type)
sb.write_string(' VV_LOCAL_SYMBOL void ${node.decl.name}_wrapper(')
if is_big {
sb.write_string('__CLOSURE_WRAPPER_EXTRA_PARAM ')
if node.decl.params.len > 0 {
sb.write_string('__CLOSURE_WRAPPER_EXTRA_PARAM_COMMA ')
}
}
for i, param in node.decl.params {
if i > 0 {
sb.write_string(', ')
}
sb.write_string('${g.typ(param.typ)} a${i + 1}')
}
sb.writeln(') {')
if node.decl.params.len > 0 {
sb.writeln('void** closure_start = (void**)((char*)__RETURN_ADDRESS() - __CLOSURE_WRAPPER_OFFSET);
$arg_struct* args = closure_start[-5];')
for i in 0 .. node.decl.params.len {
sb.writeln('\targs->a${i + 1} = a${i + 1};')
}
}
sb.writeln('}\n')
sb.writeln(' VV_LOCAL_SYMBOL $ret_styp ${node.decl.name}_unwrapper(void) {
void** closure_start = (void**)((char*)__RETURN_ADDRESS() - __CLOSURE_UNWRAPPER_OFFSET);
void* userdata = closure_start[-1];')
sb.write_string('\t${g.typ(node.decl.return_type)} (*fn)(')
for i, param in node.decl.params {
sb.write_string('${g.typ(param.typ)} a${i + 1}, ')
}
sb.writeln('void* userdata) = closure_start[-2];')
if node.decl.params.len > 0 {
sb.writeln('\t$arg_struct* args = closure_start[-5];')
}
if node.decl.return_type == ast.void_type_idx {
sb.write_string('\tfn(')
} else {
sb.write_string('\treturn fn(')
}
for i in 0 .. node.decl.params.len {
sb.write_string('args->a${i + 1}, ')
}
sb.writeln('userdata);
}')
g.anon_fn_definitions << sb.str()
g.empty_line = false g.empty_line = false
} }
@ -559,20 +497,13 @@ fn (mut g Gen) gen_anon_fn_decl(mut node ast.AnonFn) {
node.has_gen = true node.has_gen = true
mut builder := strings.new_builder(256) mut builder := strings.new_builder(256)
if node.inherited_vars.len > 0 { if node.inherited_vars.len > 0 {
ctx_struct, arg_struct := closure_ctx(node.decl) ctx_struct := closure_ctx(node.decl)
builder.writeln('$ctx_struct {') builder.writeln('$ctx_struct {')
for var in node.inherited_vars { for var in node.inherited_vars {
styp := g.typ(var.typ) styp := g.typ(var.typ)
builder.writeln('\t$styp $var.name;') builder.writeln('\t$styp $var.name;')
} }
builder.writeln('};\n') builder.writeln('};\n')
if node.decl.params.len > 0 {
builder.writeln('$arg_struct {')
for i, param in node.decl.params {
builder.writeln('\t${g.typ(param.typ)} a${i + 1};')
}
builder.writeln('};\n')
}
} }
pos := g.out.len pos := g.out.len
was_anon_fn := g.anon_fn was_anon_fn := g.anon_fn