From 84fa1ae444b9caca5f3a3227a5348bb3262c8a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kr=C3=BCger?= <45282134+UweKrueger@users.noreply.github.com> Date: Fri, 9 Apr 2021 12:13:49 +0200 Subject: [PATCH] boehm-gc: support a `[keep_args_alive]` tag for C functions (#9641) --- cmd/tools/vtest-self.v | 1 + doc/docs.md | 5 ++ vlib/builtin/builtin_d_gcboehm.v | 3 ++ vlib/v/ast/ast.v | 2 + vlib/v/ast/table.v | 1 + vlib/v/checker/checker.v | 1 + vlib/v/gen/c/fn.v | 69 ++++++++++++++++++++++-- vlib/v/parser/fn.v | 8 +++ vlib/v/tests/keep_args_alive_test.v | 75 +++++++++++++++++++++++++++ vlib/v/tests/keep_args_alive_test_c.h | 44 ++++++++++++++++ 10 files changed, 204 insertions(+), 5 deletions(-) create mode 100644 vlib/v/tests/keep_args_alive_test.v create mode 100644 vlib/v/tests/keep_args_alive_test_c.h diff --git a/cmd/tools/vtest-self.v b/cmd/tools/vtest-self.v index 6ab37da62c..7849b850aa 100644 --- a/cmd/tools/vtest-self.v +++ b/cmd/tools/vtest-self.v @@ -109,6 +109,7 @@ const ( 'vlib/v/tests/interface_edge_cases/assign_to_interface_field_test.v', 'vlib/v/tests/interface_fields_test.v', 'vlib/v/tests/interface_variadic_test.v', + 'vlib/v/tests/keep_args_alive_test.v', 'vlib/v/tests/option_2_test.v', 'vlib/v/tests/operator_overloading_with_string_interpolation_test.v', 'vlib/v/tests/orm_sub_struct_test.v', diff --git a/doc/docs.md b/doc/docs.md index 603b8c094e..e68f6d2db7 100644 --- a/doc/docs.md +++ b/doc/docs.md @@ -4222,6 +4222,11 @@ fn bar() { foo() // will not be called if `-d debug` is not passed } +// The memory pointed to by the pointer arguments of this function will not be +// freed by the garbage collector (if in use) before the function returns +[keep_args_alive] +fn C.my_external_function(voidptr, int, voidptr) int + // Calls to following function must be in unsafe{} blocks. // Note that the code in the body of `risky_business()` will still be // checked, unless you also wrap it in `unsafe {}` blocks. diff --git a/vlib/builtin/builtin_d_gcboehm.v b/vlib/builtin/builtin_d_gcboehm.v index c29699568b..c625818a50 100644 --- a/vlib/builtin/builtin_d_gcboehm.v +++ b/vlib/builtin/builtin_d_gcboehm.v @@ -52,6 +52,9 @@ fn C.GC_enable() // returns non-zero if GC is disabled fn C.GC_is_disabled() int +// protect memory block from being freed before this call +fn C.GC_reachable_here(voidptr) + // for leak detection it is advisable to do explicit garbage collections pub fn gc_check_leaks() { $if gcboehm_leak ? { diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v index 46b640885a..5b3ff495cd 100644 --- a/vlib/v/ast/ast.v +++ b/vlib/v/ast/ast.v @@ -356,6 +356,7 @@ pub: is_main bool // true for `fn main()` is_test bool // true for `fn test_abcde` is_conditional bool // true for `[if abc] fn abc(){}` + is_keep_alive bool // passed memory must not be freed (by GC) before function returns receiver StructField // TODO this is not a struct field receiver_pos token.Position // `(u User)` in `fn (u User) name()` position is_method bool @@ -409,6 +410,7 @@ pub mut: name string // left.name() is_method bool is_field bool // temp hack, remove ASAP when re-impl CallExpr / Selector (joe) + is_keep_alive bool // GC must not free arguments before fn returns args []CallArg expected_arg_types []Type language Language diff --git a/vlib/v/ast/table.v b/vlib/v/ast/table.v index ff35c0ef37..3211f74762 100644 --- a/vlib/v/ast/table.v +++ b/vlib/v/ast/table.v @@ -38,6 +38,7 @@ pub: is_main bool // `fn main(){}` is_test bool // `fn test_abc(){}` is_conditional bool // `[if abc]fn(){}` + is_keep_alive bool // passed memory must not be freed (by GC) before function returns no_body bool // a pure declaration like `fn abc(x int)`; used in .vh files, C./JS. fns. mod string ctdefine string // compile time define. "myflag", when [if myflag] tag diff --git a/vlib/v/checker/checker.v b/vlib/v/checker/checker.v index 27fe697b00..84ee4087c8 100644 --- a/vlib/v/checker/checker.v +++ b/vlib/v/checker/checker.v @@ -2060,6 +2060,7 @@ pub fn (mut c Checker) fn_call(mut call_expr ast.CallExpr) ast.Type { // builtin C.m*, C.s* only - temp c.warn('function `$f.name` must be called from an `unsafe` block', call_expr.pos) } + call_expr.is_keep_alive = f.is_keep_alive if f.mod != 'builtin' && f.language == .v && f.no_body && !c.pref.translated && !f.is_unsafe { c.error('cannot call a function that does not have a body', call_expr.pos) } diff --git a/vlib/v/gen/c/fn.v b/vlib/v/gen/c/fn.v index a587451e27..5943db143e 100644 --- a/vlib/v/gen/c/fn.v +++ b/vlib/v/gen/c/fn.v @@ -413,9 +413,11 @@ fn (mut g Gen) call_expr(node ast.CallExpr) { defer { g.inside_call = false } + gen_keep_alive := node.is_keep_alive && node.return_type != ast.void_type + && g.pref.gc_mode in [.boehm_full, .boehm_incr, .boehm] gen_or := node.or_block.kind != .absent // && !g.is_autofree is_gen_or_and_assign_rhs := gen_or && !g.discard_or_result - cur_line := if is_gen_or_and_assign_rhs { // && !g.is_autofree { + cur_line := if is_gen_or_and_assign_rhs || gen_keep_alive { // && !g.is_autofree { // `x := foo() or { ...}` // cut everything that has been generated to prepend optional variable creation line := g.go_before_stmt(0) @@ -425,9 +427,13 @@ fn (mut g Gen) call_expr(node ast.CallExpr) { } else { '' } - tmp_opt := if gen_or { g.new_tmp_var() } else { '' } - if gen_or { - styp := g.typ(node.return_type.set_flag(.optional)) + tmp_opt := if gen_or || gen_keep_alive { g.new_tmp_var() } else { '' } + if gen_or || gen_keep_alive { + mut ret_typ := node.return_type + if gen_or { + ret_typ = ret_typ.set_flag(.optional) + } + styp := g.typ(ret_typ) g.write('$styp $tmp_opt = ') } if node.is_method && !node.is_field { @@ -458,6 +464,12 @@ fn (mut g Gen) call_expr(node ast.CallExpr) { } } } + } else if gen_keep_alive { + if node.return_type == ast.void_type { + g.write('\n $cur_line') + } else { + g.write('\n $cur_line $tmp_opt') + } } } @@ -874,13 +886,30 @@ fn (mut g Gen) fn_call(node ast.CallExpr) { // g.writeln(';') // g.write(cur_line + ' /* <== af cur line*/') // } + mut tmp_cnt_save := -1 g.write('${g.get_ternary_name(name)}(') if g.is_json_fn { g.write(json_obj) } else { - g.call_args(node) + if node.is_keep_alive && g.pref.gc_mode in [.boehm_full, .boehm_incr, .boehm] { + cur_line := g.go_before_stmt(0) + tmp_cnt_save = g.keep_alive_call_pregen(node) + g.write(cur_line) + for i in 0 .. node.args.len { + if i > 0 { + g.write(', ') + } + g.write('__tmp_arg_${tmp_cnt_save + i}') + } + } else { + g.call_args(node) + } } g.write(')') + if tmp_cnt_save >= 0 { + g.writeln(';') + g.keep_alive_call_postgen(node, tmp_cnt_save) + } } } g.is_c_call = false @@ -1103,6 +1132,36 @@ fn (mut g Gen) call_args(node ast.CallExpr) { } } +// similar to `autofree_call_pregen()` but only to to handle [keep_args_alive] for C functions +fn (mut g Gen) keep_alive_call_pregen(node ast.CallExpr) int { + g.empty_line = true + g.writeln('// keep_alive_call_pregen()') + // reserve the next tmp_vars for arguments + tmp_cnt_save := g.tmp_count + 1 + g.tmp_count += node.args.len + for i, arg in node.args { + // save all arguments in temp vars (not only pointers) to make sure the + // evaluation order is preserved + expected_type := node.expected_arg_types[i] + typ := g.table.get_type_symbol(expected_type).cname + g.write('$typ __tmp_arg_${tmp_cnt_save + i} = ') + // g.expr(arg.expr) + g.ref_or_deref_arg(arg, expected_type, node.language) + g.writeln(';') + } + g.empty_line = false + return tmp_cnt_save +} + +fn (mut g Gen) keep_alive_call_postgen(node ast.CallExpr, tmp_cnt_save int) { + g.writeln('// keep_alive_call_postgen()') + for i, expected_type in node.expected_arg_types { + if expected_type.is_ptr() || expected_type.is_pointer() { + g.writeln('GC_reachable_here(__tmp_arg_${tmp_cnt_save + i});') + } + } +} + [inline] fn (mut g Gen) ref_or_deref_arg(arg ast.CallArg, expected_type ast.Type, lang ast.Language) { arg_is_ptr := expected_type.is_ptr() || expected_type.idx() in ast.pointer_type_idxs diff --git a/vlib/v/parser/fn.v b/vlib/v/parser/fn.v index 5e2883ffcd..aed417392a 100644 --- a/vlib/v/parser/fn.v +++ b/vlib/v/parser/fn.v @@ -179,6 +179,7 @@ fn (mut p Parser) fn_decl() ast.FnDecl { is_direct_arr := p.attrs.contains('direct_array_access') is_conditional, conditional_ctdefine := p.attrs.has_comptime_define() mut is_unsafe := p.attrs.contains('unsafe') + is_keep_alive := p.attrs.contains('keep_args_alive') is_pub := p.tok.kind == .key_pub if is_pub { p.next() @@ -193,6 +194,10 @@ fn (mut p Parser) fn_decl() ast.FnDecl { } else if p.tok.kind == .name && p.tok.lit == 'JS' { language = ast.Language.js } + if is_keep_alive && language != .c { + p.error_with_pos('attribute [keep_args_alive] is only supported for C functions', + p.tok.position()) + } if language != .v { p.next() p.check(.dot) @@ -340,6 +345,7 @@ fn (mut p Parser) fn_decl() ast.FnDecl { is_main: is_main is_test: is_test is_conditional: is_conditional + is_keep_alive: is_keep_alive ctdefine: conditional_ctdefine no_body: no_body mod: p.mod @@ -368,6 +374,7 @@ fn (mut p Parser) fn_decl() ast.FnDecl { is_main: is_main is_test: is_test is_conditional: is_conditional + is_keep_alive: is_keep_alive ctdefine: conditional_ctdefine no_body: no_body mod: p.mod @@ -410,6 +417,7 @@ fn (mut p Parser) fn_decl() ast.FnDecl { is_main: is_main is_test: is_test is_conditional: is_conditional + is_keep_alive: is_keep_alive receiver: ast.StructField{ name: rec.name typ: rec.typ diff --git a/vlib/v/tests/keep_args_alive_test.v b/vlib/v/tests/keep_args_alive_test.v new file mode 100644 index 0000000000..bd1b4ea84a --- /dev/null +++ b/vlib/v/tests/keep_args_alive_test.v @@ -0,0 +1,75 @@ +/* +* To verify the effect of "[keep_args_alive]", this attribute may be commented out. +* However it is not guaranteed that then this test will fail. +* To provoke a failure it seems to be best to use `gcc` with optimization: +* `gcc -gc boehm -cc gcc-9 -prod test keep_args_alive_test.v`. +* Without optimization, pointer variables may remain on the stack even if +* not used any more. +*/ +import rand +import sync + +#flag -I@VROOT/vlib/v/tests +#include "keep_args_alive_test_c.h" + +fn C.atomic_load_ptr(voidptr) voidptr + +fn C.atomic_store_ptr(voidptr, voidptr) + +[keep_args_alive] +fn C.calc_expr_after_delay(voidptr, int, voidptr) int + +fn set_vals() voidptr { + unsafe { + p := &int(malloc(8000000)) + q := &int(malloc(8000000)) + aa := p + 769345 + *aa = -4578 + bb := q + 572397 + *bb = 793254 + p = &int(0) + q = &int(0) + r := &voidptr(malloc(1000000)) + r[456] = aa + r[7932] = bb + aa = &int(0) + bb = &int(0) + return r + } +} + +fn tt(mut sem sync.Semaphore) int { + waste_mem(10000, mut sem) + r := &voidptr(set_vals()) + g := unsafe { C.calc_expr_after_delay(r[456], 12, r[7932]) } + return g +} + +fn waste_mem(n int, mut sem sync.Semaphore) { + mut m := []voidptr{len: 30} + for j := 0; n < 0 || j < n; j++ { + i := rand.intn(30) + m[i] = unsafe { malloc(10000) } + fill := rand.intn(256) + unsafe { C.memset(m[i], fill, 10000) } + if n < 0 && sem.try_wait() { + break + } + } +} + +fn test_keep_args_alive_attribute() { + mut sem := sync.new_semaphore() + $if gcboehm ? { + go waste_mem(-1, mut sem) + go waste_mem(-1, mut sem) + waste_mem(10000, mut sem) + } + r := &voidptr(set_vals()) + v := unsafe { C.calc_expr_after_delay(r[456], 12, r[7932]) } + $if gcboehm ? { + sem.post() + sem.post() + } + assert v == 738318 +} diff --git a/vlib/v/tests/keep_args_alive_test_c.h b/vlib/v/tests/keep_args_alive_test_c.h new file mode 100644 index 0000000000..ccf6082f7d --- /dev/null +++ b/vlib/v/tests/keep_args_alive_test_c.h @@ -0,0 +1,44 @@ +#include + +#if defined(_WIN32) +#define __SLEEP_MS(n) Sleep(n) +#elif defined(__APPLE__) +static void __sleep_ms(int ms) { + struct timespec ts = { + .tv_sec = ms / 1000, + .tv_nsec = 1000000L * (ms % 1000) + }; + struct timespec rem; + while (nanosleep(&ts, &rem) != 0) { + ts = rem; + } +} +#define __SLEEP_MS(n) __sleep_ms(n) +#else +static void __sleep_ms(int ms) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + ts.tv_nsec += 1000000L*((ms)%1000); + ts.tv_sec += ms/1000; + if (ts.tv_nsec >= 1000000000) { + ts.tv_nsec -= 1000000000; + ++ts.tv_sec; + } + while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, NULL) != 0); +} +#define __SLEEP_MS(n) __sleep_ms(n) +#endif + +volatile static int** keep; + +static int calc_expr_after_delay(int* a, int b, int* c) { + keep = malloc(1000000); + keep[43242] = a; + keep[86343] = c; + a = NULL; + c = NULL; + __SLEEP_MS(200); + int z = *keep[43242] * b + *keep[86343]; + free(keep); + return z; +}