From 2086e6f1c1e349919f63ef58fd25544397a6e7e3 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Wed, 19 May 2021 10:35:56 +0300 Subject: [PATCH] builtin, cgen: implement chunked allocation of 16MB blocks with -prealloc (#10130) --- vlib/builtin/builtin.c.v | 58 +++++++++---------- vlib/builtin/builtin.v | 13 +---- vlib/builtin/prealloc.c.v | 114 +++++++++++++++++++++++++++++++++++++ vlib/v/gen/c/cgen.v | 3 +- vlib/v/gen/c/cheaders.v | 12 +--- vlib/v/markused/markused.v | 6 ++ 6 files changed, 152 insertions(+), 54 deletions(-) create mode 100644 vlib/builtin/prealloc.c.v diff --git a/vlib/builtin/builtin.c.v b/vlib/builtin/builtin.c.v index 63ceba1de7..047dd2cd05 100644 --- a/vlib/builtin/builtin.c.v +++ b/vlib/builtin/builtin.c.v @@ -217,6 +217,7 @@ pub fn println(s string) { } } +__global total_m = i64(0) // malloc dynamically allocates a `n` bytes block of memory on the heap. // malloc returns a `byteptr` pointing to the memory address of the allocated space. // unlike the `calloc` family of functions - malloc will not zero the memory block. @@ -237,30 +238,24 @@ pub fn malloc(n int) &byte { } mut res := &byte(0) $if prealloc { - res = g_m2_ptr + res = unsafe { prealloc_malloc(n) } + } $else $if gcboehm ? { unsafe { - g_m2_ptr += n + res = C.GC_MALLOC(n) + } + } $else $if freestanding { + mut e := Errno{} + res, e = mm_alloc(u64(n)) + if e != .enoerror { + eprint('malloc() failed: ') + eprintln(e.str()) + panic('malloc() failed') } - nr_mallocs++ } $else { - $if gcboehm ? { - unsafe { - res = C.GC_MALLOC(n) - } - } $else $if freestanding { - mut e := Errno{} - res, e = mm_alloc(u64(n)) - if e != .enoerror { - eprint('malloc() failed: ') - eprintln(e.str()) - panic('malloc() failed') - } - } $else { - res = unsafe { C.malloc(n) } - } - if res == 0 { - panic('malloc($n) failed') - } + res = unsafe { C.malloc(n) } + } + if res == 0 { + panic('malloc($n) failed') } $if debug_malloc ? { // Fill in the memory with something != 0, so it is easier to spot @@ -310,12 +305,7 @@ pub fn v_realloc(b &byte, n int) &byte { [unsafe] pub fn realloc_data(old_data &byte, old_size int, new_size int) &byte { $if prealloc { - unsafe { - new_ptr := malloc(new_size) - min_size := if old_size < new_size { old_size } else { new_size } - C.memcpy(new_ptr, old_data, min_size) - return new_ptr - } + return unsafe { prealloc_realloc(old_data, old_size, new_size) } } $if debug_realloc ? { // NB: this is slower, but helps debugging memory problems. @@ -356,16 +346,22 @@ pub fn vcalloc(n int) &byte { } else if n == 0 { return &byte(0) } + $if prealloc { + return unsafe { prealloc_calloc(n) } + } $if gcboehm ? { - return &byte(C.GC_MALLOC(n)) + return unsafe { &byte(C.GC_MALLOC(n)) } } $else { - return C.calloc(1, n) + return unsafe { C.calloc(1, n) } } } // special versions of the above that allocate memory which is not scanned // for pointers (but is collected) when the Boehm garbage collection is used pub fn vcalloc_noscan(n int) &byte { + $if prealloc { + return unsafe { prealloc_calloc(n) } + } $if gcboehm ? { $if vplayground ? { if n > 10000 { @@ -375,7 +371,7 @@ pub fn vcalloc_noscan(n int) &byte { if n < 0 { panic('calloc(<0)') } - return &byte(unsafe { C.memset(C.GC_MALLOC_ATOMIC(n), 0, n) }) + return unsafe { &byte(C.memset(C.GC_MALLOC_ATOMIC(n), 0, n)) } } $else { return unsafe { vcalloc(n) } } @@ -394,7 +390,7 @@ pub fn free(ptr voidptr) { // // The exception is doing leak detection for manual memory management: $if gcboehm_leak ? { - C.GC_FREE(ptr) + unsafe { C.GC_FREE(ptr) } } return } diff --git a/vlib/builtin/builtin.v b/vlib/builtin/builtin.v index 21e68ec9bf..66e8dbd0bb 100644 --- a/vlib/builtin/builtin.v +++ b/vlib/builtin/builtin.v @@ -3,11 +3,6 @@ // that can be found in the LICENSE file. module builtin -__global ( - g_m2_buf &byte - g_m2_ptr &byte -) - // isnil returns true if an object is nil (only for C objects). [inline] pub fn isnil(v voidptr) bool { @@ -45,12 +40,8 @@ struct VCastTypeIndexName { tname string } -__global ( - total_m = i64(0) - nr_mallocs = int(0) - // will be filled in cgen - as_cast_type_indexes []VCastTypeIndexName -) +// will be filled in cgen +__global as_cast_type_indexes []VCastTypeIndexName fn __as_cast(obj voidptr, obj_type int, expected_type int) voidptr { if obj_type != expected_type { diff --git a/vlib/builtin/prealloc.c.v b/vlib/builtin/prealloc.c.v new file mode 100644 index 0000000000..0ef66fc650 --- /dev/null +++ b/vlib/builtin/prealloc.c.v @@ -0,0 +1,114 @@ +module builtin + +// With -prealloc, V calls libc's malloc to get chunks, each at least 16MB +// in size, as needed. Once a chunk is available, all malloc() calls within +// V code, that can fit inside the chunk, will use it instead, each bumping a +// pointer, till the chunk is filled. Once a chunk is filled, a new chunk will +// be allocated by calling libc's malloc, and the process continues. +// Each new chunk has a pointer to the old one, and at the end of the program, +// the entire linked list of chunks is freed. +// The goal of all this is to amortize the cost of calling libc's malloc, +// trading higher memory usage for a compiler (or any single threaded batch +// mode program), for a ~8-10% speed increase. +// NB: `-prealloc` is NOT safe to be used for multithreaded programs! + +// size of the preallocated chunk +const prealloc_block_size = 16 * 1024 * 1024 + +__global g_memory_block &VMemoryBlock +[heap] +struct VMemoryBlock { +mut: + id int + cap int + start &byte = 0 + previous &VMemoryBlock = 0 + remaining int + current &byte = 0 + mallocs int +} + +[unsafe] +fn vmemory_block_new(prev &VMemoryBlock, at_least int) &VMemoryBlock { + mut v := unsafe { &VMemoryBlock(C.calloc(1, sizeof(VMemoryBlock))) } + if prev != 0 { + v.id = prev.id + 1 + } + v.previous = prev + block_size := if at_least < prealloc_block_size { prealloc_block_size } else { at_least } + v.start = unsafe { C.malloc(block_size) } + v.cap = block_size + v.remaining = block_size + v.current = v.start + return v +} + +[unsafe] +fn vmemory_block_malloc(n int) &byte { + unsafe { + if g_memory_block.remaining < n { + g_memory_block = vmemory_block_new(g_memory_block, n) + } + mut res := &byte(0) + res = g_memory_block.current + g_memory_block.remaining -= n + g_memory_block.mallocs++ + g_memory_block.current += n + return res + } +} + +///////////////////////////////////////////////// + +[unsafe] +fn prealloc_vinit() { + unsafe { + g_memory_block = vmemory_block_new(voidptr(0), prealloc_block_size) + $if !freestanding { + C.atexit(prealloc_vcleanup) + } + } +} + +[unsafe] +fn prealloc_vcleanup() { + $if prealloc_stats ? { + // NB: we do 2 loops here, because string interpolation + // in the first loop may still use g_memory_block + // The second loop however should *not* allocate at all. + mut nr_mallocs := i64(0) + mut mb := g_memory_block + for mb != 0 { + nr_mallocs += mb.mallocs + eprintln('> freeing mb.id: ${mb.id:3} | cap: ${mb.cap:7} | rem: ${mb.remaining:7} | start: ${voidptr(mb.start)} | current: ${voidptr(mb.current)} | diff: ${u64(mb.current) - u64(mb.start):7} bytes | mallocs: $mb.mallocs') + mb = mb.previous + } + eprintln('> nr_mallocs: $nr_mallocs') + } + unsafe { + for g_memory_block != 0 { + C.free(g_memory_block.start) + g_memory_block = g_memory_block.previous + } + } +} + +[unsafe] +fn prealloc_malloc(n int) &byte { + return unsafe { vmemory_block_malloc(n) } +} + +[unsafe] +fn prealloc_realloc(old_data &byte, old_size int, new_size int) &byte { + new_ptr := unsafe { vmemory_block_malloc(new_size) } + min_size := if old_size < new_size { old_size } else { new_size } + unsafe { C.memcpy(new_ptr, old_data, min_size) } + return new_ptr +} + +[unsafe] +fn prealloc_calloc(n int) &byte { + new_ptr := unsafe { vmemory_block_malloc(n) } + unsafe { C.memset(new_ptr, 0, n) } + return new_ptr +} diff --git a/vlib/v/gen/c/cgen.v b/vlib/v/gen/c/cgen.v index 632b95f097..a72dde4d44 100644 --- a/vlib/v/gen/c/cgen.v +++ b/vlib/v/gen/c/cgen.v @@ -5535,8 +5535,7 @@ fn (mut g Gen) write_init_function() { // g.writeln('g_str_buf = malloc( ${mb_size} * 1024 * 1000 );') } if g.pref.prealloc { - g.writeln('g_m2_buf = malloc(150 * 1000 * 1000);') - g.writeln('g_m2_ptr = g_m2_buf;') + g.writeln('prealloc_vinit();') } // NB: the as_cast table should be *before* the other constant initialize calls, // because it may be needed during const initialization of builtin and during diff --git a/vlib/v/gen/c/cheaders.v b/vlib/v/gen/c/cheaders.v index e5592a1c0c..7a1a1271d7 100644 --- a/vlib/v/gen/c/cheaders.v +++ b/vlib/v/gen/c/cheaders.v @@ -66,11 +66,7 @@ void _STR_PRINT_ARG(const char *fmt, char** refbufp, int *nbytes, int *memsize, } // increase buffer (somewhat exponentially) *memsize += guess + 3 * (*memsize) / 2; -#ifdef _VGCBOEHM - *refbufp = (char*)GC_REALLOC((void*)*refbufp, *memsize); -#else - *refbufp = (char*)realloc((void*)*refbufp, *memsize); -#endif + *refbufp = (char*)v_realloc((void*)*refbufp, *memsize); } va_end(args); } @@ -79,11 +75,7 @@ string _STR(const char *fmt, int nfmts, ...) { va_list argptr; int memsize = 128; int nbytes = 0; -#ifdef _VGCBOEHM - char* buf = (char*)GC_MALLOC(memsize); -#else - char* buf = (char*)malloc(memsize); -#endif + char* buf = (char*)v_malloc(memsize); va_start(argptr, nfmts); for (int i=0; i