builtin, cgen: implement chunked allocation of 16MB blocks with -prealloc (#10130)

pull/10135/head
Delyan Angelov 2021-05-19 10:35:56 +03:00 committed by GitHub
parent 94e7a55b97
commit 2086e6f1c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 152 additions and 54 deletions

View File

@ -217,6 +217,7 @@ pub fn println(s string) {
}
}
__global total_m = i64(0)
// malloc dynamically allocates a `n` bytes block of memory on the heap.
// malloc returns a `byteptr` pointing to the memory address of the allocated space.
// unlike the `calloc` family of functions - malloc will not zero the memory block.
@ -237,13 +238,8 @@ pub fn malloc(n int) &byte {
}
mut res := &byte(0)
$if prealloc {
res = g_m2_ptr
unsafe {
g_m2_ptr += n
}
nr_mallocs++
} $else {
$if gcboehm ? {
res = unsafe { prealloc_malloc(n) }
} $else $if gcboehm ? {
unsafe {
res = C.GC_MALLOC(n)
}
@ -261,7 +257,6 @@ pub fn malloc(n int) &byte {
if res == 0 {
panic('malloc($n) failed')
}
}
$if debug_malloc ? {
// Fill in the memory with something != 0, so it is easier to spot
// when the calling code wrongly relies on it being zeroed.
@ -310,12 +305,7 @@ pub fn v_realloc(b &byte, n int) &byte {
[unsafe]
pub fn realloc_data(old_data &byte, old_size int, new_size int) &byte {
$if prealloc {
unsafe {
new_ptr := malloc(new_size)
min_size := if old_size < new_size { old_size } else { new_size }
C.memcpy(new_ptr, old_data, min_size)
return new_ptr
}
return unsafe { prealloc_realloc(old_data, old_size, new_size) }
}
$if debug_realloc ? {
// NB: this is slower, but helps debugging memory problems.
@ -356,16 +346,22 @@ pub fn vcalloc(n int) &byte {
} else if n == 0 {
return &byte(0)
}
$if prealloc {
return unsafe { prealloc_calloc(n) }
}
$if gcboehm ? {
return &byte(C.GC_MALLOC(n))
return unsafe { &byte(C.GC_MALLOC(n)) }
} $else {
return C.calloc(1, n)
return unsafe { C.calloc(1, n) }
}
}
// special versions of the above that allocate memory which is not scanned
// for pointers (but is collected) when the Boehm garbage collection is used
pub fn vcalloc_noscan(n int) &byte {
$if prealloc {
return unsafe { prealloc_calloc(n) }
}
$if gcboehm ? {
$if vplayground ? {
if n > 10000 {
@ -375,7 +371,7 @@ pub fn vcalloc_noscan(n int) &byte {
if n < 0 {
panic('calloc(<0)')
}
return &byte(unsafe { C.memset(C.GC_MALLOC_ATOMIC(n), 0, n) })
return unsafe { &byte(C.memset(C.GC_MALLOC_ATOMIC(n), 0, n)) }
} $else {
return unsafe { vcalloc(n) }
}
@ -394,7 +390,7 @@ pub fn free(ptr voidptr) {
//
// The exception is doing leak detection for manual memory management:
$if gcboehm_leak ? {
C.GC_FREE(ptr)
unsafe { C.GC_FREE(ptr) }
}
return
}

View File

@ -3,11 +3,6 @@
// that can be found in the LICENSE file.
module builtin
__global (
g_m2_buf &byte
g_m2_ptr &byte
)
// isnil returns true if an object is nil (only for C objects).
[inline]
pub fn isnil(v voidptr) bool {
@ -45,12 +40,8 @@ struct VCastTypeIndexName {
tname string
}
__global (
total_m = i64(0)
nr_mallocs = int(0)
// will be filled in cgen
as_cast_type_indexes []VCastTypeIndexName
)
__global as_cast_type_indexes []VCastTypeIndexName
fn __as_cast(obj voidptr, obj_type int, expected_type int) voidptr {
if obj_type != expected_type {

View File

@ -0,0 +1,114 @@
module builtin
// With -prealloc, V calls libc's malloc to get chunks, each at least 16MB
// in size, as needed. Once a chunk is available, all malloc() calls within
// V code, that can fit inside the chunk, will use it instead, each bumping a
// pointer, till the chunk is filled. Once a chunk is filled, a new chunk will
// be allocated by calling libc's malloc, and the process continues.
// Each new chunk has a pointer to the old one, and at the end of the program,
// the entire linked list of chunks is freed.
// The goal of all this is to amortize the cost of calling libc's malloc,
// trading higher memory usage for a compiler (or any single threaded batch
// mode program), for a ~8-10% speed increase.
// NB: `-prealloc` is NOT safe to be used for multithreaded programs!
// prealloc_block_size is the minimum size in bytes (16MB) of every chunk
// that is requested from libc's malloc; vmemory_block_new uses a larger
// size only when a single request does not fit in it.
const prealloc_block_size = 16 * 1024 * 1024
// g_memory_block points to the chunk that currently serves allocations.
// Older chunks stay reachable from it through their `previous` links.
__global g_memory_block &VMemoryBlock
// VMemoryBlock is the bookkeeping header of one preallocated chunk.
// The headers form a singly linked list, that goes from the newest chunk
// back to the oldest one through `previous`.
[heap]
struct VMemoryBlock {
mut:
// sequence number: stays 0 for the first chunk, `previous.id + 1` afterwards
id int
// total size of the chunk in bytes
cap int
// beginning of the chunk, as returned by C.malloc
start &byte = 0
// the chunk that was current before this one was created; 0 for the first chunk
previous &VMemoryBlock = 0
// number of bytes in the chunk, that have not been handed out yet
remaining int
// bump pointer: the address that the next allocation from this chunk will get
current &byte = 0
// number of allocations that were served from this chunk
mallocs int
}
// vmemory_block_new allocates a new chunk header together with its backing
// memory, and links it in front of `prev` (pass 0 for the very first chunk).
// The backing memory is `prealloc_block_size` bytes long, or `at_least` bytes,
// when a single request is bigger than that.
// It panics when libc can not provide the memory, instead of letting later
// code write through a null pointer.
[unsafe]
fn vmemory_block_new(prev &VMemoryBlock, at_least int) &VMemoryBlock {
	mut v := unsafe { &VMemoryBlock(C.calloc(1, sizeof(VMemoryBlock))) }
	if v == 0 {
		// NB: a plain literal is used on purpose, to keep the failure path
		// from needing string interpolation of its own
		panic('vmemory_block_new: failed to allocate a block header')
	}
	if prev != 0 {
		v.id = prev.id + 1
	}
	v.previous = prev
	// never allocate less than a full block; oversized requests get a chunk of their own size
	block_size := if at_least < prealloc_block_size { prealloc_block_size } else { at_least }
	v.start = unsafe { C.malloc(block_size) }
	if v.start == 0 {
		panic('vmemory_block_new: failed to allocate a memory block')
	}
	v.cap = block_size
	v.remaining = block_size
	v.current = v.start
	return v
}
// vmemory_block_malloc hands out `n` bytes from the current chunk, by bumping
// its `current` pointer. When the current chunk has less space left than
// needed, a new chunk is created first, and becomes the current one.
// Every positive request is rounded up to a multiple of 16 bytes. Since each
// chunk starts at an address returned by C.malloc, and all bumps are multiples
// of 16, the returned pointers keep the alignment of the chunk start (up to
// 16 bytes), instead of becoming misaligned after the first odd sized request.
[unsafe]
fn vmemory_block_malloc(n int) &byte {
	unsafe {
		// Non positive sizes, and sizes so close to the int limit that the
		// rounding would overflow, are passed through unchanged.
		size := if n <= 0 || n > 2147483632 { n } else { (n + 15) & ~15 }
		if g_memory_block.remaining < size {
			g_memory_block = vmemory_block_new(g_memory_block, size)
		}
		res := g_memory_block.current
		g_memory_block.remaining -= size
		g_memory_block.mallocs++
		g_memory_block.current += size
		return res
	}
}
/////////////////////////////////////////////////
// prealloc_vinit creates the first chunk and makes it the current one.
// Outside of freestanding builds, it also registers prealloc_vcleanup
// with C.atexit.
[unsafe]
fn prealloc_vinit() {
	first_block := unsafe { vmemory_block_new(voidptr(0), prealloc_block_size) }
	unsafe {
		g_memory_block = first_block
	}
	$if !freestanding {
		unsafe {
			C.atexit(prealloc_vcleanup)
		}
	}
}
// prealloc_vcleanup releases the whole linked list of chunks: both the
// memory of each chunk, and the header that describes it.
// With `-d prealloc_stats`, it first prints a line per chunk to stderr,
// followed by the total number of allocations.
[unsafe]
fn prealloc_vcleanup() {
	$if prealloc_stats ? {
		// NB: we do 2 loops here, because string interpolation
		// in the first loop may still use g_memory_block
		// The second loop however should *not* allocate at all.
		mut nr_mallocs := i64(0)
		mut mb := g_memory_block
		for mb != 0 {
			nr_mallocs += mb.mallocs
			eprintln('> freeing mb.id: ${mb.id:3} | cap: ${mb.cap:7} | rem: ${mb.remaining:7} | start: ${voidptr(mb.start)} | current: ${voidptr(mb.current)} | diff: ${u64(mb.current) - u64(mb.start):7} bytes | mallocs: $mb.mallocs')
			mb = mb.previous
		}
		eprintln('> nr_mallocs: $nr_mallocs')
	}
	unsafe {
		for g_memory_block != 0 {
			// read the link before the header that holds it is released
			older := g_memory_block.previous
			C.free(g_memory_block.start)
			// the header itself came from C.calloc in vmemory_block_new,
			// so it has to be given back to libc too
			C.free(g_memory_block)
			g_memory_block = older
		}
	}
}
// prealloc_malloc returns `n` bytes taken from the current preallocated
// chunk. The memory is not zeroed here.
[unsafe]
fn prealloc_malloc(n int) &byte {
	unsafe {
		return vmemory_block_malloc(n)
	}
}
// prealloc_realloc takes a fresh area of `new_size` bytes from the
// preallocated chunks, and copies the smaller of `old_size` and `new_size`
// bytes from `old_data` into it. The old area is not released or reused.
[unsafe]
fn prealloc_realloc(old_data &byte, old_size int, new_size int) &byte {
	new_ptr := unsafe { vmemory_block_malloc(new_size) }
	min_size := if old_size < new_size { old_size } else { new_size }
	// Skip the copy when there is nothing to copy: this avoids calling memcpy
	// with a null `old_data`, and a negative length would otherwise be
	// converted to a huge unsigned size on the C side.
	if min_size > 0 {
		unsafe { C.memcpy(new_ptr, old_data, min_size) }
	}
	return new_ptr
}
// prealloc_calloc returns `n` bytes taken from the current preallocated
// chunk, after filling them with zeroes.
[unsafe]
fn prealloc_calloc(n int) &byte {
	unsafe {
		zeroed := vmemory_block_malloc(n)
		C.memset(zeroed, 0, n)
		return zeroed
	}
}

View File

@ -5535,8 +5535,7 @@ fn (mut g Gen) write_init_function() {
// g.writeln('g_str_buf = malloc( ${mb_size} * 1024 * 1000 );')
}
if g.pref.prealloc {
g.writeln('g_m2_buf = malloc(150 * 1000 * 1000);')
g.writeln('g_m2_ptr = g_m2_buf;')
g.writeln('prealloc_vinit();')
}
// NB: the as_cast table should be *before* the other constant initialize calls,
// because it may be needed during const initialization of builtin and during

View File

@ -66,11 +66,7 @@ void _STR_PRINT_ARG(const char *fmt, char** refbufp, int *nbytes, int *memsize,
}
// increase buffer (somewhat exponentially)
*memsize += guess + 3 * (*memsize) / 2;
#ifdef _VGCBOEHM
*refbufp = (char*)GC_REALLOC((void*)*refbufp, *memsize);
#else
*refbufp = (char*)realloc((void*)*refbufp, *memsize);
#endif
*refbufp = (char*)v_realloc((void*)*refbufp, *memsize);
}
va_end(args);
}
@ -79,11 +75,7 @@ string _STR(const char *fmt, int nfmts, ...) {
va_list argptr;
int memsize = 128;
int nbytes = 0;
#ifdef _VGCBOEHM
char* buf = (char*)GC_MALLOC(memsize);
#else
char* buf = (char*)malloc(memsize);
#endif
char* buf = (char*)v_malloc(memsize);
va_start(argptr, nfmts);
for (int i=0; i<nfmts; ++i) {
int k = strlen(fmt);

View File

@ -18,6 +18,8 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []ast.Fi
'__new_array',
'__new_array_with_default',
'__new_array_with_array_default',
'v_realloc' /* needed for _STR */,
'v_malloc' /* needed for _STR */,
'new_array_from_c_array',
'v_fixed_index',
'memdup',
@ -162,6 +164,10 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []ast.Fi
all_fn_root_names << k
continue
}
if pref.prealloc && k.starts_with('prealloc_') {
all_fn_root_names << k
continue
}
}
// handle assertions and testing framework callbacks: