builtin, cgen: implement chunked allocation of 16MB blocks with -prealloc (#10130)
parent
94e7a55b97
commit
2086e6f1c1
|
@ -217,6 +217,7 @@ pub fn println(s string) {
|
|||
}
|
||||
}
|
||||
|
||||
__global total_m = i64(0)
|
||||
// malloc dynamically allocates a `n` bytes block of memory on the heap.
|
||||
// malloc returns a `byteptr` pointing to the memory address of the allocated space.
|
||||
// unlike the `calloc` family of functions - malloc will not zero the memory block.
|
||||
|
@ -237,13 +238,8 @@ pub fn malloc(n int) &byte {
|
|||
}
|
||||
mut res := &byte(0)
|
||||
$if prealloc {
|
||||
res = g_m2_ptr
|
||||
unsafe {
|
||||
g_m2_ptr += n
|
||||
}
|
||||
nr_mallocs++
|
||||
} $else {
|
||||
$if gcboehm ? {
|
||||
res = unsafe { prealloc_malloc(n) }
|
||||
} $else $if gcboehm ? {
|
||||
unsafe {
|
||||
res = C.GC_MALLOC(n)
|
||||
}
|
||||
|
@ -261,7 +257,6 @@ pub fn malloc(n int) &byte {
|
|||
if res == 0 {
|
||||
panic('malloc($n) failed')
|
||||
}
|
||||
}
|
||||
$if debug_malloc ? {
|
||||
// Fill in the memory with something != 0, so it is easier to spot
|
||||
// when the calling code wrongly relies on it being zeroed.
|
||||
|
@ -310,12 +305,7 @@ pub fn v_realloc(b &byte, n int) &byte {
|
|||
[unsafe]
|
||||
pub fn realloc_data(old_data &byte, old_size int, new_size int) &byte {
|
||||
$if prealloc {
|
||||
unsafe {
|
||||
new_ptr := malloc(new_size)
|
||||
min_size := if old_size < new_size { old_size } else { new_size }
|
||||
C.memcpy(new_ptr, old_data, min_size)
|
||||
return new_ptr
|
||||
}
|
||||
return unsafe { prealloc_realloc(old_data, old_size, new_size) }
|
||||
}
|
||||
$if debug_realloc ? {
|
||||
// NB: this is slower, but helps debugging memory problems.
|
||||
|
@ -356,16 +346,22 @@ pub fn vcalloc(n int) &byte {
|
|||
} else if n == 0 {
|
||||
return &byte(0)
|
||||
}
|
||||
$if prealloc {
|
||||
return unsafe { prealloc_calloc(n) }
|
||||
}
|
||||
$if gcboehm ? {
|
||||
return &byte(C.GC_MALLOC(n))
|
||||
return unsafe { &byte(C.GC_MALLOC(n)) }
|
||||
} $else {
|
||||
return C.calloc(1, n)
|
||||
return unsafe { C.calloc(1, n) }
|
||||
}
|
||||
}
|
||||
|
||||
// special versions of the above that allocate memory which is not scanned
|
||||
// for pointers (but is collected) when the Boehm garbage collection is used
|
||||
pub fn vcalloc_noscan(n int) &byte {
|
||||
$if prealloc {
|
||||
return unsafe { prealloc_calloc(n) }
|
||||
}
|
||||
$if gcboehm ? {
|
||||
$if vplayground ? {
|
||||
if n > 10000 {
|
||||
|
@ -375,7 +371,7 @@ pub fn vcalloc_noscan(n int) &byte {
|
|||
if n < 0 {
|
||||
panic('calloc(<0)')
|
||||
}
|
||||
return &byte(unsafe { C.memset(C.GC_MALLOC_ATOMIC(n), 0, n) })
|
||||
return unsafe { &byte(C.memset(C.GC_MALLOC_ATOMIC(n), 0, n)) }
|
||||
} $else {
|
||||
return unsafe { vcalloc(n) }
|
||||
}
|
||||
|
@ -394,7 +390,7 @@ pub fn free(ptr voidptr) {
|
|||
//
|
||||
// The exception is doing leak detection for manual memory management:
|
||||
$if gcboehm_leak ? {
|
||||
C.GC_FREE(ptr)
|
||||
unsafe { C.GC_FREE(ptr) }
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
@ -3,11 +3,6 @@
|
|||
// that can be found in the LICENSE file.
|
||||
module builtin
|
||||
|
||||
__global (
|
||||
g_m2_buf &byte
|
||||
g_m2_ptr &byte
|
||||
)
|
||||
|
||||
// isnil returns true if an object is nil (only for C objects).
|
||||
[inline]
|
||||
pub fn isnil(v voidptr) bool {
|
||||
|
@ -45,12 +40,8 @@ struct VCastTypeIndexName {
|
|||
tname string
|
||||
}
|
||||
|
||||
__global (
|
||||
total_m = i64(0)
|
||||
nr_mallocs = int(0)
|
||||
// will be filled in cgen
|
||||
as_cast_type_indexes []VCastTypeIndexName
|
||||
)
|
||||
__global as_cast_type_indexes []VCastTypeIndexName
|
||||
|
||||
fn __as_cast(obj voidptr, obj_type int, expected_type int) voidptr {
|
||||
if obj_type != expected_type {
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
module builtin
|
||||
|
||||
// With -prealloc, V calls libc's malloc to get chunks, each at least 16MB
|
||||
// in size, as needed. Once a chunk is available, all malloc() calls within
|
||||
// V code, that can fit inside the chunk, will use it instead, each bumping a
|
||||
// pointer, till the chunk is filled. Once a chunk is filled, a new chunk will
|
||||
// be allocated by calling libc's malloc, and the process continues.
|
||||
// Each new chunk has a pointer to the old one, and at the end of the program,
|
||||
// the entire linked list of chunks is freed.
|
||||
// The goal of all this is to amortize the cost of calling libc's malloc,
|
||||
// trading higher memory usage for a compiler (or any single threaded batch
|
||||
// mode program), for a ~8-10% speed increase.
|
||||
// NB: `-prealloc` is NOT safe to be used for multithreaded programs!
|
||||
|
||||
// size of the preallocated chunk
|
||||
const prealloc_block_size = 16 * 1024 * 1024
|
||||
|
||||
__global g_memory_block &VMemoryBlock
|
||||
// VMemoryBlock is the header describing one preallocated chunk.
// Headers form a singly linked list through `previous`, newest first.
[heap]
struct VMemoryBlock {
mut:
	id        int // sequence number: 0 for the first chunk, previous.id + 1 afterwards
	cap       int // total size in bytes of the buffer at `start`
	start     &byte = 0 // beginning of the buffer obtained from libc's malloc
	previous  &VMemoryBlock = 0 // the chunk that was current before this one, or 0
	remaining int // bytes of the buffer that were not handed out yet
	current   &byte = 0 // bump pointer: address of the next allocation
	mallocs   int // how many allocations were served from this chunk
}
|
||||
|
||||
// vmemory_block_new creates a new chunk header, links it to `prev`, and gives
// it a buffer of `at_least` bytes, but never less than `prealloc_block_size`.
// The header comes from C.calloc, the buffer from C.malloc.
[unsafe]
fn vmemory_block_new(prev &VMemoryBlock, at_least int) &VMemoryBlock {
	mut block := unsafe { &VMemoryBlock(C.calloc(1, sizeof(VMemoryBlock))) }
	block.previous = prev
	if prev != 0 {
		block.id = prev.id + 1
	}
	// Oversized requests get a dedicated chunk that is exactly big enough.
	mut size := prealloc_block_size
	if at_least > size {
		size = at_least
	}
	block.start = unsafe { C.malloc(size) }
	block.current = block.start
	block.cap = size
	block.remaining = size
	return block
}
|
||||
|
||||
// vmemory_block_malloc hands out `n` bytes from the current chunk by bumping
// its `current` pointer. When the current chunk does not have enough room,
// a new chunk is allocated and becomes the current one.
// Every request is rounded up to a multiple of 16 bytes, so that each returned
// pointer keeps the alignment of the chunk start (which comes from C.malloc).
// Without the rounding, an allocation following an odd sized one (for example
// a short string) would be misaligned for structs containing i64/f64/pointers.
[unsafe]
fn vmemory_block_malloc(n int) &byte {
	// Skip the rounding for sizes so large that `n + 15` would overflow an int.
	size := if n <= 0x7FFFFFF0 { (n + 15) / 16 * 16 } else { n }
	unsafe {
		if g_memory_block.remaining < size {
			g_memory_block = vmemory_block_new(g_memory_block, size)
		}
		res := g_memory_block.current
		g_memory_block.current += size
		g_memory_block.remaining -= size
		g_memory_block.mallocs++
		return res
	}
}
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
|
||||
// prealloc_vinit sets up the first chunk and, except on freestanding targets,
// registers prealloc_vcleanup to run at program exit.
[unsafe]
fn prealloc_vinit() {
	unsafe {
		first := vmemory_block_new(voidptr(0), prealloc_block_size)
		g_memory_block = first
	}
	$if !freestanding {
		unsafe { C.atexit(prealloc_vcleanup) }
	}
}
|
||||
|
||||
// prealloc_vcleanup releases the whole linked list of chunks, newest first.
// Both the buffer of each chunk and its VMemoryBlock header are freed.
// With `-d prealloc_stats`, it first prints per chunk usage statistics.
[unsafe]
fn prealloc_vcleanup() {
	$if prealloc_stats ? {
		// NB: we do 2 loops here, because string interpolation
		// in the first loop may still use g_memory_block
		// The second loop however should *not* allocate at all.
		mut nr_mallocs := i64(0)
		mut mb := g_memory_block
		for mb != 0 {
			nr_mallocs += mb.mallocs
			eprintln('> freeing mb.id: ${mb.id:3} | cap: ${mb.cap:7} | rem: ${mb.remaining:7} | start: ${voidptr(mb.start)} | current: ${voidptr(mb.current)} | diff: ${u64(mb.current) - u64(mb.start):7} bytes | mallocs: $mb.mallocs')
			mb = mb.previous
		}
		eprintln('> nr_mallocs: $nr_mallocs')
	}
	unsafe {
		for g_memory_block != 0 {
			// Read the link before the header holding it is released.
			older := g_memory_block.previous
			C.free(g_memory_block.start)
			// The header itself was allocated with C.calloc in vmemory_block_new.
			C.free(g_memory_block)
			g_memory_block = older
		}
	}
}
|
||||
|
||||
// prealloc_malloc returns `n` bytes taken from the current preallocated chunk.
// The returned memory is not zeroed by this function.
[unsafe]
fn prealloc_malloc(n int) &byte {
	unsafe {
		return vmemory_block_malloc(n)
	}
}
|
||||
|
||||
// prealloc_realloc always takes a fresh region of `new_size` bytes from the
// chunk allocator, and copies min(old_size, new_size) bytes from `old_data`
// into it. The old region is left untouched, it is neither freed nor reused.
[unsafe]
fn prealloc_realloc(old_data &byte, old_size int, new_size int) &byte {
	mut copy_len := new_size
	if old_size < new_size {
		copy_len = old_size
	}
	unsafe {
		fresh := vmemory_block_malloc(new_size)
		C.memcpy(fresh, old_data, copy_len)
		return fresh
	}
}
|
||||
|
||||
// prealloc_calloc returns `n` bytes taken from the current preallocated chunk,
// with all of them set to 0.
[unsafe]
fn prealloc_calloc(n int) &byte {
	unsafe {
		zeroed := vmemory_block_malloc(n)
		C.memset(zeroed, 0, n)
		return zeroed
	}
}
|
|
@ -5535,8 +5535,7 @@ fn (mut g Gen) write_init_function() {
|
|||
// g.writeln('g_str_buf = malloc( ${mb_size} * 1024 * 1000 );')
|
||||
}
|
||||
if g.pref.prealloc {
|
||||
g.writeln('g_m2_buf = malloc(150 * 1000 * 1000);')
|
||||
g.writeln('g_m2_ptr = g_m2_buf;')
|
||||
g.writeln('prealloc_vinit();')
|
||||
}
|
||||
// NB: the as_cast table should be *before* the other constant initialize calls,
|
||||
// because it may be needed during const initialization of builtin and during
|
||||
|
|
|
@ -66,11 +66,7 @@ void _STR_PRINT_ARG(const char *fmt, char** refbufp, int *nbytes, int *memsize,
|
|||
}
|
||||
// increase buffer (somewhat exponentially)
|
||||
*memsize += guess + 3 * (*memsize) / 2;
|
||||
#ifdef _VGCBOEHM
|
||||
*refbufp = (char*)GC_REALLOC((void*)*refbufp, *memsize);
|
||||
#else
|
||||
*refbufp = (char*)realloc((void*)*refbufp, *memsize);
|
||||
#endif
|
||||
*refbufp = (char*)v_realloc((void*)*refbufp, *memsize);
|
||||
}
|
||||
va_end(args);
|
||||
}
|
||||
|
@ -79,11 +75,7 @@ string _STR(const char *fmt, int nfmts, ...) {
|
|||
va_list argptr;
|
||||
int memsize = 128;
|
||||
int nbytes = 0;
|
||||
#ifdef _VGCBOEHM
|
||||
char* buf = (char*)GC_MALLOC(memsize);
|
||||
#else
|
||||
char* buf = (char*)malloc(memsize);
|
||||
#endif
|
||||
char* buf = (char*)v_malloc(memsize);
|
||||
va_start(argptr, nfmts);
|
||||
for (int i=0; i<nfmts; ++i) {
|
||||
int k = strlen(fmt);
|
||||
|
|
|
@ -18,6 +18,8 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []ast.Fi
|
|||
'__new_array',
|
||||
'__new_array_with_default',
|
||||
'__new_array_with_array_default',
|
||||
'v_realloc' /* needed for _STR */,
|
||||
'v_malloc' /* needed for _STR */,
|
||||
'new_array_from_c_array',
|
||||
'v_fixed_index',
|
||||
'memdup',
|
||||
|
@ -162,6 +164,10 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []ast.Fi
|
|||
all_fn_root_names << k
|
||||
continue
|
||||
}
|
||||
if pref.prealloc && k.starts_with('prealloc_') {
|
||||
all_fn_root_names << k
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// handle assertions and testing framework callbacks:
|
||||
|
|
Loading…
Reference in New Issue