builtin, cgen: implement chunked allocation of 16MB blocks with -prealloc (#10130)
parent
94e7a55b97
commit
2086e6f1c1
|
@ -217,6 +217,7 @@ pub fn println(s string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__global total_m = i64(0)
|
||||||
// malloc dynamically allocates a `n` bytes block of memory on the heap.
|
// malloc dynamically allocates a `n` bytes block of memory on the heap.
|
||||||
// malloc returns a `byteptr` pointing to the memory address of the allocated space.
|
// malloc returns a `byteptr` pointing to the memory address of the allocated space.
|
||||||
// unlike the `calloc` family of functions - malloc will not zero the memory block.
|
// unlike the `calloc` family of functions - malloc will not zero the memory block.
|
||||||
|
@ -237,13 +238,8 @@ pub fn malloc(n int) &byte {
|
||||||
}
|
}
|
||||||
mut res := &byte(0)
|
mut res := &byte(0)
|
||||||
$if prealloc {
|
$if prealloc {
|
||||||
res = g_m2_ptr
|
res = unsafe { prealloc_malloc(n) }
|
||||||
unsafe {
|
} $else $if gcboehm ? {
|
||||||
g_m2_ptr += n
|
|
||||||
}
|
|
||||||
nr_mallocs++
|
|
||||||
} $else {
|
|
||||||
$if gcboehm ? {
|
|
||||||
unsafe {
|
unsafe {
|
||||||
res = C.GC_MALLOC(n)
|
res = C.GC_MALLOC(n)
|
||||||
}
|
}
|
||||||
|
@ -261,7 +257,6 @@ pub fn malloc(n int) &byte {
|
||||||
if res == 0 {
|
if res == 0 {
|
||||||
panic('malloc($n) failed')
|
panic('malloc($n) failed')
|
||||||
}
|
}
|
||||||
}
|
|
||||||
$if debug_malloc ? {
|
$if debug_malloc ? {
|
||||||
// Fill in the memory with something != 0, so it is easier to spot
|
// Fill in the memory with something != 0, so it is easier to spot
|
||||||
// when the calling code wrongly relies on it being zeroed.
|
// when the calling code wrongly relies on it being zeroed.
|
||||||
|
@ -310,12 +305,7 @@ pub fn v_realloc(b &byte, n int) &byte {
|
||||||
[unsafe]
|
[unsafe]
|
||||||
pub fn realloc_data(old_data &byte, old_size int, new_size int) &byte {
|
pub fn realloc_data(old_data &byte, old_size int, new_size int) &byte {
|
||||||
$if prealloc {
|
$if prealloc {
|
||||||
unsafe {
|
return unsafe { prealloc_realloc(old_data, old_size, new_size) }
|
||||||
new_ptr := malloc(new_size)
|
|
||||||
min_size := if old_size < new_size { old_size } else { new_size }
|
|
||||||
C.memcpy(new_ptr, old_data, min_size)
|
|
||||||
return new_ptr
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
$if debug_realloc ? {
|
$if debug_realloc ? {
|
||||||
// NB: this is slower, but helps debugging memory problems.
|
// NB: this is slower, but helps debugging memory problems.
|
||||||
|
@ -356,16 +346,22 @@ pub fn vcalloc(n int) &byte {
|
||||||
} else if n == 0 {
|
} else if n == 0 {
|
||||||
return &byte(0)
|
return &byte(0)
|
||||||
}
|
}
|
||||||
|
$if prealloc {
|
||||||
|
return unsafe { prealloc_calloc(n) }
|
||||||
|
}
|
||||||
$if gcboehm ? {
|
$if gcboehm ? {
|
||||||
return &byte(C.GC_MALLOC(n))
|
return unsafe { &byte(C.GC_MALLOC(n)) }
|
||||||
} $else {
|
} $else {
|
||||||
return C.calloc(1, n)
|
return unsafe { C.calloc(1, n) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// special versions of the above that allocate memory which is not scanned
|
// special versions of the above that allocate memory which is not scanned
|
||||||
// for pointers (but is collected) when the Boehm garbage collection is used
|
// for pointers (but is collected) when the Boehm garbage collection is used
|
||||||
pub fn vcalloc_noscan(n int) &byte {
|
pub fn vcalloc_noscan(n int) &byte {
|
||||||
|
$if prealloc {
|
||||||
|
return unsafe { prealloc_calloc(n) }
|
||||||
|
}
|
||||||
$if gcboehm ? {
|
$if gcboehm ? {
|
||||||
$if vplayground ? {
|
$if vplayground ? {
|
||||||
if n > 10000 {
|
if n > 10000 {
|
||||||
|
@ -375,7 +371,7 @@ pub fn vcalloc_noscan(n int) &byte {
|
||||||
if n < 0 {
|
if n < 0 {
|
||||||
panic('calloc(<0)')
|
panic('calloc(<0)')
|
||||||
}
|
}
|
||||||
return &byte(unsafe { C.memset(C.GC_MALLOC_ATOMIC(n), 0, n) })
|
return unsafe { &byte(C.memset(C.GC_MALLOC_ATOMIC(n), 0, n)) }
|
||||||
} $else {
|
} $else {
|
||||||
return unsafe { vcalloc(n) }
|
return unsafe { vcalloc(n) }
|
||||||
}
|
}
|
||||||
|
@ -394,7 +390,7 @@ pub fn free(ptr voidptr) {
|
||||||
//
|
//
|
||||||
// The exception is doing leak detection for manual memory management:
|
// The exception is doing leak detection for manual memory management:
|
||||||
$if gcboehm_leak ? {
|
$if gcboehm_leak ? {
|
||||||
C.GC_FREE(ptr)
|
unsafe { C.GC_FREE(ptr) }
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,11 +3,6 @@
|
||||||
// that can be found in the LICENSE file.
|
// that can be found in the LICENSE file.
|
||||||
module builtin
|
module builtin
|
||||||
|
|
||||||
__global (
|
|
||||||
g_m2_buf &byte
|
|
||||||
g_m2_ptr &byte
|
|
||||||
)
|
|
||||||
|
|
||||||
// isnil returns true if an object is nil (only for C objects).
|
// isnil returns true if an object is nil (only for C objects).
|
||||||
[inline]
|
[inline]
|
||||||
pub fn isnil(v voidptr) bool {
|
pub fn isnil(v voidptr) bool {
|
||||||
|
@ -45,12 +40,8 @@ struct VCastTypeIndexName {
|
||||||
tname string
|
tname string
|
||||||
}
|
}
|
||||||
|
|
||||||
__global (
|
// will be filled in cgen
|
||||||
total_m = i64(0)
|
__global as_cast_type_indexes []VCastTypeIndexName
|
||||||
nr_mallocs = int(0)
|
|
||||||
// will be filled in cgen
|
|
||||||
as_cast_type_indexes []VCastTypeIndexName
|
|
||||||
)
|
|
||||||
|
|
||||||
fn __as_cast(obj voidptr, obj_type int, expected_type int) voidptr {
|
fn __as_cast(obj voidptr, obj_type int, expected_type int) voidptr {
|
||||||
if obj_type != expected_type {
|
if obj_type != expected_type {
|
||||||
|
|
|
@ -0,0 +1,114 @@
|
||||||
|
module builtin
|
||||||
|
|
||||||
|
// With -prealloc, V calls libc's malloc to get chunks, each at least 16MB
|
||||||
|
// in size, as needed. Once a chunk is available, all malloc() calls within
|
||||||
|
// V code, that can fit inside the chunk, will use it instead, each bumping a
|
||||||
|
// pointer, till the chunk is filled. Once a chunk is filled, a new chunk will
|
||||||
|
// be allocated by calling libc's malloc, and the process continues.
|
||||||
|
// Each new chunk has a pointer to the old one, and at the end of the program,
|
||||||
|
// the entire linked list of chunks is freed.
|
||||||
|
// The goal of all this is to amortize the cost of calling libc's malloc,
|
||||||
|
// trading higher memory usage for a compiler (or any single threaded batch
|
||||||
|
// mode program), for a ~8-10% speed increase.
|
||||||
|
// NB: `-prealloc` is NOT safe to be used for multithreaded programs!
|
||||||
|
|
||||||
|
// prealloc_block_size is the minimum size in bytes (16 * 1024 * 1024) of each chunk requested from libc's malloc
|
||||||
|
const prealloc_block_size = 16 * 1024 * 1024
|
||||||
|
|
||||||
|
__global g_memory_block &VMemoryBlock // the chunk that currently serves allocations; older chunks are reachable through its `previous` field
|
||||||
|
[heap]
|
||||||
|
struct VMemoryBlock { // VMemoryBlock describes one chunk obtained from libc, that is carved up by bumping `current`
|
||||||
|
mut:
|
||||||
|
id int // sequence number of the chunk: 0 for the first one, `previous.id + 1` for each later one
|
||||||
|
cap int // total size of the chunk buffer in bytes
|
||||||
|
start &byte = 0 // beginning of the chunk buffer, as returned by C.malloc
|
||||||
|
previous &VMemoryBlock = 0 // the chunk that was in use before this one, or 0 for the first chunk
|
||||||
|
remaining int // number of bytes in the buffer that were not handed out yet
|
||||||
|
current &byte = 0 // address that the next allocation from this chunk will receive
|
||||||
|
mallocs int // number of allocations that were served from this chunk
|
||||||
|
}
|
||||||
|
|
||||||
|
// vmemory_block_new creates a new chunk descriptor together with its buffer,
// and links it to `prev` (pass 0 for the very first chunk).
// The buffer has room for at least `at_least` bytes, and is never smaller
// than `prealloc_block_size`.
// The program is terminated with exit code 1, if libc can not provide the
// memory for the descriptor or for the buffer.
[unsafe]
fn vmemory_block_new(prev &VMemoryBlock, at_least int) &VMemoryBlock {
	block_size := if at_least < prealloc_block_size { prealloc_block_size } else { at_least }
	// calloc zeroes the descriptor, so `id` and `mallocs` start from 0
	mut v := unsafe { &VMemoryBlock(C.calloc(1, sizeof(VMemoryBlock))) }
	if v == 0 {
		// NB: a constant message + exit is used here, instead of panic with
		// string interpolation, to avoid asking the allocator that has just
		// failed for even more memory.
		eprintln('prealloc: failed to allocate a memory block descriptor')
		exit(1)
	}
	if prev != 0 {
		v.id = prev.id + 1
	}
	v.previous = prev
	v.start = unsafe { C.malloc(block_size) }
	if v.start == 0 {
		// the descriptor is not linked anywhere yet, so release it before exiting
		unsafe { C.free(v) }
		eprintln('prealloc: failed to allocate a new memory block')
		exit(1)
	}
	v.cap = block_size
	v.remaining = block_size
	v.current = v.start
	return v
}
|
||||||
|
|
||||||
|
// vmemory_block_malloc hands out `n` bytes from the chunk in `g_memory_block`.
// When the chunk has less than `n` bytes left, a new chunk (big enough for `n`)
// is allocated first, and becomes the current one.
// The returned memory is not zeroed.
// A negative `n` is rejected with a panic, since it would move the bump pointer
// backwards and corrupt the bookkeeping of the chunk.
// NOTE(review): the returned address is `start` plus the sum of the previously
// requested sizes, so it is only as aligned as those sizes are - confirm that
// callers do not need a stricter alignment.
[unsafe]
fn vmemory_block_malloc(n int) &byte {
	if n < 0 {
		panic('prealloc: malloc(<0)')
	}
	unsafe {
		if g_memory_block.remaining < n {
			g_memory_block = vmemory_block_new(g_memory_block, n)
		}
		res := g_memory_block.current
		g_memory_block.remaining -= n
		g_memory_block.mallocs++
		g_memory_block.current += n
		return res
	}
}
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// prealloc_vinit sets up the -prealloc allocator: it creates the first chunk
// (without a predecessor), stores it in `g_memory_block`, and, except for
// freestanding builds, registers `prealloc_vcleanup` to run at program exit.
[unsafe]
fn prealloc_vinit() {
	first_block := unsafe { vmemory_block_new(voidptr(0), prealloc_block_size) }
	unsafe {
		g_memory_block = first_block
	}
	$if !freestanding {
		unsafe { C.atexit(prealloc_vcleanup) }
	}
}
|
||||||
|
|
||||||
|
// prealloc_vcleanup releases all the chunks, walking the linked list from the
// most recent chunk back to the first one. Both the buffer of each chunk, and
// the chunk descriptor itself, are returned to libc.
// With `-d prealloc_stats`, a summary line for each chunk, and the total number
// of allocations, are printed to stderr before anything is freed.
// `g_memory_block` is 0 after the call.
[unsafe]
fn prealloc_vcleanup() {
	$if prealloc_stats ? {
		// NB: we do 2 loops here, because string interpolation
		// in the first loop may still use g_memory_block
		// The second loop however should *not* allocate at all.
		mut nr_mallocs := i64(0)
		mut mb := g_memory_block
		for mb != 0 {
			nr_mallocs += mb.mallocs
			eprintln('> freeing mb.id: ${mb.id:3} | cap: ${mb.cap:7} | rem: ${mb.remaining:7} | start: ${voidptr(mb.start)} | current: ${voidptr(mb.current)} | diff: ${u64(mb.current) - u64(mb.start):7} bytes | mallocs: $mb.mallocs')
			mb = mb.previous
		}
		eprintln('> nr_mallocs: $nr_mallocs')
	}
	unsafe {
		for g_memory_block != 0 {
			// unlink the chunk first, so that its descriptor can be freed safely
			finished := g_memory_block
			g_memory_block = finished.previous
			C.free(finished.start)
			// the descriptor was allocated separately with calloc in vmemory_block_new
			C.free(finished)
		}
	}
}
|
||||||
|
|
||||||
|
// prealloc_malloc is the -prealloc backend of `malloc`.
// It returns `n` bytes of memory, that is not zeroed, taken from the current chunk.
[unsafe]
fn prealloc_malloc(n int) &byte {
	res := unsafe { vmemory_block_malloc(n) }
	return res
}
|
||||||
|
|
||||||
|
// prealloc_realloc is the -prealloc backend of `realloc_data`.
// It always allocates a fresh region of `new_size` bytes, and copies the
// smaller of `old_size` and `new_size` bytes from `old_data` into it.
// The old region is not released or reused.
// Nothing is copied when `old_data` is 0, or when there are no bytes to copy,
// so the call then behaves like a plain allocation.
[unsafe]
fn prealloc_realloc(old_data &byte, old_size int, new_size int) &byte {
	new_ptr := unsafe { vmemory_block_malloc(new_size) }
	min_size := if old_size < new_size { old_size } else { new_size }
	// memcpy must not receive a nil source, and a negative size would be
	// converted to a huge unsigned one
	if old_data != 0 && min_size > 0 {
		unsafe { C.memcpy(new_ptr, old_data, min_size) }
	}
	return new_ptr
}
|
||||||
|
|
||||||
|
// prealloc_calloc is the -prealloc backend of `vcalloc` and `vcalloc_noscan`.
// It returns `n` bytes taken from the current chunk, all set to 0.
// A negative `n` panics, the same way the non prealloc code paths do, instead
// of reaching memset as a huge unsigned size.
[unsafe]
fn prealloc_calloc(n int) &byte {
	if n < 0 {
		panic('calloc(<0)')
	}
	new_ptr := unsafe { vmemory_block_malloc(n) }
	unsafe { C.memset(new_ptr, 0, n) }
	return new_ptr
}
|
|
@ -5535,8 +5535,7 @@ fn (mut g Gen) write_init_function() {
|
||||||
// g.writeln('g_str_buf = malloc( ${mb_size} * 1024 * 1000 );')
|
// g.writeln('g_str_buf = malloc( ${mb_size} * 1024 * 1000 );')
|
||||||
}
|
}
|
||||||
if g.pref.prealloc {
|
if g.pref.prealloc {
|
||||||
g.writeln('g_m2_buf = malloc(150 * 1000 * 1000);')
|
g.writeln('prealloc_vinit();')
|
||||||
g.writeln('g_m2_ptr = g_m2_buf;')
|
|
||||||
}
|
}
|
||||||
// NB: the as_cast table should be *before* the other constant initialize calls,
|
// NB: the as_cast table should be *before* the other constant initialize calls,
|
||||||
// because it may be needed during const initialization of builtin and during
|
// because it may be needed during const initialization of builtin and during
|
||||||
|
|
|
@ -66,11 +66,7 @@ void _STR_PRINT_ARG(const char *fmt, char** refbufp, int *nbytes, int *memsize,
|
||||||
}
|
}
|
||||||
// increase buffer (somewhat exponentially)
|
// increase buffer (somewhat exponentially)
|
||||||
*memsize += guess + 3 * (*memsize) / 2;
|
*memsize += guess + 3 * (*memsize) / 2;
|
||||||
#ifdef _VGCBOEHM
|
*refbufp = (char*)v_realloc((void*)*refbufp, *memsize);
|
||||||
*refbufp = (char*)GC_REALLOC((void*)*refbufp, *memsize);
|
|
||||||
#else
|
|
||||||
*refbufp = (char*)realloc((void*)*refbufp, *memsize);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
va_end(args);
|
va_end(args);
|
||||||
}
|
}
|
||||||
|
@ -79,11 +75,7 @@ string _STR(const char *fmt, int nfmts, ...) {
|
||||||
va_list argptr;
|
va_list argptr;
|
||||||
int memsize = 128;
|
int memsize = 128;
|
||||||
int nbytes = 0;
|
int nbytes = 0;
|
||||||
#ifdef _VGCBOEHM
|
char* buf = (char*)v_malloc(memsize);
|
||||||
char* buf = (char*)GC_MALLOC(memsize);
|
|
||||||
#else
|
|
||||||
char* buf = (char*)malloc(memsize);
|
|
||||||
#endif
|
|
||||||
va_start(argptr, nfmts);
|
va_start(argptr, nfmts);
|
||||||
for (int i=0; i<nfmts; ++i) {
|
for (int i=0; i<nfmts; ++i) {
|
||||||
int k = strlen(fmt);
|
int k = strlen(fmt);
|
||||||
|
|
|
@ -18,6 +18,8 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []ast.Fi
|
||||||
'__new_array',
|
'__new_array',
|
||||||
'__new_array_with_default',
|
'__new_array_with_default',
|
||||||
'__new_array_with_array_default',
|
'__new_array_with_array_default',
|
||||||
|
'v_realloc' /* needed for _STR */,
|
||||||
|
'v_malloc' /* needed for _STR */,
|
||||||
'new_array_from_c_array',
|
'new_array_from_c_array',
|
||||||
'v_fixed_index',
|
'v_fixed_index',
|
||||||
'memdup',
|
'memdup',
|
||||||
|
@ -162,6 +164,10 @@ pub fn mark_used(mut table ast.Table, pref &pref.Preferences, ast_files []ast.Fi
|
||||||
all_fn_root_names << k
|
all_fn_root_names << k
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if pref.prealloc && k.starts_with('prealloc_') {
|
||||||
|
all_fn_root_names << k
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle assertions and testing framework callbacks:
|
// handle assertions and testing framework callbacks:
|
||||||
|
|
Loading…
Reference in New Issue