From 70a0aab72b7dc797351ddb70b04b6dd99aa8b487 Mon Sep 17 00:00:00 2001 From: playX Date: Wed, 5 Jan 2022 19:17:47 +0300 Subject: [PATCH] vlib: add a dlmalloc module (#12974) --- vlib/dlmalloc/dlmalloc.v | 1491 ++++++++++++++++++++++++ vlib/dlmalloc/dlmalloc_sys_nix.c.v | 116 ++ vlib/dlmalloc/dlmalloc_sys_windows.c.v | 42 + vlib/dlmalloc/global.v | 63 + 4 files changed, 1712 insertions(+) create mode 100644 vlib/dlmalloc/dlmalloc.v create mode 100644 vlib/dlmalloc/dlmalloc_sys_nix.c.v create mode 100644 vlib/dlmalloc/dlmalloc_sys_windows.c.v create mode 100644 vlib/dlmalloc/global.v diff --git a/vlib/dlmalloc/dlmalloc.v b/vlib/dlmalloc/dlmalloc.v new file mode 100644 index 0000000000..5d038e6cf8 --- /dev/null +++ b/vlib/dlmalloc/dlmalloc.v @@ -0,0 +1,1491 @@ +// This is a version of dlmalloc.c ported to V. You can find the original +// source at ftp://g.oswego.edu/pub/misc/malloc.c +// +// The original source was written by Doug Lea and released to the public domain +// +// +// # Why dlmalloc? +// +// This library does not rely on C code. The primary purpose is for use in freestanding +// build mode and for WASM target. +// +// dlmalloc is not the most performant allocator. It's main purpose is to be +// easily portable and easy to learn. Here we have straight port of C and dlmalloc-rs +// versions of dlmalloc. +module dlmalloc + +import math.bits + +pub const ( + n_small_bins = 32 + n_tree_bins = 32 + small_bin_shift = 3 + tree_bin_shift = 8 + default_granularity = 64 * 1024 + default_trim_threshold = 2 * 1024 * 1024 + max_release_check_rate = 4095 + malloc_alignment = sizeof(usize) * 2 + chunk_overhead = sizeof(usize) + mmap_chnk_overhead = 2 * sizeof(usize) + min_large_size = 1 << tree_bin_shift + max_small_size = min_large_size - 1 + max_small_request = max_small_size - (malloc_alignment - 1) - chunk_overhead + min_chunk_size = align_up(sizeof(Chunk), malloc_alignment) + chunk_mem_offset = 2 * sizeof(usize) + min_request = min_chunk_size - chunk_overhead - 1 + top_foot_size = align_offset_usize(chunk_mem_offset) + pad_request(sizeof(Segment)) + + min_chunk_size + max_request = calc_max_request() + mmap_foot_pad = 4 * sizeof(usize) +) + +fn usize_leading_zeros(x usize) usize { + if sizeof(usize) == 8 { + return usize(bits.leading_zeros_64(u64(x))) + } else { + return usize(bits.leading_zeros_32(u32(x))) + } +} + +fn min_sys_alloc_space() usize { + return ((~0 - (dlmalloc.default_granularity + dlmalloc.top_foot_size + + dlmalloc.malloc_alignment) + 1) & ~dlmalloc.malloc_alignment) - dlmalloc.chunk_overhead + 1 +} + +fn calc_max_request() usize { + x := min_sys_alloc_space() + y := (~dlmalloc.min_chunk_size + 1) << 2 + if x < y { + return x + } else { + return y + } +} + +fn pad_request(amt usize) usize { + return align_up(amt + dlmalloc.chunk_overhead, dlmalloc.malloc_alignment) +} + +fn align_offset_usize(addr usize) usize { + return align_up(addr, dlmalloc.malloc_alignment) - addr +} + +fn is_aligned(a usize) bool { + return a & (dlmalloc.malloc_alignment - 1) == 0 +} + +fn is_small(s usize) bool { + return s >> dlmalloc.small_bin_shift < dlmalloc.n_small_bins +} + +fn small_index2size(idx u32) usize { + return usize(idx) << dlmalloc.small_bin_shift +} + +fn small_index(size usize) u32 { + return u32(size >> dlmalloc.small_bin_shift) +} + +fn align_up(a usize, alignment usize) usize { + return (a + (alignment - 1)) & ~(alignment - 1) +} + +fn left_bits(x u32) u32 { + return (x << 1) | (~(x << 1)) + 1 +} + +fn least_bit(x u32) u32 { + return x & (~x + 1) +} + +fn 
leftshift_for_tree_index(x u32) u32 { + y := usize(x) + if y == dlmalloc.n_tree_bins - 1 { + return 0 + } else { + return u32(sizeof(usize) * 8 - 1 - ((y >> 1) + dlmalloc.tree_bin_shift - 2)) + } +} + +[unsafe] +fn align_as_chunk(ptr_ voidptr) &Chunk { + ptr := usize(ptr_) + chunk := ptr + dlmalloc.chunk_mem_offset + return &Chunk(ptr + align_offset_usize(chunk)) +} + +fn request_2_size(req usize) usize { + if req < dlmalloc.min_request { + return dlmalloc.min_chunk_size + } else { + return pad_request(req) + } +} + +fn overhead_for(c &Chunk) usize { + if c.mmapped() { + return dlmalloc.mmap_chnk_overhead + } else { + return dlmalloc.chunk_overhead + } +} + +// In order for dlmalloc to efficently manage memory, it needs a way to communicate with the underlying platform. +// This `Allocator` type provides an interface for this communication. +// +// +// Why not `interface?` Interfaces require memory allocation so it is simpler to pass a struct. +pub struct Allocator { + alloc fn (voidptr, usize) (voidptr, usize, u32) + remap fn (voidptr, voidptr, usize, usize, bool) voidptr + free_part fn (voidptr, voidptr, usize, usize) bool + free_ fn (voidptr, voidptr, usize) bool + can_release_part fn (voidptr, u32) bool + allocates_zeros fn (voidptr) bool + page_size fn (voidptr) usize // not a constant field because some platforms might have different page sizes depending on configs + data voidptr +} + +pub struct Dlmalloc { + system_allocator Allocator +mut: + // bin maps + smallmap u32 // bin map for small bins + treemap u32 // bin map for tree bins + + smallbins [66]&Chunk // small bins, it is actually (n_small_bins + 1) * 2 + treebins [n_tree_bins]&TreeChunk + dvsize usize + topsize usize + dv &Chunk + top &Chunk + footprint usize + max_footprint usize + seg Segment + trim_check u32 + least_addr voidptr + release_checks usize +} + +pub fn new(system_allocator Allocator) Dlmalloc { + return Dlmalloc{ + smallmap: 0 + treemap: 0 + smallbins: [(dlmalloc.n_small_bins + 1) * 2]&Chunk{} + treebins: [dlmalloc.n_tree_bins]&TreeChunk{} + dvsize: 0 + topsize: 0 + dv: voidptr(0) + top: voidptr(0) + footprint: 0 + max_footprint: 0 + seg: Segment{voidptr(0), 0, voidptr(0), 0} + trim_check: 0 + least_addr: voidptr(0) + release_checks: 0 + system_allocator: system_allocator + } +} + +[heap] +struct Chunk { +mut: + prev_foot usize + head usize + prev &Chunk + next &Chunk +} + +[heap] +struct Segment { +mut: + base voidptr + size usize + next &Segment + flags u32 +} + +[heap] +struct TreeChunk { +mut: + chunk Chunk + child [2]voidptr + parent voidptr + index u32 +} + +const ( + pinuse = 1 << 0 + cinuse = 1 << 1 + flag4 = 1 << 2 + inuse = pinuse | cinuse + flag_bits = pinuse | cinuse | flag4 + fencepost_head = inuse | sizeof(usize) +) + +fn (c &Chunk) size() usize { + return c.head & ~dlmalloc.flag_bits +} + +fn (c &Chunk) mmapped() bool { + return c.head & dlmalloc.inuse == 0 +} + +fn (c &Chunk) next() &Chunk { + mut me := usize(c) + me = me + c.size() + return &Chunk(me) +} + +fn (c &Chunk) prev() &Chunk { + mut me := usize(c) + me = me + c.prev_foot + return &Chunk(me) +} + +fn (c &Chunk) cinuse() bool { + return c.head & dlmalloc.cinuse != 0 +} + +fn (c &Chunk) pinuse() bool { + return c.head & dlmalloc.pinuse != 0 +} + +fn (mut c Chunk) clear_pinuse() { + c.head &= ~dlmalloc.pinuse +} + +fn (c &Chunk) inuse() bool { + return c.head & dlmalloc.inuse != dlmalloc.pinuse +} + +fn (mut c Chunk) set_inuse(size usize) { + c.head = (c.head & dlmalloc.pinuse) | size | dlmalloc.cinuse + mut next := c.plus_offset(size) + 
next.head |= dlmalloc.pinuse +} + +fn (mut c Chunk) set_inuse_and_pinuse(size usize) { + c.head = dlmalloc.pinuse | size | dlmalloc.cinuse + mut next := c.plus_offset(size) + next.head |= dlmalloc.pinuse +} + +fn (mut c Chunk) set_size_and_pinuse_of_inuse_chunk(size usize) { + c.head = size | dlmalloc.pinuse | dlmalloc.cinuse +} + +fn (mut c Chunk) set_size_and_pinuse_of_free_chunk(size usize) { + c.head = size | dlmalloc.pinuse + c.set_foot(size) +} + +fn (mut c Chunk) set_free_with_pinuse(size usize, n_ &Chunk) { + mut n := n_ + n.clear_pinuse() + c.set_size_and_pinuse_of_free_chunk(size) +} + +fn (c &Chunk) set_foot(size usize) { + mut next := c.plus_offset(size) + next.prev_foot = size +} + +fn (c &Chunk) plus_offset(offset usize) &Chunk { + return &Chunk((usize(c) + offset)) +} + +fn (c &Chunk) minus_offset(offset usize) &Chunk { + return &Chunk(usize(c) - offset) +} + +fn (c &Chunk) to_mem() voidptr { + return voidptr(usize(c) + dlmalloc.chunk_mem_offset) +} + +fn chunk_from_mem(mem_ voidptr) &Chunk { + mem := usize(mem_) + return &Chunk((mem - dlmalloc.chunk_mem_offset)) +} + +fn (tree &TreeChunk) leftmost_child() &TreeChunk { + left := &TreeChunk(tree.child[0]) + if isnil(left) { + return tree.child[1] + } else { + return left + } +} + +fn (tree &TreeChunk) chunk() &Chunk { + return &tree.chunk +} + +[unsafe] +fn (tree &TreeChunk) next() &TreeChunk { + unsafe { + return &TreeChunk(tree.chunk().next) + } +} + +[unsafe] +fn (tree &TreeChunk) prev() &TreeChunk { + unsafe { + return &TreeChunk(tree.chunk().prev) + } +} + +const extern = 1 << 0 + +fn (seg &Segment) is_extern() bool { + return seg.flags & dlmalloc.extern != 0 +} + +fn (seg &Segment) can_release_part(sys_alloc &Allocator) bool { + return sys_alloc.can_release_part(sys_alloc.data, seg.flags >> 1) +} + +fn (seg &Segment) sys_flags() u32 { + return seg.flags >> 1 +} + +fn (seg &Segment) holds(addr voidptr) bool { + return seg.base <= addr && addr < seg.top() +} + +fn (seg &Segment) top() voidptr { + return voidptr(usize(seg.base) + seg.size) +} + +[unsafe] +pub fn (dl &Dlmalloc) calloc_must_clear(ptr voidptr) bool { + return !dl.system_allocator.allocates_zeros(dl.system_allocator.data) + || !chunk_from_mem(ptr).mmapped() +} + +[unsafe] +fn (mut dl Dlmalloc) smallbin_at(idx u32) &Chunk { + unsafe { + return &Chunk(&dl.smallbins[idx * 2]) + } +} + +[unsafe] +fn (mut dl Dlmalloc) treebin_at(idx u32) &&TreeChunk { + return &dl.treebins[idx] +} + +fn (dl &Dlmalloc) compute_tree_index(size usize) u32 { + x := size >> dlmalloc.tree_bin_shift + if x == 0 { + return 0 + } else if x > 0xffff { + return dlmalloc.n_tree_bins - 1 + } else { + k := sizeof(usize) * 8 - 1 - usize_leading_zeros(x) + return u32((k << 1) + (size >> (k + dlmalloc.tree_bin_shift - 1) & 1)) + } +} + +[unsafe] +fn (mut dl Dlmalloc) unlink_chunk(chunk &Chunk, size usize) { + unsafe { + if is_small(size) { + dl.unlink_small_chunk(chunk, size) + } else { + dl.unlink_large_chunk(&TreeChunk(chunk)) + } + } +} + +[unsafe] +fn (mut dl Dlmalloc) unlink_small_chunk(chunk_ &Chunk, size usize) { + mut chunk := chunk_ + mut f := chunk.prev + mut b := chunk.next + idx := small_index(size) + + if b == f { + unsafe { dl.clear_smallmap(idx) } + } else { + f.next = b + b.prev = f + } +} + +[unsafe] +fn (mut dl Dlmalloc) unlink_large_chunk(chunk_ &TreeChunk) { + unsafe { + mut chunk := chunk_ + mut xp := &TreeChunk(chunk.parent) + mut r := &TreeChunk(voidptr(0)) + if voidptr(chunk.next()) != voidptr(chunk) { + mut f := chunk.prev() + r = chunk.next() + f.chunk.next = r.chunk() + 
r.chunk.prev = f.chunk() + } else { + mut rp := &&TreeChunk(&chunk.child[1]) + if isnil(*rp) { + rp = &&TreeChunk(&chunk.child[0]) + } + + r = *rp + if !isnil(*rp) { + for { + mut cp := &&TreeChunk(&rp.child[1]) + if isnil(*cp) { + cp = &&TreeChunk(&rp.child[0]) + } + if isnil(*cp) { + break + } + rp = cp + } + r = *rp + *rp = &TreeChunk(voidptr(0)) + } + } + + if isnil(xp) { + return + } + + mut h := dl.treebin_at(chunk.index) + if chunk == *h { + *h = r + if isnil(r) { + dl.clear_treemap(chunk.index) + } + } else { + if xp.child[0] == chunk { + xp.child[0] = r + } else { + xp.child[1] = r + } + } + + if !isnil(r) { + r.parent = xp + mut c0 := &TreeChunk(chunk.child[0]) + if !isnil(c0) { + r.child[0] = c0 + c0.parent = r + } + mut c1 := &TreeChunk(chunk.child[1]) + if !isnil(c1) { + r.child[1] = c1 + c1.parent = r + } + } + } +} + +[unsafe] +fn (mut dl Dlmalloc) unlink_first_small_chunk(head_ &Chunk, next_ &Chunk, idx u32) { + mut next := next_ + mut head := head_ + mut ptr := next.prev + if head == ptr { + unsafe { dl.clear_smallmap(idx) } + } else { + ptr.next = head + head.prev = ptr + } +} + +// calloc is the same as `malloc`, except if the allocation succeeds it's guaranteed +// to point to `size` bytes of zeros. +[unsafe] +pub fn (mut dl Dlmalloc) calloc(size usize) voidptr { + unsafe { + ptr := dl.malloc(size) + if !isnil(ptr) && dl.calloc_must_clear(ptr) { + vmemset(ptr, 0, int(size)) + } + return ptr + } +} + +// free_ behaves as libc free, but operates within the given space +[unsafe] +pub fn (mut dl Dlmalloc) free_(mem voidptr) { + unsafe { + mut p := chunk_from_mem(mem) + mut psize := p.size() + next := p.plus_offset(psize) + if !p.pinuse() { + prevsize := p.prev_foot + if p.mmapped() { + psize += prevsize + dlmalloc.mmap_foot_pad + if dl.system_allocator.free_(dl.system_allocator.data, voidptr(usize(p) - prevsize), + psize) + { + dl.footprint -= psize + } + return + } + + prev := p.minus_offset(prevsize) + psize += prevsize + p = prev + if voidptr(p) != voidptr(dl.dv) { + dl.unlink_chunk(p, prevsize) + } else if next.head & dlmalloc.inuse == dlmalloc.inuse { + dl.dvsize = psize + p.set_free_with_pinuse(psize, next) + return + } + } + // consolidate forward if we can + if !next.cinuse() { + if voidptr(next) == voidptr(dl.top) { + dl.topsize += psize + p.head = 0 + + tsize := dl.topsize + dl.top = p + p.head = tsize | dlmalloc.pinuse + if voidptr(p) == voidptr(dl.dv) { + dl.dv = voidptr(0) + dl.dvsize = 0 + } + + if dl.should_trim(tsize) { + dl.sys_trim(0) + } + return + } else if voidptr(next) == voidptr(dl.dv) { + dl.dvsize += psize + dsize := dl.dvsize + dl.dv = p + p.set_size_and_pinuse_of_free_chunk(dsize) + return + } else { + nsize := next.size() + psize += nsize + dl.unlink_chunk(next, nsize) + p.set_size_and_pinuse_of_free_chunk(psize) + if voidptr(p) == voidptr(dl.dv) { + dl.dvsize = psize + return + } + } + } else { + p.set_free_with_pinuse(psize, next) + } + + if is_small(psize) { + dl.insert_small_chunk(p, psize) + } else { + dl.insert_large_chunk(&TreeChunk(p), psize) + dl.release_checks -= 1 + if dl.release_checks == 0 { + dl.release_unused_segments() + } + } + } +} + +fn (dl Dlmalloc) should_trim(size usize) bool { + return size > dl.trim_check +} + +[unsafe] +fn (mut dl Dlmalloc) sys_trim(pad_ usize) bool { + unsafe { + mut pad := pad_ + mut released := usize(0) + if pad < dlmalloc.max_request && !isnil(dl.top) { + pad += dlmalloc.top_foot_size + if dl.topsize > pad { + unit := usize(dlmalloc.default_granularity) + extra := ((dl.topsize - pad + unit - 1) / unit 
- 1) * unit + mut sp := dl.segment_holding(dl.top) + + if !sp.is_extern() { + if sp.can_release_part(&dl.system_allocator) { + if sp.size >= extra && !dl.has_segment_link(sp) { + newsize := sp.size - extra + if dl.system_allocator.free_part(dl.system_allocator.data, + sp.base, sp.size, newsize) + { + released = extra + } + } + } + } + + if released != 0 { + sp.size -= released + dl.footprint -= released + top := dl.top + topsize := dl.topsize - released + dl.init_top(top, topsize) + } + } + + released += dl.release_unused_segments() + + if released == 0 && dl.topsize > dl.trim_check { + dl.trim_check = 1 << 31 + } + } + return released != 0 + } +} + +[unsafe] +fn (mut dl Dlmalloc) release_unused_segments() usize { + unsafe { + mut released := usize(0) + mut nsegs := usize(0) + mut pred := &dl.seg + mut sp := pred.next + for !isnil(sp) { + base := sp.base + size := sp.size + next := sp.next + + nsegs += 1 + + if sp.can_release_part(&dl.system_allocator) && !sp.is_extern() { + mut p := align_as_chunk(base) + psize := p.size() + chunk_top := voidptr(usize(p) + psize) + top := voidptr(usize(base) + (size - dlmalloc.top_foot_size)) + if !p.inuse() && chunk_top >= top { + mut tp := &TreeChunk(p) + if voidptr(p) == voidptr(dl.dv) { + dl.dv = voidptr(0) + dl.dvsize = 0 + } else { + dl.unlink_large_chunk(tp) + } + + if dl.system_allocator.free_(dl.system_allocator.data, base, size) { + released += size + dl.footprint -= size + sp = pred + sp.next = next + } else { + // back out if we can't unmap + dl.insert_large_chunk(tp, psize) + } + } + } + pred = sp + sp = next + } + dl.release_checks = if nsegs > dlmalloc.max_release_check_rate { + nsegs + } else { + dlmalloc.max_release_check_rate + } + return released + } +} + +[unsafe] +fn (dl &Dlmalloc) has_segment_link(ptr &Segment) bool { + mut sp := &dl.seg + for !isnil(sp) { + if ptr.holds(sp) { + return true + } + sp = sp.next + } + return false +} + +[unsafe] +fn (mut dl Dlmalloc) replace_dv(chunk &Chunk, size usize) { + dvs := dl.dvsize + if dvs != 0 { + dv := dl.dv + unsafe { + dl.insert_small_chunk(dv, dvs) + } + } + dl.dvsize = size + dl.dv = chunk +} + +[unsafe] +fn (mut dl Dlmalloc) insert_chunk(chunk &Chunk, size usize) { + unsafe { + if is_small(size) { + dl.insert_small_chunk(chunk, size) + } else { + dl.insert_large_chunk(&TreeChunk(chunk), size) + } + } +} + +[unsafe] +fn (mut dl Dlmalloc) insert_small_chunk(chunk_ &Chunk, size usize) { + mut chunk := chunk_ + idx := small_index(size) + unsafe { + mut head := dl.smallbin_at(idx) + mut f := head + if !dl.smallmap_is_marked(idx) { + dl.mark_smallmap(idx) + } else { + f = head.prev + } + head.prev = chunk + f.next = chunk + chunk.prev = f + chunk.next = head + } +} + +[unsafe] +fn (mut dl Dlmalloc) insert_large_chunk(chunk_ &TreeChunk, size usize) { + mut chunk := chunk_ + unsafe { + idx := dl.compute_tree_index(size) + mut h := dl.treebin_at(idx) + + chunk.index = idx + chunk.child[0] = voidptr(0) + chunk.child[1] = voidptr(0) + + mut chunkc := chunk.chunk() + if !dl.treemap_is_marked(idx) { + dl.mark_treemap(idx) + *h = chunk + chunk.parent = voidptr(h) + chunkc.prev = chunkc + chunkc.next = chunkc + } else { + mut t := *h + mut k := size << leftshift_for_tree_index(idx) + for { + if t.chunk().size() != size { + c_ := &t.child[(k >> sizeof(usize) * 8 - 1) & 1] + mut c := &&TreeChunk(c_) + k <<= 1 + if !isnil(c) { + t = *c + } else { + *c = chunk + chunk.parent = t + chunkc.next = chunkc + chunkc.prev = chunkc + break + } + } else { + tc := t.chunk() + f := tc.prev + f.next = chunkc + 
tc.prev = chunkc + chunkc.prev = f + chunkc.next = tc + chunk.parent = voidptr(0) + break + } + } + } + } +} + +[unsafe] +fn (mut dl Dlmalloc) clear_smallmap(idx u32) { + dl.smallmap &= ~(1 << idx) +} + +[unsafe] +fn (mut dl Dlmalloc) mark_smallmap(idx u32) { + dl.smallmap |= 1 << idx +} + +[unsafe] +fn (mut dl Dlmalloc) smallmap_is_marked(idx u32) bool { + return dl.smallmap & (1 << idx) != 0 +} + +[unsafe] +fn (mut dl Dlmalloc) clear_treemap(idx u32) { + dl.treemap &= ~(1 << idx) +} + +[unsafe] +fn (mut dl Dlmalloc) mark_treemap(idx u32) { + dl.treemap |= 1 << idx +} + +[unsafe] +fn (mut dl Dlmalloc) treemap_is_marked(idx u32) bool { + return dl.treemap & (1 << idx) != 0 +} + +/// malloc behaves as libc malloc, but operates within the given space +[unsafe] +pub fn (mut dl Dlmalloc) malloc(size usize) voidptr { + mut nb := usize(0) + unsafe { + if size <= dlmalloc.max_small_request { + nb = request_2_size(size) + mut idx := small_index(nb) + smallbits := dl.smallmap >> idx + if smallbits & 0b11 != 0 { + idx += ~smallbits & 1 + + b := dl.smallbin_at(idx) + mut p := b.prev + dl.unlink_first_small_chunk(b, p, idx) + smallsize := small_index2size(idx) + p.set_inuse_and_pinuse(smallsize) + ret := p.to_mem() + return ret + } + + if nb > dl.dvsize { + // if there's some other bin with some memory, then we just use + // the next smallest bin + if smallbits != 0 { + leftbits := (smallbits << idx) & left_bits(1 << idx) + leastbit := least_bit(leftbits) + i := u32(bits.trailing_zeros_32(leastbit)) + mut b := dl.smallbin_at(i) + mut p := b.prev + dl.unlink_first_small_chunk(b, p, i) + smallsize := small_index2size(i) + rsize := smallsize - nb + if sizeof(usize) != 4 && rsize < dlmalloc.min_chunk_size { + p.set_inuse_and_pinuse(smallsize) + } else { + p.set_size_and_pinuse_of_inuse_chunk(nb) + mut r := p.plus_offset(nb) + r.set_size_and_pinuse_of_free_chunk(size) + dl.replace_dv(r, rsize) + } + ret := p.to_mem() + return ret + } else if dl.treemap != 0 { + mem := dl.tmalloc_small(nb) + if !isnil(mem) { + return mem + } + } + } + } else if size >= dlmalloc.max_request { + return voidptr(0) + } else { + nb = pad_request(size) + if dl.treemap != 0 { + mem := dl.tmalloc_large(nb) + if !isnil(mem) { + return mem + } + } + } + // use the `dv` node if we can, splitting it if necessary or otherwise + // exhausting the entire chunk + if nb <= dl.dvsize { + rsize := dl.dvsize - nb + mut p := dl.dv + if rsize >= dlmalloc.min_chunk_size { + dl.dv = p.plus_offset(nb) + dl.dvsize = rsize + mut r := dl.dv + r.set_size_and_pinuse_of_free_chunk(rsize) + p.set_size_and_pinuse_of_inuse_chunk(nb) + } else { + dvs := dl.dvsize + dl.dvsize = 0 + dl.dv = voidptr(0) + p.set_inuse_and_pinuse(dvs) + } + return p.to_mem() + } + // Split the top node if we can + if nb < dl.topsize { + dl.topsize -= nb + rsize := dl.topsize + mut p := dl.top + dl.top = p.plus_offset(nb) + mut r := dl.top + r.head = rsize | dlmalloc.pinuse + p.set_size_and_pinuse_of_inuse_chunk(nb) + return p.to_mem() + } + return dl.sys_alloc(nb) + } +} + +[unsafe] +fn (mut dl Dlmalloc) init_bins() { + unsafe { + for i in 0 .. 
dlmalloc.n_small_bins { + mut bin := dl.smallbin_at(i) + bin.prev = bin + bin.next = bin + } + } +} + +[unsafe] +fn (mut dl Dlmalloc) init_top(ptr &Chunk, size_ usize) { + offset := align_offset_usize(ptr.to_mem()) + mut p := ptr.plus_offset(offset) + + size := size_ - offset + dl.top = p + dl.topsize = size + p.head = size | dlmalloc.pinuse + + p.plus_offset(size).head = dlmalloc.top_foot_size + dl.trim_check = dlmalloc.default_trim_threshold +} + +[unsafe] +fn (mut dl Dlmalloc) sys_alloc(size usize) voidptr { + asize := align_up(size + dlmalloc.top_foot_size + dlmalloc.malloc_alignment, dlmalloc.default_granularity) + unsafe { + tbase, mut tsize, flags := dl.system_allocator.alloc(dl.system_allocator.data, + asize) + if isnil(tbase) { + return tbase + } + dl.footprint += tsize + dl.max_footprint = if dl.max_footprint > dl.footprint { + dl.max_footprint + } else { + dl.footprint + } + if isnil(dl.top) { + if isnil(dl.least_addr) || tbase < dl.least_addr { + dl.least_addr = tbase + } + dl.seg.base = tbase + dl.seg.size = tsize + dl.seg.flags = flags + dl.release_checks = dlmalloc.max_release_check_rate + dl.init_bins() + tsize_ := tsize - dlmalloc.top_foot_size + dl.init_top(&Chunk(tbase), tsize_) + } else { + mut sp := &dl.seg + for !isnil(sp) && voidptr(tbase) != voidptr(sp.top()) { + sp = sp.next + } + + if !isnil(sp) && !sp.is_extern() && sp.sys_flags() == flags && sp.holds(dl.top) { + sp.size += tsize + ptr := dl.top + size_ := dl.topsize + tsize + dl.init_top(ptr, size_) + } else { + if tbase < dl.least_addr { + dl.least_addr = tbase + } else { + dl.least_addr = dl.least_addr + } + sp = &dl.seg + for !isnil(sp) && sp.base != voidptr(usize(tbase) + tsize) { + sp = sp.next + } + + if !isnil(sp) && !sp.is_extern() && sp.sys_flags() == flags { + oldbase := sp.base + sp.base = tbase + sp.size = tsize + return dl.prepend_alloc(tbase, oldbase, size) + } else { + dl.add_segment(tbase, tsize, flags) + } + } + } + + if size < dl.topsize { + dl.topsize -= size + rsize := dl.topsize + mut p := dl.top + dl.top = p.plus_offset(size) + mut r := dl.top + r.head = rsize | dlmalloc.pinuse + p.set_size_and_pinuse_of_inuse_chunk(size) + return p.to_mem() + } + } + return voidptr(0) +} + +[unsafe] +fn (mut dl Dlmalloc) tmalloc_small(size usize) voidptr { + unsafe { + leastbit := least_bit(dl.treemap) + i := bits.leading_zeros_32(leastbit) + mut v := *dl.treebin_at(u32(i)) + mut t := v + mut rsize := t.chunk().size() - size + for { + t = t.leftmost_child() + if isnil(t) { + break + } + + trem := t.chunk().size() - size + if trem < rsize { + rsize = trem + v = t + } + } + + mut vc := v.chunk() + r := &TreeChunk(vc.plus_offset(size)) + if rsize < dlmalloc.min_chunk_size { + vc.set_inuse_and_pinuse(rsize + size) + } else { + mut rc := r.chunk() + vc.set_size_and_pinuse_of_inuse_chunk(size) + rc.set_size_and_pinuse_of_free_chunk(rsize) + dl.replace_dv(rc, rsize) + } + return vc.to_mem() + } +} + +[unsafe] +fn (mut dl Dlmalloc) tmalloc_large(size usize) voidptr { + unsafe { + mut v := &TreeChunk(voidptr(0)) + mut rsize := ~size + 1 + idx := dl.compute_tree_index(size) + mut t := *dl.treebin_at(idx) + if !isnil(t) { + mut sizebits := size << leftshift_for_tree_index(idx) + mut rst := voidptr(0) + for { + csize := t.chunk().size() + if csize >= size && csize - size < rsize { + v = t + rsize = csize - size + if rsize == 0 { + break + } + } + + rt := t.child[1] + t = t.child[(sizebits >> (sizeof(usize) * 8 - 1)) & 1] + if !isnil(rt) && voidptr(rt) != voidptr(t) { + rst = rt + } + if isnil(t) { + t = rst + break + 
} + sizebits <<= 1 + } + } + + if isnil(t) && isnil(v) { + leftbits := left_bits(1 << idx) & dl.treemap + if leftbits != 0 { + leastbit := least_bit(leftbits) + i := bits.trailing_zeros_32(leastbit) + t = *dl.treebin_at(u32(i)) + } + } + // Find the smallest of this tree or subtree + for !isnil(t) { + csize := t.chunk().size() + if csize >= size && csize - size < rsize { + rsize = csize - size + v = t + } + t = t.leftmost_child() + } + + if isnil(v) || (dl.dvsize >= size && !(rsize < dl.dvsize - size)) { + return voidptr(0) + } + + mut vc := v.chunk() + mut r := vc.plus_offset(size) + dl.unlink_large_chunk(v) + if rsize < dlmalloc.min_chunk_size { + vc.set_inuse_and_pinuse(rsize + size) + } else { + vc.set_size_and_pinuse_of_inuse_chunk(size) + r.set_size_and_pinuse_of_free_chunk(rsize) + dl.insert_chunk(r, rsize) + } + + return vc.to_mem() + } +} + +[unsafe] +fn (mut dl Dlmalloc) prepend_alloc(newbase voidptr, oldbase voidptr, size usize) voidptr { + unsafe { + mut p := align_as_chunk(newbase) + mut oldfirst := align_as_chunk(oldbase) + psize := usize(oldfirst) - usize(p) + mut q := p.plus_offset(size) + mut qsize := psize - size + p.set_size_and_pinuse_of_inuse_chunk(size) + + if voidptr(oldfirst) == voidptr(dl.top) { + dl.topsize += qsize + tsize := dl.topsize + dl.top = q + q.head = tsize | dlmalloc.pinuse + } else if voidptr(oldfirst) == voidptr(dl.dv) { + dl.dvsize += qsize + dsize := dl.dvsize + dl.dv = q + q.set_size_and_pinuse_of_free_chunk(dsize) + } else { + if !oldfirst.inuse() { + nsize := oldfirst.size() + dl.unlink_chunk(oldfirst, nsize) + oldfirst = oldfirst.plus_offset(nsize) + qsize += nsize + } + q.set_free_with_pinuse(qsize, oldfirst) + dl.insert_chunk(q, qsize) + } + + ret := p.to_mem() + return ret + } +} + +[unsafe] +fn (mut dl Dlmalloc) add_segment(tbase voidptr, tsize usize, flags u32) { + // TODO: what in the world is this function doing???? + unsafe { + old_top := dl.top + mut oldsp := dl.segment_holding(old_top) + old_end := oldsp.top() + ssize := pad_request(sizeof(Segment)) + mut offset := ssize + sizeof(usize) * 4 + dlmalloc.malloc_alignment - 1 + rawsp := voidptr(usize(old_end) - offset) + offset = align_offset_usize((&Chunk(rawsp)).to_mem()) + asp := voidptr(usize(rawsp) + offset) + csp := if asp < voidptr(usize(old_top) + dlmalloc.min_chunk_size) { old_top } else { asp } + mut sp := &Chunk(csp) + mut ss := &Segment(sp.to_mem()) + mut tnext := sp.plus_offset(ssize) + mut p := tnext + mut nfences := 0 + + size := tsize - dlmalloc.top_foot_size + dl.init_top(&Chunk(tbase), size) + + sp.set_size_and_pinuse_of_inuse_chunk(ssize) + *ss = dl.seg + dl.seg.base = tbase + dl.seg.size = tsize + dl.seg.flags = flags + dl.seg.next = ss + + for { + nextp := p.plus_offset(sizeof(usize)) + p.head = dlmalloc.fencepost_head + nfences += 1 + if nextp.head < old_end { + p = nextp + } else { + break + } + } + // TODO: why 2? 
+ assert nfences >= 2 + if voidptr(csp) != voidptr(old_top) { + mut q := &Chunk(old_top) + psize := usize(csp) - usize(old_top) + tn := q.plus_offset(psize) + q.set_free_with_pinuse(psize, tn) + + dl.insert_chunk(q, psize) + } + } +} + +[unsafe] +fn (mut dl Dlmalloc) segment_holding(ptr voidptr) &Segment { + mut sp := &dl.seg + for !isnil(sp) { + if sp.base <= ptr && ptr < sp.top() { + return sp + } + sp = sp.next + } + return &Segment(0) +} + +// realloc behaves as libc realloc, but operates within the given space +[unsafe] +pub fn (mut dl Dlmalloc) realloc(oldmem voidptr, bytes usize) voidptr { + if bytes >= dlmalloc.max_request { + return voidptr(0) + } + unsafe { + nb := request_2_size(bytes) + mut oldp := chunk_from_mem(oldmem) + newp := dl.try_realloc_chunk(oldp, nb, true) + if !isnil(newp) { + return newp.to_mem() + } + + ptr := dl.malloc(bytes) + if !isnil(ptr) { + oc := oldp.size() - overhead_for(oldp) + copy_bytes := if oc < bytes { oc } else { bytes } + vmemcpy(ptr, oldmem, int(copy_bytes)) + } + + return ptr + } +} + +// memaligns allocates memory aligned to `alignment_`. Only call this with power-of-two alignment +// and alignment > dlmalloc.malloc_alignment +[unsafe] +pub fn (mut dl Dlmalloc) memalign(alignment_ usize, bytes usize) voidptr { + mut alignment := alignment_ + if alignment < dlmalloc.min_chunk_size { + alignment = dlmalloc.min_chunk_size + } + + if bytes >= dlmalloc.max_request - alignment { + return voidptr(0) + } + unsafe { + nb := request_2_size(bytes) + req := nb + alignment + dlmalloc.min_chunk_size - dlmalloc.chunk_overhead + mem := dl.malloc(req) + if isnil(mem) { + return mem + } + + mut p := chunk_from_mem(mem) + if usize(mem) & (alignment - 1) != 0 { + // Here we find an aligned sopt inside the chunk. Since we need to + // give back leading space in a chunk of at least `min_chunk_size`, + // if the first calculation places us at a spot with less than + // `min_chunk_size` leader we can move to the next aligned spot. 
+ // we've allocated enough total room so that this is always possible + br_ := (usize(mem) + alignment - 1) & (~alignment + 1) + br := chunk_from_mem(voidptr(br_)) + mut pos := voidptr(0) + if usize(br) - usize(p) > dlmalloc.min_chunk_size { + pos = voidptr(br) + } else { + pos = voidptr(usize(br) + alignment) + } + + mut newp := &Chunk(pos) + leadsize := usize(pos) - usize(p) + newsize := p.size() - leadsize + + if p.mmapped() { + newp.prev_foot = p.prev_foot + leadsize + newp.head = newsize + } else { + newp.set_inuse(newsize) + p.set_inuse(leadsize) + dl.dispose_chunk(p, leadsize) + } + p = newp + } + + if !p.mmapped() { + size := p.size() + if size > nb + dlmalloc.min_chunk_size { + remainder_size := size - nb + mut remainder := p.plus_offset(nb) + p.set_inuse(nb) + remainder.set_inuse(remainder_size) + dl.dispose_chunk(remainder, remainder_size) + } + } + return p.to_mem() + } +} + +[unsafe] +fn (mut dl Dlmalloc) try_realloc_chunk(p_ &Chunk, nb usize, can_move bool) &Chunk { + unsafe { + mut p := p_ + oldsize := p.size() + mut next := p.plus_offset(oldsize) + if p.mmapped() { + return dl.mmap_resize(p, nb, can_move) + } else if oldsize >= nb { + rsize := oldsize - nb + if rsize >= dlmalloc.min_chunk_size { + mut r := p.plus_offset(nb) + p.set_inuse(nb) + r.set_inuse(rsize) + dl.dispose_chunk(r, rsize) + } + return p + } else if voidptr(next) == voidptr(dl.top) { + if oldsize + dl.topsize <= nb { + return voidptr(0) + } + + newsize := oldsize + dl.topsize + newtopsize := newsize - nb + mut newtop := p.plus_offset(nb) + p.set_inuse(nb) + newtop.head = newtopsize | dlmalloc.pinuse + dl.top = newtop + dl.topsize = newtopsize + return p + } else if voidptr(next) == voidptr(dl.dv) { + dvs := dl.dvsize + if oldsize + dvs < nb { + return voidptr(0) + } + + dsize := oldsize + dvs - nb + if dsize >= dlmalloc.min_chunk_size { + mut r := p.plus_offset(nb) + mut n := r.plus_offset(dsize) + p.set_inuse(nb) + r.set_size_and_pinuse_of_free_chunk(dsize) + n.clear_pinuse() + dl.dvsize = dsize + dl.dv = r + } else { + newsize := oldsize + dvs + p.set_inuse(newsize) + dl.dvsize = 0 + dl.dv = voidptr(0) + } + return p + } else if !next.cinuse() { + nextsize := next.size() + if oldsize + nextsize < nb { + return voidptr(0) + } + rsize := oldsize + nextsize - nb + dl.unlink_chunk(next, nextsize) + if rsize < dlmalloc.min_chunk_size { + newsize := oldsize + nextsize + p.set_inuse(newsize) + } else { + r := p.plus_offset(nb) + p.set_inuse(nb) + r.set_inuse(rsize) + dl.dispose_chunk(r, rsize) + } + return p + } else { + return voidptr(0) + } + } +} + +[unsafe] +fn (mut dl Dlmalloc) mmap_resize(oldp_ &Chunk, nb usize, can_move bool) &Chunk { + mut oldp := oldp_ + oldsize := oldp.size() + if is_small(nb) { + return voidptr(0) + } + // Keep the old chunk if it's big enough but not too big + if oldsize >= nb + sizeof(usize) && (oldsize - nb) <= (dlmalloc.default_granularity << 1) { + return oldp + } + + offset := oldp.prev_foot + oldmmsize := oldsize + offset + dlmalloc.mmap_foot_pad + newmmsize := dl.mmap_align(nb + 6 * sizeof(usize) + dlmalloc.malloc_alignment - 1) + + ptr := dl.system_allocator.remap(dl.system_allocator.data, voidptr(usize(oldp) - offset), + oldmmsize, newmmsize, can_move) + if isnil(ptr) { + return voidptr(0) + } + + mut newp := &Chunk(voidptr(usize(ptr) + offset)) + psize := newmmsize - offset - dlmalloc.mmap_foot_pad + newp.head = psize + newp.plus_offset(psize).head = dlmalloc.fencepost_head + newp.plus_offset(psize + sizeof(usize)).head = 0 + if ptr < dl.least_addr { + dl.least_addr = ptr 
+ } + dl.footprint = dl.footprint + newmmsize - oldmmsize + if dl.footprint > dl.max_footprint { + dl.max_footprint = dl.footprint + } + return newp +} + +fn (dl &Dlmalloc) mmap_align(a usize) usize { + return align_up(a, dl.system_allocator.page_size(dl.system_allocator.data)) +} + +[unsafe] +fn (mut dl Dlmalloc) dispose_chunk(p_ &Chunk, psize_ usize) { + mut p := p_ + mut psize := psize_ + unsafe { + mut next := p.plus_offset(psize) + if !p.pinuse() { + prevsize := p.prev_foot + if p.mmapped() { + psize += prevsize + dlmalloc.mmap_foot_pad + + if dl.system_allocator.free_(dl.system_allocator.data, voidptr(usize(p) - prevsize), + psize) + { + dl.footprint -= psize + } + return + } + + prev := p.minus_offset(prevsize) + psize += prevsize + p = prev + if voidptr(p) != voidptr(dl.dv) { + dl.unlink_chunk(p, prevsize) + } else if next.head & dlmalloc.inuse == dlmalloc.inuse { + dl.dvsize = psize + p.set_free_with_pinuse(psize, next) + return + } + } + + if !next.cinuse() { + if voidptr(next) == voidptr(dl.top) { + dl.topsize += psize + tsize := dl.topsize + dl.top = p + p.head = tsize | dlmalloc.pinuse + if voidptr(p) == voidptr(dl.dv) { + dl.dv = voidptr(0) + dl.dvsize = 0 + } + return + } else if voidptr(next) == voidptr(dl.dv) { + dl.dvsize += psize + dvsize := dl.dvsize + dl.dv = p + p.set_size_and_pinuse_of_free_chunk(dvsize) + return + } else { + nsize := next.size() + psize += nsize + dl.unlink_chunk(next, nsize) + p.set_size_and_pinuse_of_free_chunk(psize) + if voidptr(p) == voidptr(dl.dv) { + dl.dvsize = psize + return + } + } + } else { + p.set_free_with_pinuse(psize, next) + } + dl.insert_chunk(p, psize) + } +} diff --git a/vlib/dlmalloc/dlmalloc_sys_nix.c.v b/vlib/dlmalloc/dlmalloc_sys_nix.c.v new file mode 100644 index 0000000000..9842405f45 --- /dev/null +++ b/vlib/dlmalloc/dlmalloc_sys_nix.c.v @@ -0,0 +1,116 @@ +module dlmalloc + +#include +#include + +fn C.munmap(ptr voidptr, size usize) int +fn C.mremap(ptr voidptr, old usize, new usize, flags usize) voidptr +fn C.mmap(base voidptr, len usize, prot int, flags int, fd int, offset i64) voidptr + +pub enum Mm_prot { + prot_read = 0x1 + prot_write = 0x2 + prot_exec = 0x4 + prot_none = 0x0 + prot_growsdown = 0x01000000 + prot_growsup = 0x02000000 +} + +pub enum Map_flags { + map_shared = 0x01 + map_private = 0x02 + map_shared_validate = 0x03 + map_type = 0x0f + map_fixed = 0x10 + map_file = 0x00 + map_anonymous = 0x20 + map_huge_shift = 26 + map_huge_mask = 0x3f +} + +enum MemProt { + prot_read = 0x1 + prot_write = 0x2 + prot_exec = 0x4 + prot_none = 0x0 + prot_growsdown = 0x01000000 + prot_growsup = 0x02000000 +} + +enum MapFlags { + map_shared = 0x01 + map_private = 0x02 + map_shared_validate = 0x03 + map_type = 0x0f + map_fixed = 0x10 + map_file = 0x00 + map_anonymous = 0x20 + map_huge_shift = 26 + map_huge_mask = 0x3f +} + +fn system_alloc(_ voidptr, size usize) (voidptr, usize, u32) { + unsafe { + mem_prot := MemProt(int(MemProt.prot_read) | int(MemProt.prot_write)) + map_flags := MapFlags(int(MapFlags.map_private) | int(MapFlags.map_anonymous)) + addr := C.mmap(voidptr(0), size, int(mem_prot), int(map_flags), -1, 0) + + if addr == voidptr(-1) { + return voidptr(0), 0, 0 + } else { + return addr, size, 0 + } + } +} + +fn system_remap(_ voidptr, ptr voidptr, oldsize usize, newsize usize, can_move bool) voidptr { + return voidptr(0) +} + +fn system_free_part(_ voidptr, ptr voidptr, oldsize usize, newsize usize) bool { + $if linux { + unsafe { + rc := C.mremap(ptr, oldsize, newsize, 0) + if rc != voidptr(-1) { + return true + } + 
return C.munmap(voidptr(usize(ptr) + newsize), oldsize - newsize) == 0 + } + } $else $if macos { + unsafe { + return C.munmap(voidptr(usize(ptr) + newsize), oldsize - newsize) == 0 + } + } + return false +} + +fn system_free(_ voidptr, ptr voidptr, size usize) bool { + unsafe { + return C.munmap(ptr, size) == 0 + } +} + +fn system_can_release_part(_ voidptr, _ u32) bool { + return true +} + +fn system_allocates_zeros(_ voidptr) bool { + return true +} + +fn system_page_size(_ voidptr) usize { + return 4096 +} + +pub fn get_system_allocator() Allocator { + return Allocator{ + alloc: system_alloc + remap: system_remap + free_part: system_free_part + free_: system_free + can_release_part: system_can_release_part + allocates_zeros: system_allocates_zeros + page_size: system_page_size + data: voidptr(0) + } +} diff --git a/vlib/dlmalloc/dlmalloc_sys_windows.c.v b/vlib/dlmalloc/dlmalloc_sys_windows.c.v new file mode 100644 index 0000000000..aa7e6b191f --- /dev/null +++ b/vlib/dlmalloc/dlmalloc_sys_windows.c.v @@ -0,0 +1,42 @@ +module dlmalloc + +fn system_alloc(_ voidptr, size usize) (voidptr, usize, u32) { + return voidptr(0), 0, 0 +} + +fn system_remap(_ voidptr, ptr voidptr, oldsize usize, newsize usize, can_move bool) voidptr { + return voidptr(0) +} + +fn system_free_part(_ voidptr, ptr voidptr, oldsize usize, newsize usize) bool { + return false +} + +fn system_free(_ voidptr, ptr voidptr, size usize) bool { + return false +} + +fn system_can_release_part(_ voidptr, _ u32) bool { + return false +} + +fn system_allocates_zeros(_ voidptr) bool { + return false +} + +fn system_page_size(_ voidptr) usize { + return 4096 +} + +pub fn get_system_allocator() Allocator { + return Allocator{ + alloc: system_alloc + remap: system_remap + free_part: system_free_part + free_: system_free + can_release_part: system_can_release_part + allocates_zeros: system_allocates_zeros + page_size: system_page_size + data: voidptr(0) + } +} diff --git a/vlib/dlmalloc/global.v b/vlib/dlmalloc/global.v new file mode 100644 index 0000000000..ea396ed511 --- /dev/null +++ b/vlib/dlmalloc/global.v @@ -0,0 +1,63 @@ +[has_globals] +module dlmalloc + +__global global = new(get_system_allocator()) + +/// malloc allocates `size` bytes. +/// +/// Returns a null pointer if allocation fails. Returns a valid pointer +/// otherwise. +[unsafe] +pub fn malloc(size usize) voidptr { + unsafe { + return global.malloc(size) + } +} + +// free deallocates a `ptr`. +[unsafe] +pub fn free(ptr voidptr) { + unsafe { + global.free_(ptr) + } +} + +// Same as `malloc`, except if the allocation succeeds it's guaranteed to +// point to `size` bytes of zeros. +[unsafe] +pub fn calloc(size usize) voidptr { + unsafe { + return global.calloc(size) + } +} + +// realloc reallocates `ptr`, a previous allocation with `old_size` and +// to have `new_size`. +// +// +// Returns a null pointer if the memory couldn't be reallocated, but `ptr` +// is still valid. Returns a valid pointer and frees `ptr` if the request +// is satisfied. +[unsafe] +pub fn realloc(ptr voidptr, oldsize usize, newsize usize) voidptr { + unsafe { + _ := oldsize + + return global.realloc(ptr, newsize) + } +} + +// memalign allocates `size` bytes with `align` align. +// +// +// Returns a null pointer if allocation fails. Returns a valid pointer otherwise. +[unsafe] +pub fn memalign(size usize, align usize) voidptr { + unsafe { + if align <= malloc_alignment { + return global.malloc(size) + } else { + return global.memalign(align, size) + } + } +}
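
A minimal usage sketch of the Dlmalloc instance API added above, assuming the module builds as laid out in this patch and runs on a Unix host (the Windows backend is still stubbed out); the example program itself is not part of the patch.

module main

import dlmalloc

fn main() {
	// A private allocator instance, wired to the mmap-backed platform hooks.
	mut dl := dlmalloc.new(dlmalloc.get_system_allocator())
	unsafe {
		// All Dlmalloc methods are marked [unsafe]; sizes are in bytes.
		p := dl.calloc(usize(100)) // zeroed, like libc calloc
		assert !isnil(p)
		q := dl.realloc(p, usize(4096)) // may move the block; p must not be reused
		assert !isnil(q)
		dl.free_(q)
	}
}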
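global.v also exposes a process-wide instance through plain functions. A hedged sketch of that front-end, again assuming a Unix host; note that the global realloc takes the old size explicitly even though the current wrapper discards it, and that memalign takes (size, align) in that order.

module main

import dlmalloc

fn main() {
	unsafe {
		mut p := dlmalloc.malloc(usize(64))
		assert !isnil(p)
		// oldsize is currently ignored by the wrapper; only newsize matters.
		p = dlmalloc.realloc(p, usize(64), usize(256))
		// align must be a power of two; values at or below malloc_alignment
		// fall back to plain malloc.
		a := dlmalloc.memalign(usize(128), usize(64))
		dlmalloc.free(a)
		dlmalloc.free(p)
	}
}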
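The small-bin constants in dlmalloc.v imply a simple mapping from request size to bin: a request is padded by chunk_overhead, rounded up to malloc_alignment, and then indexed in 8-byte (1 << small_bin_shift) steps. A hypothetical in-module test sketch (the helpers are private, so it would have to live next to dlmalloc.v) illustrating that arithmetic on a 64-bit target, where malloc_alignment == 16 and chunk_overhead == 8:

module dlmalloc

fn test_small_bin_index_math() {
	// align_up(24 + 8, 16) == 32: a 24-byte request becomes a 32-byte chunk.
	assert request_2_size(24) == 32
	// Chunk sizes map onto the 32 small bins in 8-byte steps.
	assert small_index(32) == 4
	assert small_index2size(4) == 32
	// Chunks of 256 bytes and above are managed by the tree bins instead.
	assert !is_small(256)
}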
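Because the Allocator hooks are plain function pointers, a freestanding or WASM build can supply its own backend instead of mmap. A hedged sketch of such a backend follows; the get_static_allocator name, the 4 MiB bump arena and the 64 KiB page size are illustrative choices, not part of the patch, and the code would have to sit inside the dlmalloc module because the Allocator fields are module-private.

[has_globals]
module dlmalloc

__global static_pool = [4194304]u8{} // 4 MiB zero-initialised backing buffer
__global static_pool_off = 0

// Hand out the next slice of the static buffer; it never grows and never returns
// memory, which is tolerable because dlmalloc recycles everything it is given.
fn static_alloc(_ voidptr, size usize) (voidptr, usize, u32) {
	if static_pool_off + int(size) > static_pool.len {
		return voidptr(0), 0, 0
	}
	ptr := unsafe { voidptr(&static_pool[static_pool_off]) }
	static_pool_off += int(size)
	return ptr, size, 0
}

fn static_remap(_ voidptr, ptr voidptr, oldsize usize, newsize usize, can_move bool) voidptr {
	return voidptr(0) // growing a region in place is not supported
}

fn static_free_part(_ voidptr, ptr voidptr, oldsize usize, newsize usize) bool {
	return false // the arena never shrinks a region
}

fn static_free(_ voidptr, ptr voidptr, size usize) bool {
	return false // the arena never releases memory
}

fn static_can_release_part(_ voidptr, _ u32) bool {
	return false
}

fn static_allocates_zeros(_ voidptr) bool {
	return true // static globals start zeroed
}

fn static_page_size(_ voidptr) usize {
	return 64 * 1024 // WASM page size
}

pub fn get_static_allocator() Allocator {
	return Allocator{
		alloc: static_alloc
		remap: static_remap
		free_part: static_free_part
		free_: static_free
		can_release_part: static_can_release_part
		allocates_zeros: static_allocates_zeros
		page_size: static_page_size
		data: voidptr(0)
	}
}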