add the qsort_r implementation from musl (reduced pathological performance degradations)
							parent
							
								
									4535b5edc2
								
							
						
					
					
						commit
						7993ff295e
					
				| 
						 | 
				
			
			@ -0,0 +1,210 @@
 | 
			
		|||
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
/*
 * Count the trailing zero bits of a 32-bit value.
 *
 * (x & -x) isolates the lowest set bit. Multiplying that single bit by the
 * de Bruijn constant and keeping the top 5 bits of the 32-bit product yields
 * an index into the lookup table below.
 *
 * For x == 0 the product is 0, so the result is debruijn32[0] == 0.
 */
static inline int a_ctz_32(uint32_t x) {
	static const char debruijn32[32] = {
		0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
		31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
	};
	return debruijn32[(x&-x)*0x076be629 >> 27];
}
 | 
			
		||||
 | 
			
		||||
/*
 * Count the trailing zero bits of a 64-bit value.
 *
 * When long is narrower than 64 bits the value is handled as two 32-bit
 * halves via a_ctz_32(); otherwise a 64-bit de Bruijn multiply and table
 * lookup is used (top 6 bits of the product select the table entry).
 *
 * Note on x == 0: the 64-bit path returns debruijn64[0] == 0, while the
 * split path returns 32 + a_ctz_32(0) == 32. Callers that need a defined
 * result for zero must check for it themselves.
 */
static inline int a_ctz_64(uint64_t x) {
	static const char debruijn64[64] = {
		0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
		62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
		51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
	};
	if (sizeof(long) < 8) {
		/* Low half first; fall back to the high half only if it is empty. */
		uint32_t y = x;
		if (!y) {
			y = x>>32;
			return 32 + a_ctz_32(y);
		}
		return a_ctz_32(y);
	}
	return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
}
 | 
			
		||||
 | 
			
		||||
/*
 * Count the trailing zero bits of an unsigned long, dispatching on the
 * width of long: a_ctz_32() when long is narrower than 64 bits, a_ctz_64()
 * otherwise.
 *
 * The argument type is unsigned long, so a wider value passed in (for
 * example a 64-bit size_t on a target where long is 32 bits) is truncated
 * before the bits are counted.
 */
static inline int a_ctz_l(unsigned long x) {
	return (sizeof(long) < 8) ? a_ctz_32(x) : a_ctz_64(x);
}
 | 
			
		||||
 | 
			
		||||
typedef int (*vqsort_r_cmpfun)(const void *, const void *, void *);
 | 
			
		||||
 | 
			
		||||
/*
 * Trailing-zero count of a size_t, returning 0 for x == 0.
 *
 * This deliberately does not go through a_ctz_l(): that helper takes an
 * unsigned long, which is narrower than size_t on LLP64 targets (64-bit
 * size_t, 32-bit long), so the upper half of the word would be discarded.
 *
 * The loop halves the probe width each step (assumes the bit width of
 * size_t is a power of two), so it takes log2(width) iterations.
 */
static inline int pntz_ctz(size_t x) {
	int n = 0;
	size_t s;

	if(!x) {
		return 0;
	}
	for(s = 4 * sizeof(size_t); s; s >>= 1) {
		/* If the low s bits are all clear, skip over them. */
		if(!(x & (((size_t)1 << s) - 1))) {
			x >>= s;
			n += (int)s;
		}
	}
	return n;
}

/*
 * Number of trailing zero bits of the two-word value p, ignoring bit 0 of
 * p[0] (it is cleared by the "- 1", which assumes that bit is set).
 *
 * Returns the position of the lowest set bit above bit 0, looking in p[0]
 * first and then in p[1] (offset by the width of size_t), or 0 when no
 * such bit exists.
 */
static inline int pntz(size_t p[2]) {
	int r = pntz_ctz(p[0] - 1);
	if(r != 0 || (r = 8*sizeof(size_t) + pntz_ctz(p[1])) != 8*sizeof(size_t)) {
		return r;
	}
	return 0;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Rotate the contents of n elements by one position: the element at ar[0]
 * ends up at ar[n-1], and every other ar[i+1] moves to ar[i].
 *
 * width  size of each element in bytes
 * ar     pointers to the n elements; must have room for n + 1 entries,
 *        because ar[n] is overwritten with the address of a local buffer
 * n      number of elements taking part; nothing happens when n < 2
 *
 * Elements are moved in chunks of at most sizeof(tmp) bytes so that a
 * fixed-size stack buffer suffices for any width. On return ar[0..n-1]
 * have each been advanced by width bytes and ar[n] points at a buffer
 * that no longer exists, so the caller must not reuse the array contents.
 */
static void cycle(size_t width, unsigned char* ar[], int n) {
	unsigned char tmp[256];
	size_t l;
	int i;

	if(n < 2) {
		return;
	}

	ar[n] = tmp;
	while(width) {
		l = sizeof(tmp) < width ? sizeof(tmp) : width;
		/* Save the current chunk of the first element ... */
		memcpy(ar[n], ar[0], l);
		for(i = 0; i < n; i++) {
			/* ... shift each chunk down one slot; the last copy reads tmp. */
			memcpy(ar[i], ar[i + 1], l);
			ar[i] += l;
		}
		width -= l;
	}
}
 | 
			
		||||
 | 
			
		||||
/* shl() and shr() need n > 0 */
 | 
			
		||||
/*
 * Shift the two-word value p (p[0] = low word, p[1] = high word) left by
 * n bits, for 0 <= n < 2 * (bits in size_t).
 *
 * A shift of a whole word or more is first reduced by moving the low word
 * into the high word. If nothing remains to shift after that (or n was 0
 * to begin with) the function returns early: the carry expression below
 * would otherwise shift by the full width of size_t, which is undefined
 * behaviour in C.
 */
static inline void shl(size_t p[2], int n) {
	const int bits = (int)(8 * sizeof(size_t));

	if(n >= bits) {
		n -= bits;
		p[1] = p[0];
		p[0] = 0;
	}
	if(n <= 0) {
		return;
	}
	p[1] <<= n;
	p[1] |= p[0] >> (bits - n);
	p[0] <<= n;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Shift the two-word value p (p[0] = low word, p[1] = high word) right by
 * n bits, for 0 <= n < 2 * (bits in size_t).
 *
 * A shift of a whole word or more is first reduced by moving the high word
 * into the low word. If nothing remains to shift after that (or n was 0 to
 * begin with) the function returns early: the carry expression below would
 * otherwise shift by the full width of size_t, which is undefined
 * behaviour in C.
 */
static inline void shr(size_t p[2], int n) {
	const int bits = (int)(8 * sizeof(size_t));

	if(n >= bits) {
		n -= bits;
		p[0] = p[1];
		p[1] = 0;
	}
	if(n <= 0) {
		return;
	}
	p[0] >>= n;
	p[0] |= p[1] << (bits - n);
	p[1] >>= n;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Restore heap order inside a single tree whose root element is at head.
 *
 * head    address of the root element of the tree
 * width   element size in bytes
 * cmp     three-way comparison callback
 * arg     opaque pointer handed to cmp unchanged
 * pshift  order of the tree; trees of order <= 1 have no children here
 * lp      table of tree sizes in bytes, indexed by order (built by vqsort_r)
 *
 * The right child sits directly before the root (head - width) and the left
 * child sits before the right subtree (head - width - lp[pshift - 2]).
 * The function walks down towards the larger child until the original root
 * value is >= both children, recording the visited elements in ar[], and
 * then rotates the values along that path with a single cycle() call.
 */
static void sift(unsigned char *head, size_t width, vqsort_r_cmpfun cmp, void *arg, int pshift, size_t lp[]) {
	unsigned char *rt, *lf;
	/* Path of visited elements; cycle() needs one spare slot at the end. */
	unsigned char *ar[14 * sizeof(size_t) + 1];
	int i = 1;

	ar[0] = head;
	while(pshift > 1) {
		rt = head - width;
		lf = head - width - lp[pshift - 2];

		/* ar[0] is always the original root value being placed. */
		if(cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0) {
			break;
		}
		if(cmp(lf, rt, arg) >= 0) {
			/* Descend into the left subtree (order pshift - 1). */
			ar[i++] = lf;
			head = lf;
			pshift -= 1;
		} else {
			/* Descend into the right subtree (order pshift - 2). */
			ar[i++] = rt;
			head = rt;
			pshift -= 2;
		}
	}
	cycle(width, ar, i);
}
 | 
			
		||||
 | 
			
		||||
/*
 * Place the root at head correctly among the roots of the preceding trees,
 * then restore heap order inside the tree where it ends up.
 *
 * head    address of the root element of the current (rightmost) tree
 * width   element size in bytes
 * cmp     three-way comparison callback
 * arg     opaque pointer handed to cmp unchanged
 * pp      two-word bitmask describing the trees; copied, not modified
 * pshift  order of the tree rooted at head
 * trusty  non-zero when the tree at head is already known to be heap
 *         ordered, so its children need not be compared on the first step
 * lp      table of tree sizes in bytes, indexed by order (built by vqsort_r)
 *
 * Each step looks at the "stepson": the root of the tree immediately to the
 * left, at head - lp[pshift]. The walk stops when the stepson is <= the
 * value being placed, or (unless trusty) when one of the current tree's
 * children is >= the stepson. Visited roots are recorded in ar[] and the
 * values are rotated along that path by cycle(). When no step was taken and
 * trusty was set, nothing is moved.
 */
static void trinkle(unsigned char *head, size_t width, vqsort_r_cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[]) {
	unsigned char *stepson, *rt, *lf;
	size_t p[2];
	/* Path of visited roots; cycle() needs one spare slot at the end. */
	unsigned char *ar[14 * sizeof(size_t) + 1];
	int i = 1;
	int trail;

	/* Work on a private copy so the caller's bitmask is left untouched. */
	p[0] = pp[0];
	p[1] = pp[1];

	ar[0] = head;
	/* p == {1, 0} means there is no tree further to the left. */
	while(p[0] != 1 || p[1] != 0) {
		stepson = head - lp[pshift];
		if(cmp(stepson, ar[0], arg) <= 0) {
			break;
		}
		if(!trusty && pshift > 1) {
			rt = head - width;
			lf = head - width - lp[pshift - 2];
			if(cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0) {
				break;
			}
		}

		ar[i++] = stepson;
		head = stepson;
		/* Advance the bitmask and order to the tree we just stepped onto. */
		trail = pntz(p);
		shr(p, trail);
		pshift += trail;
		trusty = 0;
	}
	if(!trusty) {
		cycle(width, ar, i);
		sift(head, width, cmp, arg, pshift, lp);
	}
}
 | 
			
		||||
 | 
			
		||||
/*
 * Sort an array in place using a caller-supplied comparison with context.
 *
 * base   start of the array
 * nel    number of elements
 * width  size of one element in bytes
 * cmp    callback returning <0, 0 or >0 for (a, b, arg)
 * arg    opaque pointer passed through to every cmp call
 *
 * Returns immediately when nel * width is 0.
 *
 * The array is first turned into a sequence of heap-ordered trees whose
 * sizes are Leonardo numbers (first loop), and the trees are then taken
 * apart from the right, one element at a time (second loop). The set of
 * trees is tracked by the two-word bitmask p together with pshift, the
 * order of the rightmost tree.
 */
void vqsort_r(void *base, size_t nel, size_t width, vqsort_r_cmpfun cmp, void *arg) {
	size_t lp[12*sizeof(size_t)];
	size_t i, size = width * nel;
	unsigned char *head, *high;
	size_t p[2] = {1, 0};
	int pshift = 1;
	int trail;

	if (!size) return;

	head = base;
	high = head + size - width; /* address of the last element */

	/* Precompute Leonardo numbers, scaled by element width */
	for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);

	/* Build phase: add one element per iteration, left to right. */
	while(head < high) {
		if((p[0] & 3) == 3) {
			/* Two adjacent trees are joined under the new root at head. */
			sift(head, width, cmp, arg, pshift, lp);
			shr(p, 2);
			pshift += 2;
		} else {
			/*
			 * If too few elements remain for this tree to be merged
			 * later, order it against the other roots now; otherwise a
			 * local sift is enough.
			 */
			if(lp[pshift - 1] >= high - head) {
				trinkle(head, width, cmp, arg, p, pshift, 0, lp);
			} else {
				sift(head, width, cmp, arg, pshift, lp);
			}

			if(pshift == 1) {
				shl(p, 1);
				pshift = 0;
			} else {
				shl(p, pshift - 1);
				pshift = 1;
			}
		}

		p[0] |= 1;
		head += width;
	}

	/* Place the final element's root among all the other roots. */
	trinkle(head, width, cmp, arg, p, pshift, 0, lp);

	/* Teardown phase: shrink from the right until a single element is left. */
	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
		if(pshift <= 1) {
			/* Rightmost tree is a single element: drop it, move to the next tree. */
			trail = pntz(p);
			shr(p, trail);
			pshift += trail;
		} else {
			/*
			 * Remove the root and expose its two subtrees, then order
			 * each exposed root among the roots to its left. Both
			 * subtrees are already heap ordered, hence trusty = 1.
			 */
			shl(p, 2);
			pshift -= 2;
			p[0] ^= 7;
			shr(p, 1);
			trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp);
			shl(p, 1);
			p[0] |= 1;
			trinkle(head - width, width, cmp, arg, p, pshift, 1, lp);
		}
		head -= width;
	}
}
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,3 @@
 | 
			
		|||
 | 
			
		||||
typedef int (*vqsort_r_cmpfun)(const void *, const void *, void *);
 | 
			
		||||
void vqsort_r(void *base, size_t nel, size_t width, vqsort_r_cmpfun cmp, void *arg);
 | 
			
		||||
| 
						 | 
				
			
			@ -1,4 +0,0 @@
 | 
			
		|||
module builtin
 | 
			
		||||
 | 
			
		||||
fn C.qsort_r(base voidptr, items usize, item_size usize, context voidptr, cb C.qsort_r_bsd_callback_func_context)
 | 
			
		||||
fn C.qsort_s(base voidptr, items usize, item_size usize, cb C.qsort_s_iso_callback_func_context, context voidptr)
 | 
			
		||||
| 
						 | 
				
			
			@ -1,3 +0,0 @@
 | 
			
		|||
module builtin
 | 
			
		||||
 | 
			
		||||
fn C.qsort_r(base voidptr, items usize, item_size usize, cb C.qsort_r_gnu_callback_func_context, context voidptr)
 | 
			
		||||
| 
						 | 
				
			
			@ -1,3 +0,0 @@
 | 
			
		|||
module builtin
 | 
			
		||||
 | 
			
		||||
fn C.qsort_s(base voidptr, items usize, item_size usize, cb C.qsort_s_windows_callback_func_context, context voidptr)
 | 
			
		||||
| 
						 | 
				
			
			@ -1,5 +1,8 @@
 | 
			
		|||
module builtin
 | 
			
		||||
 | 
			
		||||
#include "@VEXEROOT/thirdparty/musl_qsort/vqsort_r.h"
 | 
			
		||||
#flag @VEXEROOT/thirdparty/musl_qsort/vqsort_r.o
 | 
			
		||||
 | 
			
		||||
// vstrlen returns the V length of the C string `s` (0 terminator is not counted).
 | 
			
		||||
// The C string is expected to be a &byte pointer.
 | 
			
		||||
[inline; unsafe]
 | 
			
		||||
| 
						 | 
				
			
			@ -65,52 +68,15 @@ pub fn vmemset(s voidptr, c int, n int) voidptr {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
type FnSortCB = fn (const_a voidptr, const_b voidptr) int
 | 
			
		||||
 | 
			
		||||
type FnSortContextCB = fn (const_a voidptr, const_b voidptr, context voidptr) int
 | 
			
		||||
fn C.vqsort_r(base voidptr, nel usize, width usize, cb FnSortContextCB, context voidptr)
 | 
			
		||||
 | 
			
		||||
[inline; unsafe]
 | 
			
		||||
fn vqsort(base voidptr, nmemb usize, size usize, sort_cb FnSortCB) {
 | 
			
		||||
	C.qsort(base, nmemb, size, voidptr(sort_cb))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct VIndirectQSortContext {
 | 
			
		||||
mut:
 | 
			
		||||
	real_context voidptr
 | 
			
		||||
	real_sort_cb FnSortContextCB
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// GLIBC:
 | 
			
		||||
// void qsort_r(void *base, size_t nmemb, size_t size,
 | 
			
		||||
//                   int (*compar)(const void *, const void *, void *),
 | 
			
		||||
//                   void *arg);
 | 
			
		||||
fn vqsort_context_pure_v(base voidptr, nmemb usize, size usize, sort_cb FnSortContextCB, context voidptr) {
 | 
			
		||||
	C.qsort_r(base, nmemb, size, voidptr(sort_cb), context)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
[inline; unsafe]
 | 
			
		||||
fn vqsort_context(base voidptr, nmemb usize, size usize, sort_cb FnSortContextCB, context voidptr) {
 | 
			
		||||
	// See https://stackoverflow.com/questions/39560773/different-declarations-of-qsort-r-on-mac-and-linux
 | 
			
		||||
	// ... and https://xkcd.com/927/ :-|
 | 
			
		||||
	$if linux {
 | 
			
		||||
		vqsort_context_pure_v(base, nmemb, size, sort_cb, context)
 | 
			
		||||
		// C.qsort_r(base, nmemb, size, voidptr(sort_cb), context)
 | 
			
		||||
	} $else {
 | 
			
		||||
		ic := VIndirectQSortContext{
 | 
			
		||||
			real_context: context
 | 
			
		||||
			real_sort_cb: sort_cb
 | 
			
		||||
		}
 | 
			
		||||
		$if windows {
 | 
			
		||||
			cb := fn (context &VIndirectQSortContext, const_a voidptr, const_b voidptr) int {
 | 
			
		||||
				return context.real_sort_cb(const_a, const_b, context.real_context)
 | 
			
		||||
			}
 | 
			
		||||
			C.qsort_s(base, nmemb, size, voidptr(cb), &ic)
 | 
			
		||||
		} $else {
 | 
			
		||||
			// macos, BSDs, probably other unixes too:
 | 
			
		||||
			cb := fn (context &VIndirectQSortContext, const_a voidptr, const_b voidptr) int {
 | 
			
		||||
				return context.real_sort_cb(const_a, const_b, context.real_context)
 | 
			
		||||
			}
 | 
			
		||||
			C.qsort_r(base, nmemb, size, &ic, voidptr(cb))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	C.vqsort_r(base, nmemb, size, voidptr(sort_cb), context)
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -29,12 +29,14 @@ fn test_sort_with_compare() {
 | 
			
		|||
	dump(a)
 | 
			
		||||
	dump(context)
 | 
			
		||||
	assert a == ['1', '3', '5', 'hi']
 | 
			
		||||
	
 | 
			
		||||
	assert context.comparisons == [
 | 
			
		||||
		'a: "5" | b: "hi"',
 | 
			
		||||
		'a: "hi" | b: "1"',
 | 
			
		||||
		'a: "5" | b: "3"',
 | 
			
		||||
		'a: "1" | b: "3"',
 | 
			
		||||
		'a: "hi" | b: "3"',
 | 
			
		||||
		'a: "hi" | b: "5"',
 | 
			
		||||
		'a: "3" | b: "5"',
 | 
			
		||||
		'a: "5" | b: "1"',
 | 
			
		||||
		'a: "3" | b: "1"'
 | 
			
		||||
	]
 | 
			
		||||
	//
 | 
			
		||||
	mut already_sorted_context := Context{}
 | 
			
		||||
| 
						 | 
				
			
			@ -43,9 +45,9 @@ fn test_sort_with_compare() {
 | 
			
		|||
	dump(already_sorted_context)
 | 
			
		||||
	assert context != already_sorted_context
 | 
			
		||||
	assert already_sorted_context.comparisons == [
 | 
			
		||||
		'a: "1" | b: "3"',
 | 
			
		||||
		'a: "5" | b: "1"',
 | 
			
		||||
		'a: "5" | b: "3"',
 | 
			
		||||
		'a: "5" | b: "hi"',
 | 
			
		||||
		'a: "1" | b: "5"',
 | 
			
		||||
		'a: "3" | b: "5"',
 | 
			
		||||
		'a: "1" | b: "3"'
 | 
			
		||||
	]
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -478,10 +478,6 @@ const c_headers = c_helper_macros + c_unsigned_comparison_functions + c_common_m
 | 
			
		|||
	r'
 | 
			
		||||
// c_headers
 | 
			
		||||
typedef int (*qsort_callback_func)(const void*, const void*);
 | 
			
		||||
typedef int (*qsort_s_iso_callback_func_context)(const void*, const void*, void*); // ISO C11, *optional* Annex K
 | 
			
		||||
typedef int (*qsort_s_windows_callback_func_context)(void*, const void*, const void*); // Windows
 | 
			
		||||
typedef int (*qsort_r_gnu_callback_func_context)(const void*, const void*, void*); // Linux, GNU
 | 
			
		||||
typedef int (*qsort_r_bsd_callback_func_context)(void*, const void*, const void*); // MacOS, BSD
 | 
			
		||||
#include <stdio.h>  // TODO remove all these includes, define all function signatures and types manually
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue