module main

// NB: this benchmark is best compiled with:
// `v -prod -cg -gc boehm bench_euclid.v`
import math.big
import benchmark
import os
import v.tests.bench.math_big_gcd.prime {
	DataI,
	PrimeCfg,
	PrimeSet,
}

// TestDataI is the view of a test item that the benchmark loop needs:
// the expected gcd `r` and the two operands `aa` and `bb`.
interface TestDataI {
	r  big.Integer
	aa big.Integer
	bb big.Integer
}

// GCDSet holds (gcd, aa, bb) as decimal strings, reusing the PrimeSet layout.
type GCDSet = PrimeSet

// Clocks maps an algorithm name to the benchmark that times it.
type Clocks = map[string]benchmark.Benchmark

const (
	// marker returned by prepare_and_test_gcd for rejected test data
	empty_set = GCDSet{'1', '1', '1'}
	// when true, one '.' is printed per benchmark round
	with_dots = false
)

// main checks that the prime file is readable, creates one clock per
// algorithm, runs the benchmark once for every prime configuration and
// finally prints the totals together with a short summary text.
fn main() {
	fp := os.join_path(@VROOT, prime.toml_path)
	if !prime_file_exists(fp) {
		panic('expected file |$fp| - not found.')
	}

	mut clocks := Clocks(map[string]benchmark.Benchmark{})

	for algo in [
		'euclid',
		'gcd_binary',
		//'u32binary',
		//'u64binary'
	] {
		clocks[algo] = benchmark.new_benchmark()
	}

	// Every test-prime-set has to pass this predicate
	// before it is used in the benchmark.
	//
	predicate_fn := fn (ps PrimeSet) bool {
		cast_bi := bi_from_decimal_string

		r := cast_bi(ps.r)
		aa := r * cast_bi(ps.a)
		bb := r * cast_bi(ps.b)
		gcd := aa.gcd(bb)

		// the gcd must be non-trivial, symmetric, equal to the root prime,
		// and it must divide both (distinct) operands without remainder.
		return gcd != big.one_int && gcd == bb.gcd(aa) && gcd == r && aa != bb
			&& (aa % gcd) == big.zero_int && (bb % gcd) == big.zero_int
	}

	cfgs := [
		// root-prime  a-prime  b-prime
		['s.3', 'xs.3', 's'],
		['s.10', 's.10', 'm.all'],
		['m.9', 's.10', 'xs'],
		['l.40', 'xs.10', 's.5'],
		['ml.20', 'm', 's.15'],
		['xl', 's.10', 'l.15'],
		['xxl', 'l.10', 'xl'],
		['l.30', 'm.10', 'xxl'],
		['xxl', 'xl', 'm.15'],
		['mega', 'xl.6', 's'],
		['xxl', 'xxxl.10', 'm.30'],
		['mega', 'xxxl', 'mega'],
		['giga.5', 'mega', 'giga'],
		['crazy', 'mega', 'giga'],
		['s', 'biggest', 'crazy'],
		['biggest', 'crazy', 'giga'],
	].map(PrimeCfg{it[0], it[1], it[2]})

	println('\n$cfgs.len x Tests')

	for i, prime_cfg in cfgs {
		println('\n#-${i + 1}(Stack) "$prime_cfg"')
		bench_euclid_vs_binary(prime_cfg, false, predicate_fn, mut clocks)
		// just-to-be-sure, but makes no difference in this test.
		// println('\n#-${i + 1}(Heap) "$prime_cfg"')
		// bench_euclid_vs_binary(prime_cfg, true, predicate_fn, mut clocks)
	}

	println('')
	for _, mut clock in clocks {
		clock.stop()
	}
	println(clocks['euclid'].total_message('both algorithms '))

	msg := [
		'Seems to me as if euclid in big.Integer.gcd() performs better on ',
		'very-small-integers up to 8-byte/u64. The tests #-1..5 show this.',
		'The gcd_binary-algo seems to be perform better, the larger the numbers/buffers get.',
		'On my machine, i see consistent gains between ~10-30-percent with :',
		'\n',
		'v -prod -cg -gc boehm bench_euclid.v',
		'\n',
		'This test covers multiplied primes up-to a length of 300-char-digits in ',
		'a decimal-string. This equals (188-byte) == 47 x u32-values.',
		'edit/change primes in $prime.toml_path',
		'Improvements & critique are welcome : \n',
		'https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/',
	].join('\n')
	println(msg)
}

// run_benchmark times every algorithm registered in `clocks` against `data`.
// Only elements that are StackData or HeapData are used. `heap` is used
// solely for the label of the printed statistics line.
// It returns true when every algorithm reproduced the expected gcd for all
// items, and false on a mismatch or when there is no usable test data.
fn run_benchmark(data []DataI, heap bool, mut clocks Clocks) bool {
	mut testdata := []TestDataI{}
	for elem in data {
		// TODO: this reads strange
		if elem is StackData {
			testdata << elem
		}
		if elem is HeapData {
			testdata << elem
		}
	}

	// Without items the statistics below would index an empty array
	// and divide by zero.
	if testdata.len == 0 {
		eprintln('no usable test-data, benchmark skipped')
		return false
	}

	// some statistics: buffer length (in u32-digits) of every number
	//
	mut sizes := []int{cap: testdata.len * 3}
	for set in testdata {
		for number in [set.r, set.aa, set.bb] {
			// the second return value of bytes() is not needed here
			bi, _ := number.bytes()
			sizes << bi_buffer_len(bi)
		}
	}
	sizes.sort()
	min_byte := sizes.first() * 4
	max_byte := sizes.last() * 4
	mut buffer_space := 0
	for size in sizes {
		buffer_space += size
	}

	// trying to balance prime-size and item-count
	// minimum rounds is 100-times
	//
	weight := (3 * buffer_space) / testdata.len
	rounds := if weight > 0 && 2000 / weight >= 100 { 2000 / weight } else { 100 }

	ratio := (buffer_space * 4) / (testdata.len * 3)
	msg := [
		'avg-$ratio-byte/Prime, $min_byte-byte < Prime < $max_byte-byte \n',
		'~${buffer_space * 4 / 1024}-Kb-',
		if heap { 'Heap' } else { 'Stack' },
		'-space for $testdata.len-items x ',
		'$rounds-rounds',
	].join('')
	println(msg)

	mut all_ok := true
	for algo, mut clock in clocks {
		mut failed := false
		mut cycles := 0
		clock.step()
		// stop at the first mismatch, so that a step is reported
		// exactly once: either as failed or as ok.
		for cycles < rounds && !failed {
			for set in testdata {
				match algo {
					'euclid' {
						if set.r != set.aa.gcd(set.bb) {
							failed = true
						}
					}
					'gcd_binary' {
						if set.r != set.aa.gcd_binary(set.bb) {
							failed = true
						}
					}
					else {
						eprintln('unknown algo was "$algo"')
						continue
					}
				}
				if failed {
					eprintln('$algo failed ?')
					break
				}
			} // eo-for over testdata
			if with_dots {
				print('.')
			}
			cycles += 1
		} // eo-for cycles
		if with_dots {
			println('')
		}
		if failed {
			clock.fail()
			all_ok = false
		} else {
			clock.ok()
		}
		clock.measure(algo)
	} // eo-for-loop over algo, clock
	return all_ok
}

// bench_euclid_vs_binary draws a random prime set for `test_config`,
// validates it with `predicate_fn`, converts it into benchmark data and
// hands that data to run_benchmark, whose result it returns.
// It panics when no prime set can be generated.
fn bench_euclid_vs_binary(test_config PrimeCfg, heap bool, predicate_fn fn (ps PrimeSet) bool, mut clocks Clocks) bool {
	testprimes := prime.random_set(test_config) or { panic(err) }

	// validate the test-data
	//
	gcd_primes := testprimes.map(prepare_and_test_gcd(it, predicate_fn)).filter(it != empty_set)

	// just to make sure all generated primes are sane
	//
	assert gcd_primes.len == testprimes.len

	// casting the decimal-strings into big.Integers
	// here to avoid measuring string-parsing-cycles
	// during later testing.
	//
	// NOTE(review): the former `if heap { .. } else { .. }` had two identical
	// branches, so `heap` never influenced this conversion. Presumably
	// HeapData/StackData.from_primeset were meant to be used - confirm.
	casted_sets := gcd_primes.map(unsafe { DataI(&PrimeSet(&it)).cast() })

	// ready to use the primes in the benchmark
	//
	return run_benchmark(casted_sets, heap, mut clocks)
}

// prepare_and_test_gcd checks `primeset` with `test` and, on success, returns
// the decimal strings of (gcd(aa, bb), aa, bb) where aa = a * r and bb = b * r.
// A rejected primeset is dumped and `empty_set` is returned instead.
fn prepare_and_test_gcd(primeset PrimeSet, test fn (ps PrimeSet) bool) GCDSet {
	if !primeset.predicate(test) {
		eprintln('? Corrupt Testdata was ?')
		dump(primeset)
		return empty_set // {'1', '1', '1'}
	}

	cast_bi := bi_from_decimal_string

	r := cast_bi(primeset.r)
	aa := cast_bi(primeset.a) * r
	bb := cast_bi(primeset.b) * r
	gcd := aa.gcd(bb)

	return GCDSet{'$gcd', '$aa', '$bb'}
}

// prime_file_exists reports whether the file at `path` is readable.
fn prime_file_exists(path string) bool {
	return os.is_readable(path)
}

// bi_from_decimal_string converts a string-of-decimals into
// a math.big.Integer using the big.Integers 'from_radix-fn'.
// It panics when the string can not be converted.
pub fn bi_from_decimal_string(s string) big.Integer {
	return big.integer_from_radix(s, u32(10)) or {
		msg := [
			'Cannot convert prime from decimal-string.',
			'prime was : "$s"\n',
		].join('\n')
		panic(msg)
	}
}

// bi_buffer_len returns how many u32-digits are needed to store `input`,
// i.e. the byte count divided by 4 and rounded up (0 for empty input).
// It is needed - during test only - to calculate the size of the
// big.Integers-buffer. The former version built the padded []u32
// just to return its length; the result is the same.
fn bi_buffer_len(input []byte) int {
	return (input.len + 3) / 4
}

// HeapData is a test item carrying the `[heap]` attribute.
[heap]
pub struct HeapData {
pub mut:
	r  big.Integer
	aa big.Integer
	bb big.Integer
}

// to_primeset renders the three numbers as the strings of a PrimeSet.
pub fn (hd HeapData) to_primeset() PrimeSet {
	return PrimeSet{
		r: '$hd.r'
		a: '$hd.aa'
		b: '$hd.bb'
	}
}

// from_primeset parses the decimal strings of `p` into a new HeapData.
// The receiver itself is not read.
pub fn (hd HeapData) from_primeset(p PrimeSet) DataI {
	return DataI(HeapData{
		r: bi_from_decimal_string(p.r)
		aa: bi_from_decimal_string(p.a)
		bb: bi_from_decimal_string(p.b)
	})
}

// StackData is the same test item as HeapData, without the `[heap]` attribute.
pub struct StackData {
pub mut:
	r  big.Integer
	aa big.Integer
	bb big.Integer
}

// to_primeset renders the three numbers as the strings of a PrimeSet.
pub fn (sd StackData) to_primeset() PrimeSet {
	return PrimeSet{
		r: '$sd.r'
		a: '$sd.aa'
		b: '$sd.bb'
	}
}

// from_primeset parses the decimal strings of `p` into a new StackData.
// The receiver itself is not read.
pub fn (sd StackData) from_primeset(p PrimeSet) DataI {
	return DataI(StackData{
		r: bi_from_decimal_string(p.r)
		aa: bi_from_decimal_string(p.a)
		bb: bi_from_decimal_string(p.b)
	})
}