diff --git a/.gitignore b/.gitignore index 7eb3a552a8..1bc6bf6822 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ vdbg.exe *.dll *.lib *.bak +*.dylib a.out .noprefix.vrepl_temp diff --git a/vlib/math/big/integer.v b/vlib/math/big/integer.v index fb0f5ef234..76da26ce02 100644 --- a/vlib/math/big/integer.v +++ b/vlib/math/big/integer.v @@ -761,3 +761,71 @@ pub fn (a Integer) isqrt() Integer { } return result } + +[inline] +fn bi_min(a Integer, b Integer) Integer { + return if a < b { a } else { b } +} + +[inline] +fn bi_max(a Integer, b Integer) Integer { + return if a > b { a } else { b } +} + +[direct_array_access] +fn (bi Integer) msb() u32 { + for idx := 0; idx < bi.digits.len; idx += 1 { + word := bi.digits[idx] + if word > 0 { + return u32((idx * 32) + bits.trailing_zeros_32(word)) + } + } + return u32(32) +} + +// Greatest-Common-Divisor https://en.wikipedia.org/wiki/Binary_GCD_algorithm +// The code below follows the 2013-christmas-special by D. Lemire & R. Corderoy +// https://en.algorithmica.org/hpc/analyzing-performance/gcd/ +// +// discussion & further info https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/ + +pub fn (x Integer) gcd_binary(y Integer) Integer { + // Since standard-euclid-gcd is much faster on smaller sizes 4-8-Byte. + // In such a case, one could delegate back to big.Integer.gcd() + // Uncomment below and a all long-long goes to euclid-gcd. + // + // if x.digits.len + y.digits.len <= 4 { + // return x.gcd( y ) + // } + + if x.signum == 0 { + return y.abs() + } + if y.signum == 0 { + return x.abs() + } + + if x.signum < 0 { + return x.neg().gcd(y) + } + if y.signum < 0 { + return x.gcd(y.neg()) + } + + mut a := x + mut b := y + + mut az := a.msb() + bz := b.msb() + shift := util.umin(az, bz) + b = b.rshift(bz) + + for a.signum != 0 { + a = a.rshift(az) + diff := b - a + az = diff.msb() + b = bi_min(a, b) + a = diff.abs() + } + return b.lshift(shift) +} diff --git a/vlib/v/tests/bench/math_big_gcd/bench_euclid.v b/vlib/v/tests/bench/math_big_gcd/bench_euclid.v new file mode 100644 index 0000000000..68b089d23e --- /dev/null +++ b/vlib/v/tests/bench/math_big_gcd/bench_euclid.v @@ -0,0 +1,339 @@ +module main + +// NB: this benchmark is preferable to be compiled with: `v -prod -cg -gc boehm bench_euclid.v` +import math.big +import benchmark +import os +import v.tests.bench.math_big_gcd.prime { + DataI, + PrimeCfg, + PrimeSet, +} + +interface TestDataI { + r big.Integer + aa big.Integer + bb big.Integer +} + +type GCDSet = PrimeSet +type Clocks = map[string]benchmark.Benchmark + +const ( + empty_set = GCDSet{'1', '1', '1'} + with_dots = false +) + +fn main() { + fp := os.join_path(@VROOT, prime.toml_path) + if !prime_file_exists(fp) { + panic('expected file |$fp| - not found.') + } + + mut clocks := Clocks(map[string]benchmark.Benchmark{}) + + for algo in [ + 'euclid', + 'gcd_binary', + //'u32binary', + //'u64binary' + ] { + clocks[algo] = benchmark.new_benchmark() + } + + // any test-prime-set needs to pass this predicate + // before used in the benchmark. + // + predicate_fn := fn (ps PrimeSet) bool { + cast_bi := bi_from_decimal_string + + r := cast_bi(ps.r) + aa := r * cast_bi(ps.a) + bb := r * cast_bi(ps.b) + gcd := aa.gcd(bb) + + return if [ + gcd != big.one_int, + gcd == bb.gcd(aa), + gcd == r, + aa != bb, + (aa % gcd) == big.zero_int, + (bb % gcd) == big.zero_int, + ].all(it == true) + { true } else { false } + } + + cfgs := [ + // root-prime a-prime b-prime + ['s.3', 'xs.3', 's'], + ['s.10', 's.10', 'm.all'], + ['m.9', 's.10', 'xs'], + ['l.40', 'xs.10', 's.5'], + ['ml.20', 'm', 's.15'], + ['xl', 's.10', 'l.15'], + ['xxl', 'l.10', 'xl'], + ['l.30', 'm.10', 'xxl'], + ['xxl', 'xl', 'm.15'], + ['mega', 'xl.6', 's'], + ['xxl', 'xxxl.10', 'm.30'], + ['mega', 'xxxl', 'mega'], + ['giga.5', 'mega', 'giga'], + ['crazy', 'mega', 'giga'], + ['s', 'biggest', 'crazy'], + ['biggest', 'crazy', 'giga'], + ].map(PrimeCfg{it[0], it[1], it[2]}) + + println('\n$cfgs.len x Tests') + + for i, prime_cfg in cfgs { + println('\n#-${i + 1}(Stack) "$prime_cfg"') + bench_euclid_vs_binary(prime_cfg, false, predicate_fn, mut clocks) + + // just-to-be-sure, but makes no difference in this test. + // println('\n#-${i + 1}(Heap) "$prime_cfg"') + // bench_euclid_vs_binary(prime_cfg, true, predicate_fn, mut clocks) + } + println('') + for _, mut clock in clocks { + clock.stop() + } + + println(clocks['euclid'].total_message('both algorithms ')) + + msg := [ + 'Seems to me as if euclid in big.Integer.gcd() performs better on ', + 'very-small-integers up to 8-byte/u64. The tests #-1..5 show this.', + 'The gcd_binary-algo seems to be perform better, the larger the numbers/buffers get.', + 'On my machine, i see consistent gains between ~10-30-percent with :', + '\n', + 'v -prod -cg -gc boehm bench_euclid.v', + '\n', + 'This test covers multiplied primes up-to a length of 300-char-digits in ', + 'a decimal-string. This equals (188-byte) == 47 x u32-values.', + 'edit/change primes in $prime.toml_path', + 'Improvements & critique are welcome : \n', + 'https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/', + ].join('\n') + + println(msg) +} + +fn run_benchmark(data []DataI, heap bool, mut clocks Clocks) bool { + mut testdata := []TestDataI{} + for elem in data { + // if elem is StackData || elem is HeapData{ + // testdata << elem + // } + // TODO: this reads strange + if elem is StackData { + testdata << elem + } + if elem is HeapData { + testdata << elem + } + } + // some statistics + // + mut tmp := []int{cap: testdata.len * 3} + for set in testdata { + for prime in [set.r, set.aa, set.bb] { + bi, _ := prime.bytes() + tmp << bi_buffer_len(bi) + } + } + tmp.sort() + min_byte := tmp.first() * 4 + max_byte := tmp.last() * 4 + mut buffer_space := 0 + for tmp.len != 0 { + buffer_space += tmp.pop() + } + + // trying to balance prime-size and item-count + // minimum rounds is 100-times + // + mut rounds := 2000 / ((3 * buffer_space) / testdata.len) + rounds = if rounds < 100 { 100 } else { rounds } + + ratio := (buffer_space * 4) / (testdata.len * 3) + + msg := [ + 'avg-$ratio-byte/Prime, $min_byte-byte < Prime < $max_byte-byte \n', + '~${buffer_space * 4 / 1024}-Kb-', + if heap { 'Heap' } else { 'Stack' }, + '-space for $testdata.len-items x ', + '$rounds-rounds', + ].join('') + println(msg) + + mut cycles := 0 + for algo, mut clock in clocks { + cycles = 0 + clock.step() + for cycles < rounds { + for set in testdata { + match algo { + 'euclid' { + if set.r != set.aa.gcd(set.bb) { + eprintln('$algo failed ?') + clock.fail() + break + } + } + 'gcd_binary' { + if set.r != set.aa.gcd_binary(set.bb) { + eprintln('$algo failed ?') + clock.fail() + break + } + } + else { + eprintln('unknown algo was "$algo"') + continue + } + } + } // eo-for over testdata + if with_dots { + print('.') + } + cycles += 1 + } // eo-for cycles + if with_dots { + println('') + } + clock.ok() + clock.measure(algo) + } // eo-for-loop over algo, clock + + return true +} + +fn bench_euclid_vs_binary(test_config PrimeCfg, heap bool, predicate_fn fn (ps PrimeSet) bool, mut clocks Clocks) bool { + testprimes := prime.random_set(test_config) or { panic(err) } + + // validate the test-data + // + gcd_primes := testprimes.map(prepare_and_test_gcd(it, predicate_fn)).filter(it != empty_set) + + // just to make sure all generated primes are sane + // + assert gcd_primes.len == testprimes.len + + // casting the decimal-strings into big.Integers + // here to avoid measuring string-parsing-cycles + // during later testing. + // + mut casted_sets := if heap { gcd_primes.map(unsafe { + DataI(&PrimeSet(&it)).cast() + }) } else { gcd_primes.map(unsafe { + DataI(&PrimeSet(&it)).cast() + }) } + + // ready use the primes in the benchmark + // + return run_benchmark(casted_sets, heap, mut clocks) +} + +fn prepare_and_test_gcd(primeset PrimeSet, test fn (ps PrimeSet) bool) GCDSet { + if !primeset.predicate(test) { + eprintln('? Corrupt Testdata was ?') + dump(primeset) + return empty_set // {'1', '1', '1'} + } + + cast_bi := bi_from_decimal_string + + r := cast_bi(primeset.r) + aa := cast_bi(primeset.a) * r + bb := cast_bi(primeset.b) * r + gcd := aa.gcd(bb) + + return GCDSet{'$gcd', '$aa', '$bb'} +} + +fn prime_file_exists(path string) bool { + return os.is_readable(path) +} + +// bi_from_decimal_string converts a string-of-decimals into +// a math.big.Integer using the big.Integers 'from_radix-fn' +// +pub fn bi_from_decimal_string(s string) big.Integer { + return big.integer_from_radix(s, u32(10)) or { + msg := [ + 'Cannot convert prime from decimal-string.', + 'prime was : "$s"\n', + ].join('\n') + panic(msg) + } +} + +// need the bi.digits.len - during test only - to calculate +// the size of big.Integers-buffer +// +fn bi_buffer_len(input []byte) int { + if input.len == 0 { + return 0 + } + // pad input + mut padded_input := []byte{len: ((input.len + 3) & ~0x3) - input.len, cap: (input.len + 3) & ~0x3, init: 0x0} + padded_input << input + mut digits := []u32{len: padded_input.len / 4} + // combine every 4 bytes into a u32 and insert into n.digits + for i := 0; i < padded_input.len; i += 4 { + x3 := u32(padded_input[i]) + x2 := u32(padded_input[i + 1]) + x1 := u32(padded_input[i + 2]) + x0 := u32(padded_input[i + 3]) + val := (x3 << 24) | (x2 << 16) | (x1 << 8) | x0 + digits[(padded_input.len - i) / 4 - 1] = val + } + return digits.len +} + +[heap] +pub struct HeapData { +pub mut: + r big.Integer + aa big.Integer + bb big.Integer +} + +pub fn (hd HeapData) to_primeset() PrimeSet { + return PrimeSet{ + r: '$hd.r' + a: '$hd.aa' + b: '$hd.bb' + } +} + +pub fn (hd HeapData) from_primeset(p PrimeSet) DataI { + return DataI(HeapData{ + r: bi_from_decimal_string(p.r) + aa: bi_from_decimal_string(p.a) + bb: bi_from_decimal_string(p.b) + }) +} + +pub struct StackData { +pub mut: + r big.Integer + aa big.Integer + bb big.Integer +} + +pub fn (sd StackData) to_primeset() PrimeSet { + return PrimeSet{ + r: '$sd.r' + a: '$sd.aa' + b: '$sd.bb' + } +} + +pub fn (sd StackData) from_primeset(p PrimeSet) DataI { + return DataI(StackData{ + r: bi_from_decimal_string(p.r) + aa: bi_from_decimal_string(p.a) + bb: bi_from_decimal_string(p.b) + }) +} diff --git a/vlib/v/tests/bench/math_big_gcd/prime/maker.v b/vlib/v/tests/bench/math_big_gcd/prime/maker.v new file mode 100644 index 0000000000..04b70449ba --- /dev/null +++ b/vlib/v/tests/bench/math_big_gcd/prime/maker.v @@ -0,0 +1,177 @@ +module prime + +import rand +import toml +import os + +pub const toml_path = 'vlib/v/tests/bench/math_big_gcd/primes.toml' + +pub interface DataI { + to_primeset() PrimeSet + from_primeset(PrimeSet) DataI +} + +pub fn (di DataI) cast() DataI { + return T{}.from_primeset(di.to_primeset()) +} + +pub type PrimeCfg = PrimeSet + +pub fn (pc PrimeCfg) short() string { + return "r: '$pc.r' a: '$pc.a' b: '$pc.b'" +} + +[heap] +pub struct PrimeSet { +pub mut: + r string [required] + a string [required] + b string [required] +} + +pub fn (p PrimeSet) to_primeset() PrimeSet { + return p +} + +pub fn (p PrimeSet) from_primeset(ps PrimeSet) DataI { + return ps +} + +pub fn (p PrimeSet) predicate(pred fn (data PrimeSet) bool) bool { + return pred(p) +} + +pub fn (p PrimeSet) key() string { + return [p.r, p.a, p.b].join('.') +} + +pub fn (p PrimeSet) str() string { + return [p.r, p.a, p.b].join(' ') +} + +fn extract_count(s string) int { + digits := '0123456789'.split('') + if (s == '') || !s.split('').any(it in digits) { + return 0 + } + ds := s.split('').filter(it in digits) + return ds.join('').int() +} + +// sizes lists the available names for prime-number sections. +pub fn sizes() []string { + primes := read_toml_file() + return primes.keys() +} + +// usage returns section-names and the count of available primes +// +pub fn usage() string { + primes := read_toml_file() + return sizes().map('$it[..${primes[it].len}]').join('\n\t') +} + +// reads the Map[string] []string from disk +// and returns the parsed content +fn read_toml_file() map[string][]string { + fp := os.join_path(@VROOT, prime.toml_path) + + tm_doc := toml.parse_file(fp) or { + err_msg := 'expected $fp' + eprintln(err_msg) + panic(err) + } + // TODO what happens if this goes wrong ? + tm_primes := tm_doc.value('primes') as map[string]toml.Any + + msg := 'expected a map[string][]string in TOML-data ? corrupt ?' + mut p := map[string][]string{} + for k in tm_primes.keys() { + p[k] = []string{} + arr := tm_primes[k] or { panic(msg) } + for _, elem in arr.array() { + p[k] << elem as string + } + } + return p +} + +pub fn random_list(cfg []string) []string { + primes := read_toml_file() + + mut p_list := []string{} + match cfg.len { + 1 { // prime-size e.g. 'xs' given + if cfg[0] !in primes { + return p_list + } else { + return primes[cfg[0]] + } + } + 2 { // prime-size and limiter given e.g 'xs.15' + prime_size := cfg[0] + if prime_size !in primes { + return p_list + } + + mut prime_count := extract_count(cfg[1]) + if prime_count == 0 { + return primes[prime_size] + } + mut num := '' + for prime_count != 0 { + num = primes[prime_size][rand.int_in_range(0, primes[prime_size].len - 1)] + if num in p_list { + continue + } + p_list << num + prime_count -= 1 + if prime_count >= primes[prime_size].len { + p_list = primes[prime_size] + break + } + } // eo-for + return p_list + } // cfg not understood + else { + return p_list + } + } + return p_list +} + +pub fn random_set(cfg PrimeCfg) ?[]PrimeSet { + p_lists := [ + cfg.r.split('.'), + cfg.a.split('.'), + cfg.b.split('.'), + ].map(random_list(it.map(it.trim_space().to_lower()))) + + // test for empty lists + // + if p_lists.any(it.len == 0) { + msg := [ + 'bad config was :\n\n"$cfg"', + "maybe try e.g { r: 'l.5' a: 'xxl.5 b: 'xl.10' } makes a set of 250", + 'your config was { $cfg.short() }', + 'sizes $usage()\n', + ].join('\n\n') + + return error(msg) + } + // filter unique combinations thru map + // + mut tmp := map[string]PrimeSet{} + for r in p_lists[0] { + for a in p_lists[1] { + for b in p_lists[2] { + d := PrimeSet{r, a, b} + if d.key() in tmp { + continue + } + tmp[d.key()] = d + } + } + } + return tmp.keys().map(tmp[it]) +} diff --git a/vlib/v/tests/bench/math_big_gcd/primes.toml b/vlib/v/tests/bench/math_big_gcd/primes.toml new file mode 100644 index 0000000000..10ee54e9c2 --- /dev/null +++ b/vlib/v/tests/bench/math_big_gcd/primes.toml @@ -0,0 +1,48 @@ +[primes] +xs = [ + "3", "5", "7", "11", "13", "17", "19", "21", "23", "27", "29", "31", "37", "41", "43" + ] +s = [ + "7823", "5419", "1889", "7213", "9817", "6521", "3299", "3709", "4217", "5737", "8753", "9187", "7013", "7951", "8647", "2089", "9241", "4993", "2819", "6469", "7879", "2341", "4733", "3259", "9697", "4373", "4231", "3001", "6577", "6449", "8783", "3947", "7687", "3221", "3449", "5527", "7411", "4339", "9461", "2909", "5449", "5843", "9007", "9461", "5419", "3571", "4133", "5417", "9043" + ] +m = [ + "55399", "57287", "53609", "13397", "53051", "95009", "44257", "15263", "47149", "53791", "67219", "35339", "92789", "40751", "27827", "89867", "45953", "81041", "34847", "66359", "73291", "31397", "19577", "31081", "73613", "57413", "42331", "61057", "68399", "21937", "15559", "31667", "39161", "12011", "32237", "28411", "43207", "17957", "60611", "61781", "61363", "92779", "87671", "68711", "59419" + ] +ml = [ + "3197239", "4777723", "5634029", "5345227", "3775663", "2577733", "2484731", "9489157", "9795311", "8807549", "4211929", "2192143", "1790611", "6124379", "7110449", "3423877", "3104219", "7823597", "4007569", "9538877", "9973409", "1337753", "2981431", "2052977", "3881461", "3107569", "6681733", "7208623", "6121067", "7024543", "7781927", "1355129", "3668941", "6326527", "8262383", "9656587", "8881111", "8026561", "8561011", "2398367", "8479967", "9903059" + ] +l = [ + "912325499", "724443641", "385137293", "926358929", "890097763", "130750339", "116017079", "144416561", "407856209", "410030717", "197927203", "202551331", "977001847", "538242283", "257127457", "818698583", "752243881", "596336339", "557356187", "240508001", "387269011", "456025643", "877258717", "369868633", "362001583", "864195281", "599680687", "215741453", "804623063", "938568131", "404699117", "255340313", "888734827" + ] +xl = [ + "4574666035183", "8417547875059", "4570443646591", "5048274773407", "7164005295869", "4518845199877", "5614561710673", "5260860119369", "7543626839449", "7276541368879", "7214431178011" + ] +xxl = [ + "835105110050347323134847370069", "143404874801173091148814190773", "201127251710413955312944973641", "404730213474815811826728006169", "667169667959358791072930585279", "501481765763667164242641192419", "446412154680934394834729305967", "460686874458826922684722184083", "830222457872535605976269306419", "185066433124965184224530740781" + ] +xxxl = [ + "41538632149409670203875645951895100583020903271493", "55019010857476970540160613582574368926321997435229", "80761049172599658440341257254896253118458575469719", "92617063636548016967671406179456164264446680329889", "40758128168942353395091585296464433019876977918493", "17036082713942833563217660171659128981053237307867" + ] +mega = [ + "198661663026514394979566031544326159107541699547309448720761733220478761329", + "992115042569270219928946682049053165598301867001123059433315541401938225427", + "101119309563131407758796252580471889739272930201455389991048893945940402311", + "516443083987514486594464741392901709033855328978282817026554473797439114541", + "660494613626358425463317297108260219544493771347620531225987686364487648369", + "175165388963515374726669791577365607804663690256056516815884117391457090227", + "121262978328120003250427616443327736340111055737282325346316041430003976093", + "532555459206091912293747167640580582656170020402363753480607569083534637187", + "839484247168370590833838270348781593651940895434421321502494070475339668381" + ] +giga = [ + "2884936882895429808357269729604622214585402525045525076838045027770319174012257013842658348401917179", + "8037083210794932674709412895115006556909092833822611837764114152389681777593745417056345965060910227", + "1910357352360710783480406392361973438148872596091183337139932590856337321762535807846055642301808061" + ] +crazy = [ + "351658062438469364437012934379378697325167334523785230519867682144348475237641332775452277714957257653189294629411183788781622118310946477143255873639", + "418013511717417121492936230539394143156615405871395138061025561400693302897049176138915109523457335116963751456368324397935946348902043658050622087259" + ] +biggest = [ + "477465885805093403826364314122183623487417165049378763285130618473465816825613056800831031290901922120098722325692953433392398177611894890290680254055832803915973853297598146797960159348700184977452096596048789018857150992316047149695774005795966830209155566710937235622005570209535860865999594857311" + ]