math.big: add a new greatest-common-divisor-algo for big.Integer, also add a benchmark for it (#12261)

2021-10-26 10:10:13 +02:00 · 2021-10-26 10:10:13 +02:00 · f14dabc6bd
parent f62b2dcfa7
commit f14dabc6bd
5 changed files with 633 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -20,6 +20,7 @@ vdbg.exe
 *.dll
 *.lib
 *.bak
+*.dylib
 a.out
 .noprefix.vrepl_temp

--- a/vlib/math/big/integer.v
+++ b/vlib/math/big/integer.v
@ -761,3 +761,71 @@ pub fn (a Integer) isqrt() Integer {
 	}
 	return result
 }
+
+[inline]
+fn bi_min(a Integer, b Integer) Integer {
+	return if a < b { a } else { b }
+}
+
+[inline]
+fn bi_max(a Integer, b Integer) Integer {
+	return if a > b { a } else { b }
+}
+
+[direct_array_access]
+fn (bi Integer) msb() u32 {
+	for idx := 0; idx < bi.digits.len; idx += 1 {
+		word := bi.digits[idx]
+		if word > 0 {
+			return u32((idx * 32) + bits.trailing_zeros_32(word))
+		}
+	}
+	return u32(32)
+}
+
+// Greatest-Common-Divisor https://en.wikipedia.org/wiki/Binary_GCD_algorithm
+// The code below follows the 2013-christmas-special by D. Lemire & R. Corderoy
+// https://en.algorithmica.org/hpc/analyzing-performance/gcd/
+//
+// discussion & further info https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/
+
+pub fn (x Integer) gcd_binary(y Integer) Integer {
+	// Since standard-euclid-gcd is much faster on smaller sizes 4-8-Byte.
+	// In such a case, one could delegate back to big.Integer.gcd()
+	// Uncomment below and a all long-long goes to euclid-gcd.
+	//
+	// if x.digits.len + y.digits.len <= 4 {
+	//   return x.gcd( y )
+	// }
+
+	if x.signum == 0 {
+		return y.abs()
+	}
+	if y.signum == 0 {
+		return x.abs()
+	}
+
+	if x.signum < 0 {
+		return x.neg().gcd(y)
+	}
+	if y.signum < 0 {
+		return x.gcd(y.neg())
+	}
+
+	mut a := x
+	mut b := y
+
+	mut az := a.msb()
+	bz := b.msb()
+	shift := util.umin(az, bz)
+	b = b.rshift(bz)
+
+	for a.signum != 0 {
+		a = a.rshift(az)
+		diff := b - a
+		az = diff.msb()
+		b = bi_min(a, b)
+		a = diff.abs()
+	}
+	return b.lshift(shift)
+}
--- a/vlib/v/tests/bench/math_big_gcd/bench_euclid.v
+++ b/vlib/v/tests/bench/math_big_gcd/bench_euclid.v
@ -0,0 +1,339 @@
+module main
+
+// NB: this benchmark is preferable to be compiled with: `v -prod -cg -gc boehm bench_euclid.v`
+import math.big
+import benchmark
+import os
+import v.tests.bench.math_big_gcd.prime {
+	DataI,
+	PrimeCfg,
+	PrimeSet,
+}
+
+interface TestDataI {
+	r big.Integer
+	aa big.Integer
+	bb big.Integer
+}
+
+type GCDSet = PrimeSet
+type Clocks = map[string]benchmark.Benchmark
+
+const (
+	empty_set = GCDSet{'1', '1', '1'}
+	with_dots = false
+)
+
+fn main() {
+	fp := os.join_path(@VROOT, prime.toml_path)
+	if !prime_file_exists(fp) {
+		panic('expected file |$fp| - not found.')
+	}
+
+	mut clocks := Clocks(map[string]benchmark.Benchmark{})
+
+	for algo in [
+		'euclid',
+		'gcd_binary',
+		//'u32binary',
+		//'u64binary'
+	] {
+		clocks[algo] = benchmark.new_benchmark()
+	}
+
+	// any test-prime-set needs to pass this predicate
+	// before used in the benchmark.
+	//
+	predicate_fn := fn (ps PrimeSet) bool {
+		cast_bi := bi_from_decimal_string
+
+		r := cast_bi(ps.r)
+		aa := r * cast_bi(ps.a)
+		bb := r * cast_bi(ps.b)
+		gcd := aa.gcd(bb)
+
+		return if [
+			gcd != big.one_int,
+			gcd == bb.gcd(aa),
+			gcd == r,
+			aa != bb,
+			(aa % gcd) == big.zero_int,
+			(bb % gcd) == big.zero_int,
+		].all(it == true)
+		{ true } else { false }
+	}
+
+	cfgs := [
+		// root-prime    a-prime      b-prime
+		['s.3', 'xs.3', 's'],
+		['s.10', 's.10', 'm.all'],
+		['m.9', 's.10', 'xs'],
+		['l.40', 'xs.10', 's.5'],
+		['ml.20', 'm', 's.15'],
+		['xl', 's.10', 'l.15'],
+		['xxl', 'l.10', 'xl'],
+		['l.30', 'm.10', 'xxl'],
+		['xxl', 'xl', 'm.15'],
+		['mega', 'xl.6', 's'],
+		['xxl', 'xxxl.10', 'm.30'],
+		['mega', 'xxxl', 'mega'],
+		['giga.5', 'mega', 'giga'],
+		['crazy', 'mega', 'giga'],
+		['s', 'biggest', 'crazy'],
+		['biggest', 'crazy', 'giga'],
+	].map(PrimeCfg{it[0], it[1], it[2]})
+
+	println('\n$cfgs.len x Tests')
+
+	for i, prime_cfg in cfgs {
+		println('\n#-${i + 1}(Stack) "$prime_cfg"')
+		bench_euclid_vs_binary(prime_cfg, false, predicate_fn, mut clocks)
+
+		// just-to-be-sure, but makes no difference in this test.
+		// println('\n#-${i + 1}(Heap) "$prime_cfg"')
+		// bench_euclid_vs_binary(prime_cfg, true,  predicate_fn, mut clocks)
+	}
+	println('')
+	for _, mut clock in clocks {
+		clock.stop()
+	}
+
+	println(clocks['euclid'].total_message('both algorithms '))
+
+	msg := [
+		'Seems to me as if euclid in big.Integer.gcd() performs better on ',
+		'very-small-integers up to 8-byte/u64. The tests #-1..5 show this.',
+		'The gcd_binary-algo seems to be perform better, the larger the numbers/buffers get.',
+		'On my machine, i see consistent gains between ~10-30-percent with :',
+		'\n',
+		'v -prod -cg -gc boehm bench_euclid.v',
+		'\n',
+		'This test covers multiplied primes up-to a length of 300-char-digits in ',
+		'a decimal-string. This equals (188-byte) == 47 x u32-values.',
+		'edit/change primes in $prime.toml_path',
+		'Improvements & critique are welcome : \n',
+		'https://lemire.me/blog/2013/12/26/fastest-way-to-compute-the-greatest-common-divisor/',
+	].join('\n')
+
+	println(msg)
+}
+
+fn run_benchmark(data []DataI, heap bool, mut clocks Clocks) bool {
+	mut testdata := []TestDataI{}
+	for elem in data {
+		// if elem is StackData || elem is HeapData{
+		// 	testdata << elem
+		// }
+		// TODO: this reads strange
+		if elem is StackData {
+			testdata << elem
+		}
+		if elem is HeapData {
+			testdata << elem
+		}
+	}
+	// some statistics
+	//
+	mut tmp := []int{cap: testdata.len * 3}
+	for set in testdata {
+		for prime in [set.r, set.aa, set.bb] {
+			bi, _ := prime.bytes()
+			tmp << bi_buffer_len(bi)
+		}
+	}
+	tmp.sort()
+	min_byte := tmp.first() * 4
+	max_byte := tmp.last() * 4
+	mut buffer_space := 0
+	for tmp.len != 0 {
+		buffer_space += tmp.pop()
+	}
+
+	// trying to balance prime-size and item-count
+	// minimum rounds is 100-times
+	//
+	mut rounds := 2000 / ((3 * buffer_space) / testdata.len)
+	rounds = if rounds < 100 { 100 } else { rounds }
+
+	ratio := (buffer_space * 4) / (testdata.len * 3)
+
+	msg := [
+		'avg-$ratio-byte/Prime, $min_byte-byte < Prime < $max_byte-byte \n',
+		'~${buffer_space * 4 / 1024}-Kb-',
+		if heap { 'Heap' } else { 'Stack' },
+		'-space for $testdata.len-items x ',
+		'$rounds-rounds',
+	].join('')
+	println(msg)
+
+	mut cycles := 0
+	for algo, mut clock in clocks {
+		cycles = 0
+		clock.step()
+		for cycles < rounds {
+			for set in testdata {
+				match algo {
+					'euclid' {
+						if set.r != set.aa.gcd(set.bb) {
+							eprintln('$algo failed ?')
+							clock.fail()
+							break
+						}
+					}
+					'gcd_binary' {
+						if set.r != set.aa.gcd_binary(set.bb) {
+							eprintln('$algo failed ?')
+							clock.fail()
+							break
+						}
+					}
+					else {
+						eprintln('unknown algo was "$algo"')
+						continue
+					}
+				}
+			} // eo-for over testdata
+			if with_dots {
+				print('.')
+			}
+			cycles += 1
+		} // eo-for cycles
+		if with_dots {
+			println('')
+		}
+		clock.ok()
+		clock.measure(algo)
+	} // eo-for-loop over algo, clock
+
+	return true
+}
+
+fn bench_euclid_vs_binary(test_config PrimeCfg, heap bool, predicate_fn fn (ps PrimeSet) bool, mut clocks Clocks) bool {
+	testprimes := prime.random_set(test_config) or { panic(err) }
+
+	// validate the test-data
+	//
+	gcd_primes := testprimes.map(prepare_and_test_gcd(it, predicate_fn)).filter(it != empty_set)
+
+	// just to make sure all generated primes are sane
+	//
+	assert gcd_primes.len == testprimes.len
+
+	// casting the decimal-strings into big.Integers
+	// here to avoid measuring string-parsing-cycles
+	// during later testing.
+	//
+	mut casted_sets := if heap { gcd_primes.map(unsafe {
+			DataI(&PrimeSet(&it)).cast<HeapData>()
+		}) } else { gcd_primes.map(unsafe {
+			DataI(&PrimeSet(&it)).cast<StackData>()
+		}) }
+
+	// ready use the primes in the benchmark
+	//
+	return run_benchmark(casted_sets, heap, mut clocks)
+}
+
+fn prepare_and_test_gcd(primeset PrimeSet, test fn (ps PrimeSet) bool) GCDSet {
+	if !primeset.predicate(test) {
+		eprintln('? Corrupt Testdata was ?')
+		dump(primeset)
+		return empty_set // {'1', '1', '1'}
+	}
+
+	cast_bi := bi_from_decimal_string
+
+	r := cast_bi(primeset.r)
+	aa := cast_bi(primeset.a) * r
+	bb := cast_bi(primeset.b) * r
+	gcd := aa.gcd(bb)
+
+	return GCDSet{'$gcd', '$aa', '$bb'}
+}
+
+fn prime_file_exists(path string) bool {
+	return os.is_readable(path)
+}
+
+// bi_from_decimal_string converts a string-of-decimals into
+// a math.big.Integer using the big.Integers 'from_radix-fn'
+//
+pub fn bi_from_decimal_string(s string) big.Integer {
+	return big.integer_from_radix(s, u32(10)) or {
+		msg := [
+			'Cannot convert prime from decimal-string.',
+			'prime was : "$s"\n',
+		].join('\n')
+		panic(msg)
+	}
+}
+
+// need the bi.digits.len - during test only - to calculate
+// the size of big.Integers-buffer
+//
+fn bi_buffer_len(input []byte) int {
+	if input.len == 0 {
+		return 0
+	}
+	// pad input
+	mut padded_input := []byte{len: ((input.len + 3) & ~0x3) - input.len, cap: (input.len + 3) & ~0x3, init: 0x0}
+	padded_input << input
+	mut digits := []u32{len: padded_input.len / 4}
+	// combine every 4 bytes into a u32 and insert into n.digits
+	for i := 0; i < padded_input.len; i += 4 {
+		x3 := u32(padded_input[i])
+		x2 := u32(padded_input[i + 1])
+		x1 := u32(padded_input[i + 2])
+		x0 := u32(padded_input[i + 3])
+		val := (x3 << 24) | (x2 << 16) | (x1 << 8) | x0
+		digits[(padded_input.len - i) / 4 - 1] = val
+	}
+	return digits.len
+}
+
+[heap]
+pub struct HeapData {
+pub mut:
+	r  big.Integer
+	aa big.Integer
+	bb big.Integer
+}
+
+pub fn (hd HeapData) to_primeset() PrimeSet {
+	return PrimeSet{
+		r: '$hd.r'
+		a: '$hd.aa'
+		b: '$hd.bb'
+	}
+}
+
+pub fn (hd HeapData) from_primeset(p PrimeSet) DataI {
+	return DataI(HeapData{
+		r: bi_from_decimal_string(p.r)
+		aa: bi_from_decimal_string(p.a)
+		bb: bi_from_decimal_string(p.b)
+	})
+}
+
+pub struct StackData {
+pub mut:
+	r  big.Integer
+	aa big.Integer
+	bb big.Integer
+}
+
+pub fn (sd StackData) to_primeset() PrimeSet {
+	return PrimeSet{
+		r: '$sd.r'
+		a: '$sd.aa'
+		b: '$sd.bb'
+	}
+}
+
+pub fn (sd StackData) from_primeset(p PrimeSet) DataI {
+	return DataI(StackData{
+		r: bi_from_decimal_string(p.r)
+		aa: bi_from_decimal_string(p.a)
+		bb: bi_from_decimal_string(p.b)
+	})
+}
--- a/vlib/v/tests/bench/math_big_gcd/prime/maker.v
+++ b/vlib/v/tests/bench/math_big_gcd/prime/maker.v
@ -0,0 +1,177 @@
+module prime
+
+import rand
+import toml
+import os
+
+pub const toml_path = 'vlib/v/tests/bench/math_big_gcd/primes.toml'
+
+pub interface DataI {
+	to_primeset() PrimeSet
+	from_primeset(PrimeSet) DataI
+}
+
+pub fn (di DataI) cast<T>() DataI {
+	return T{}.from_primeset(di.to_primeset())
+}
+
+pub type PrimeCfg = PrimeSet
+
+pub fn (pc PrimeCfg) short() string {
+	return "r: '$pc.r' a: '$pc.a' b: '$pc.b'"
+}
+
+[heap]
+pub struct PrimeSet {
+pub mut:
+	r string [required]
+	a string [required]
+	b string [required]
+}
+
+pub fn (p PrimeSet) to_primeset() PrimeSet {
+	return p
+}
+
+pub fn (p PrimeSet) from_primeset(ps PrimeSet) DataI {
+	return ps
+}
+
+pub fn (p PrimeSet) predicate(pred fn (data PrimeSet) bool) bool {
+	return pred(p)
+}
+
+pub fn (p PrimeSet) key() string {
+	return [p.r, p.a, p.b].join('.')
+}
+
+pub fn (p PrimeSet) str() string {
+	return [p.r, p.a, p.b].join(' ')
+}
+
+fn extract_count(s string) int {
+	digits := '0123456789'.split('')
+	if (s == '') || !s.split('').any(it in digits) {
+		return 0
+	}
+	ds := s.split('').filter(it in digits)
+	return ds.join('').int()
+}
+
+// sizes lists the available names for prime-number sections.
+pub fn sizes() []string {
+	primes := read_toml_file()
+	return primes.keys()
+}
+
+// usage returns section-names and the count of available primes
+//
+pub fn usage() string {
+	primes := read_toml_file()
+	return sizes().map('$it[..${primes[it].len}]').join('\n\t')
+}
+
+// reads the Map[string] []string from disk
+// and returns the parsed content
+fn read_toml_file() map[string][]string {
+	fp := os.join_path(@VROOT, prime.toml_path)
+
+	tm_doc := toml.parse_file(fp) or {
+		err_msg := 'expected $fp'
+		eprintln(err_msg)
+		panic(err)
+	}
+	// TODO what happens if this goes wrong ?
+	tm_primes := tm_doc.value('primes') as map[string]toml.Any
+
+	msg := 'expected a map[string][]string in TOML-data ? corrupt ?'
+	mut p := map[string][]string{}
+	for k in tm_primes.keys() {
+		p[k] = []string{}
+		arr := tm_primes[k] or { panic(msg) }
+		for _, elem in arr.array() {
+			p[k] << elem as string
+		}
+	}
+	return p
+}
+
+pub fn random_list(cfg []string) []string {
+	primes := read_toml_file()
+
+	mut p_list := []string{}
+	match cfg.len {
+		1 { // prime-size e.g. 'xs' given
+			if cfg[0] !in primes {
+				return p_list
+			} else {
+				return primes[cfg[0]]
+			}
+		}
+		2 { // prime-size and limiter given e.g 'xs.15'
+			prime_size := cfg[0]
+			if prime_size !in primes {
+				return p_list
+			}
+
+			mut prime_count := extract_count(cfg[1])
+			if prime_count == 0 {
+				return primes[prime_size]
+			}
+			mut num := ''
+			for prime_count != 0 {
+				num = primes[prime_size][rand.int_in_range(0, primes[prime_size].len - 1)]
+				if num in p_list {
+					continue
+				}
+				p_list << num
+				prime_count -= 1
+				if prime_count >= primes[prime_size].len {
+					p_list = primes[prime_size]
+					break
+				}
+			} // eo-for
+			return p_list
+		} // cfg not understood
+		else {
+			return p_list
+		}
+	}
+	return p_list
+}
+
+pub fn random_set(cfg PrimeCfg) ?[]PrimeSet {
+	p_lists := [
+		cfg.r.split('.'),
+		cfg.a.split('.'),
+		cfg.b.split('.'),
+	].map(random_list(it.map(it.trim_space().to_lower())))
+
+	// test for empty lists
+	//
+	if p_lists.any(it.len == 0) {
+		msg := [
+			'bad config was :\n\n"$cfg"',
+			"maybe try e.g { r: 'l.5' a: 'xxl.5 b: 'xl.10' } makes a set of 250",
+			'your config was { $cfg.short() }',
+			'sizes $usage()\n',
+		].join('\n\n')
+
+		return error(msg)
+	}
+	// filter unique combinations thru map
+	//
+	mut tmp := map[string]PrimeSet{}
+	for r in p_lists[0] {
+		for a in p_lists[1] {
+			for b in p_lists[2] {
+				d := PrimeSet{r, a, b}
+				if d.key() in tmp {
+					continue
+				}
+				tmp[d.key()] = d
+			}
+		}
+	}
+	return tmp.keys().map(tmp[it])
+}
--- a/vlib/v/tests/bench/math_big_gcd/primes.toml
+++ b/vlib/v/tests/bench/math_big_gcd/primes.toml
@ -0,0 +1,48 @@
+[primes]
+xs = [
+  "3", "5", "7", "11", "13", "17", "19", "21", "23", "27", "29", "31", "37", "41", "43"
+  ]
+s = [
+  "7823",    "5419",    "1889",    "7213",    "9817",    "6521",    "3299",    "3709",    "4217",    "5737",    "8753",    "9187",    "7013",    "7951",    "8647",    "2089",    "9241",    "4993",    "2819",    "6469",    "7879",    "2341",    "4733",    "3259",    "9697",    "4373",    "4231",    "3001",    "6577",    "6449",    "8783",    "3947",    "7687",    "3221",    "3449",    "5527",    "7411",    "4339",    "9461",    "2909",    "5449",    "5843",    "9007",    "9461",    "5419",    "3571",    "4133",    "5417",    "9043"
+  ]
+m = [
+  "55399",    "57287",    "53609",    "13397",    "53051",    "95009",    "44257",    "15263",    "47149",    "53791",    "67219",    "35339",    "92789",    "40751",    "27827",    "89867",    "45953",    "81041",    "34847",    "66359",    "73291",    "31397",    "19577",    "31081",    "73613",    "57413",    "42331",    "61057",    "68399",    "21937",    "15559",    "31667",    "39161",    "12011",    "32237",    "28411",    "43207",    "17957",    "60611",    "61781",    "61363",    "92779",    "87671",    "68711",    "59419"
+  ]
+ml = [
+  "3197239",    "4777723",    "5634029",    "5345227",    "3775663",    "2577733",    "2484731",    "9489157",    "9795311",    "8807549",    "4211929",    "2192143",    "1790611",    "6124379",    "7110449",    "3423877",    "3104219",    "7823597",    "4007569",    "9538877",    "9973409",    "1337753",    "2981431",    "2052977",    "3881461",    "3107569",    "6681733",    "7208623",    "6121067",    "7024543",    "7781927",    "1355129",    "3668941",    "6326527",    "8262383",    "9656587",    "8881111",    "8026561",    "8561011",    "2398367",    "8479967",    "9903059"
+  ]
+l = [
+  "912325499",    "724443641",    "385137293",    "926358929",    "890097763",    "130750339",    "116017079",    "144416561",    "407856209",    "410030717",    "197927203",    "202551331",    "977001847",    "538242283",    "257127457",    "818698583",    "752243881",    "596336339",    "557356187",    "240508001",    "387269011",    "456025643",    "877258717",    "369868633",    "362001583",    "864195281",    "599680687",    "215741453",    "804623063",    "938568131",    "404699117",    "255340313",    "888734827"
+  ]
+xl = [
+  "4574666035183",    "8417547875059",    "4570443646591",    "5048274773407",    "7164005295869",    "4518845199877",    "5614561710673",    "5260860119369",    "7543626839449",    "7276541368879",    "7214431178011"
+  ]
+xxl = [
+  "835105110050347323134847370069",    "143404874801173091148814190773",    "201127251710413955312944973641",    "404730213474815811826728006169",    "667169667959358791072930585279",    "501481765763667164242641192419",    "446412154680934394834729305967",    "460686874458826922684722184083",    "830222457872535605976269306419",    "185066433124965184224530740781"
+  ]
+xxxl = [
+  "41538632149409670203875645951895100583020903271493",    "55019010857476970540160613582574368926321997435229",    "80761049172599658440341257254896253118458575469719",    "92617063636548016967671406179456164264446680329889",    "40758128168942353395091585296464433019876977918493",    "17036082713942833563217660171659128981053237307867"
+  ]
+mega = [
+  "198661663026514394979566031544326159107541699547309448720761733220478761329",
+  "992115042569270219928946682049053165598301867001123059433315541401938225427",
+  "101119309563131407758796252580471889739272930201455389991048893945940402311",
+  "516443083987514486594464741392901709033855328978282817026554473797439114541",
+  "660494613626358425463317297108260219544493771347620531225987686364487648369",
+  "175165388963515374726669791577365607804663690256056516815884117391457090227",
+  "121262978328120003250427616443327736340111055737282325346316041430003976093",
+  "532555459206091912293747167640580582656170020402363753480607569083534637187",
+  "839484247168370590833838270348781593651940895434421321502494070475339668381"
+  ]
+giga = [
+  "2884936882895429808357269729604622214585402525045525076838045027770319174012257013842658348401917179",
+  "8037083210794932674709412895115006556909092833822611837764114152389681777593745417056345965060910227",
+  "1910357352360710783480406392361973438148872596091183337139932590856337321762535807846055642301808061"
+  ]
+crazy = [
+  "351658062438469364437012934379378697325167334523785230519867682144348475237641332775452277714957257653189294629411183788781622118310946477143255873639",
+  "418013511717417121492936230539394143156615405871395138061025561400693302897049176138915109523457335116963751456368324397935946348902043658050622087259"
+  ]
+biggest = [
+  "477465885805093403826364314122183623487417165049378763285130618473465816825613056800831031290901922120098722325692953433392398177611894890290680254055832803915973853297598146797960159348700184977452096596048789018857150992316047149695774005795966830209155566710937235622005570209535860865999594857311"
+  ]