From 273154c1aef5e0342a38ca0105982e10f3c6efd5 Mon Sep 17 00:00:00 2001
From: Vincent Laisney <vlaisney@zedat.fu-berlin.de>
Date: Thu, 16 Sep 2021 18:31:07 +0200
Subject: [PATCH] math.big: add Newton and Karatsuba algorithms (#11487)

---
 vlib/math/big/array_ops.v               |  27 ++-
 vlib/math/big/division_array_ops.v      |  45 ++---
 vlib/math/big/division_array_ops_test.v |  19 +-
 vlib/math/big/special_array_ops.v       | 228 ++++++++++++++++++++++++
 vlib/math/big/special_array_ops_test.v  | 153 ++++++++++++++++
 5 files changed, 434 insertions(+), 38 deletions(-)
 create mode 100644 vlib/math/big/special_array_ops.v
 create mode 100644 vlib/math/big/special_array_ops_test.v

diff --git a/vlib/math/big/array_ops.v b/vlib/math/big/array_ops.v
index bff4c5d450..92816fe884 100644
--- a/vlib/math/big/array_ops.v
+++ b/vlib/math/big/array_ops.v
@@ -109,10 +109,24 @@ fn subtract_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) {
 	}
 }
 
+const karatsuba_multiplication_limit = 1_000_000
+
+// set limit to choose algorithm
+
+[inline]
+fn multiply_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) {
+	if operand_a.len >= big.karatsuba_multiplication_limit
+		|| operand_b.len >= big.karatsuba_multiplication_limit {
+		karatsuba_multiply_digit_array(operand_a, operand_b, mut storage)
+	} else {
+		simple_multiply_digit_array(operand_a, operand_b, mut storage)
+	}
+}
+
 // Multiplies the unsigned (non-negative) integers represented in a and b and the product is
 // stored in storage. It assumes that storage has length equal to the sum of lengths
 // of a and b. Length refers to length of array, that is, digit count.
-fn multiply_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) {
+fn simple_multiply_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) {
 	for b_index in 0 .. operand_b.len {
 		mut carry := u64(0)
 		for a_index in 0 .. operand_a.len {
@@ -240,6 +254,17 @@ fn divide_array_by_digit(operand_a []u32, divisor u32, mut quotient []u32, mut r
 	}
 }
 
+const newton_division_limit = 10_000
+
+[inline]
+fn divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, mut remainder []u32) {
+	if operand_a.len >= big.newton_division_limit {
+		newton_divide_array_by_array(operand_a, operand_b, mut quotient, mut remainder)
+	} else {
+		binary_divide_array_by_array(operand_a, operand_b, mut quotient, mut remainder)
+	}
+}
+
 // Shifts the contents of the original array by the given amount of bits to the left.
 // This function assumes that the amount is less than 32. The storage is expected to
 // allocated with zeroes.
diff --git a/vlib/math/big/division_array_ops.v b/vlib/math/big/division_array_ops.v
index b00cda2afa..16b6801bf0 100644
--- a/vlib/math/big/division_array_ops.v
+++ b/vlib/math/big/division_array_ops.v
@@ -4,7 +4,7 @@ import math.bits
 
 // suppose operand_a bigger than operand_b and both not null.
 // Both quotient and remaider are allocated but of length 0
-fn divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, mut remainder []u32) {
+fn binary_divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, mut remainder []u32) {
 	for index in 0 .. operand_a.len {
 		remainder << operand_a[index]
 	}
@@ -39,12 +39,12 @@ fn divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, m
 	for bit_idx := int(bit_offset); bit_idx >= 0; bit_idx-- {
 		if greater_equal_from_end(remainder, divisor) {
 			bit_set(mut quotient, bit_idx)
-			subtract_in_place(mut remainder, divisor)
+			subtract_align_last_byte_in_place(mut remainder, divisor)
 		}
 		rshift_in_place(mut divisor, 1)
 	}
 
-	// ajust
+	// adjust
 	if lead_zer_remainder > lead_zer_divisor {
 		// rshift_in_place(mut quotient, lead_zer_remainder - lead_zer_divisor)
 		rshift_in_place(mut remainder, lead_zer_remainder - lead_zer_divisor)
@@ -61,7 +61,7 @@ fn divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, m
 // quicker than BitField.set_bit
 [inline]
 fn bit_set(mut a []u32, n int) {
-	byte_offset := n / 32
+	byte_offset := n >> 5
 	mask := u32(1) << u32(n % 32)
 	assert a.len >= byte_offset
 	a[byte_offset] |= mask
@@ -83,6 +83,25 @@ fn greater_equal_from_end(a []u32, b []u32) bool {
 	return true
 }
 
+// a := a - b supposed a >= b
+// attention the b operand is align with the a operand before the subtraction
+[inline]
+fn subtract_align_last_byte_in_place(mut a []u32, b []u32) {
+	mut carry := u32(0)
+	mut new_carry := u32(0)
+	offset := a.len - b.len
+	for index := a.len - b.len; index < a.len; index++ {
+		if a[index] < (b[index - offset] + carry) {
+			new_carry = 1
+		} else {
+			new_carry = 0
+		}
+		a[index] -= (b[index - offset] + carry)
+		carry = new_carry
+	}
+	assert carry == 0
+}
+
 // logical left shift
 // there is no overflow. We know that the last bits are zero
 // and that n <= 32
@@ -114,24 +133,6 @@ fn rshift_in_place(mut a []u32, n u32) {
 	}
 }
 
-// a := a - b supposed a >= b
-[inline]
-fn subtract_in_place(mut a []u32, b []u32) {
-	mut carry := u32(0)
-	mut new_carry := u32(0)
-	offset := a.len - b.len
-	for index := a.len - b.len; index < a.len; index++ {
-		if a[index] < (b[index - offset] + carry) {
-			new_carry = 1
-		} else {
-			new_carry = 0
-		}
-		a[index] -= (b[index - offset] + carry)
-		carry = new_carry
-	}
-	assert carry == 0
-}
-
 // for assert
 [inline]
 fn left_align_p(a u32, b u32) bool {
diff --git a/vlib/math/big/division_array_ops_test.v b/vlib/math/big/division_array_ops_test.v
index 91c158dd9e..0d9b13a14d 100644
--- a/vlib/math/big/division_array_ops_test.v
+++ b/vlib/math/big/division_array_ops_test.v
@@ -34,15 +34,15 @@ fn test_rshift_in_place() {
 	assert c == [u32(0x00ffffff), 0xf0f0f0f0, 1, 0x3fffffff, 1]
 }
 
-fn test_subtract_in_place() {
+fn test_subtract_align_last_byte_in_place() {
 	mut a := [u32(2), 2, 2, 2, 2]
 	mut b := [u32(1), 1, 2, 1, 1]
-	subtract_in_place(mut a, b)
+	subtract_align_last_byte_in_place(mut a, b)
 	assert a == [u32(1), 1, 0, 1, 1]
 
 	a = [u32(0), 0, 0, 0, 1]
 	b = [u32(0), 0, 1]
-	subtract_in_place(mut a, b)
+	subtract_align_last_byte_in_place(mut a, b)
 	assert a == [u32(0), 0, 0, 0, 0]
 
 	a = [u32(0), 0, 0, 0, 1, 13]
@@ -51,7 +51,7 @@ fn test_subtract_in_place() {
 	mut d := [u32(0), 0, 0]
 	d << b // to have same length
 	subtract_digit_array(a, d, mut c)
-	subtract_in_place(mut a, b)
+	subtract_align_last_byte_in_place(mut a, b)
 	assert a == [u32(0), 0, 0, u32(-1), 0, 12]
 	assert c == a
 }
@@ -126,17 +126,6 @@ fn test_divide_digit_array_06() {
 	assert r == [u32(2), 4]
 }
 
-// For debugging
-fn integer_from_u32_array(a []u32) Integer {
-	mut res := integer_from_i64(0)
-	mut multiplicand := integer_from_u32(1)
-	for i in 0 .. a.len {
-		res += integer_from_u32(a[i]) * multiplicand
-		multiplicand = multiplicand.lshift(32)
-	}
-	return res
-}
-
 fn test_many_divisions() {
 	for _ in 0 .. 100 {
 		a := random_number(30)
diff --git a/vlib/math/big/special_array_ops.v b/vlib/math/big/special_array_ops.v
new file mode 100644
index 0000000000..9ef71c28aa
--- /dev/null
+++ b/vlib/math/big/special_array_ops.v
@@ -0,0 +1,228 @@
+module big
+
+import math.bits
+import math.util
+import strings
+
+// suppose operand_a bigger than operand_b and both not null.
+// Both quotient and remaider are already allocated but of length 0
+fn newton_divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, mut remainder []u32) {
+	// tranform back to Integers (on the stack without allocation)
+	a := Integer{
+		signum: 1
+		digits: operand_a
+	}
+	b := Integer{
+		signum: 1
+		digits: operand_b
+	}
+
+	k := bit_length(a) + bit_length(b) // a*b < 2**k
+	mut x := integer_from_int(2) //  0 < x < 2**(k+1)/b  // initial guess for convergence
+	// https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division
+	// use 48/17 - 32/17.D (divisor)
+	initial_guess := (((integer_from_int(48) - (integer_from_int(32) * b)) * integer_from_i64(0x0f0f0f0f0f0f0f0f)).rshift(64)).neg() // / 17 == 0x11
+	if initial_guess > zero_int {
+		x = initial_guess
+	}
+	mut lastx := integer_from_int(0)
+	pow2_k_plus_1 := pow2(k + 1) // outside of the loop to optimize allocatio
+	for lastx != x { // main loop
+		lastx = x
+		x = (x * (pow2_k_plus_1 - (x * b))).rshift(u32(k))
+	}
+	if x * b < pow2(k) {
+		x.inc()
+	}
+	mut q := (a * x).rshift(u32(k))
+	// possible adjustments. see literature
+	if q * b > a {
+		q.dec()
+	}
+	mut r := a - (q * b)
+	if r >= b {
+		q.inc()
+		r -= b
+	}
+	quotient = q.digits
+	remainder = r.digits
+
+	for remainder.len > 0 && remainder.last() == 0 {
+		remainder.delete_last()
+	}
+}
+
+[inline]
+fn bit_length(a Integer) int {
+	return a.digits.len * 32 - bits.leading_zeros_32(a.digits.last())
+}
+
+[inline]
+fn debug_u32_str(a []u32) string {
+	mut sb := strings.new_builder(30)
+	sb.write_string('[')
+	mut first := true
+	for i in 0 .. a.len {
+		if !first {
+			sb.write_string(', ')
+		}
+		sb.write_string('0x${a[i].hex()}')
+		first = false
+	}
+	sb.write_string(']')
+	return sb.str()
+}
+
+// karatsuba algorithm for multiplication
+// possible optimisations:
+// - transform one or all the recurrences in loops
+fn karatsuba_multiply_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) {
+	// base case necessary to end recursion
+	if operand_a.len == 0 || operand_b.len == 0 {
+		for storage.len > 0 {
+			storage.delete_last()
+		}
+		return
+	}
+
+	if operand_a.len < operand_b.len {
+		multiply_digit_array(operand_b, operand_a, mut storage)
+		return
+	}
+
+	if operand_b.len == 1 {
+		multiply_array_by_digit(operand_a, operand_b[0], mut storage)
+		return
+	}
+	// karatsuba
+	// thanks to the base cases we can pass zero-length arrays to the mult func
+	half := util.imax(operand_a.len, operand_b.len) / 2
+	if half <= 0 {
+		panic('Unreachable. Both array have 1 length and multiply_array_by_digit should have been called')
+	}
+	a_l := operand_a[0..half]
+	a_h := operand_a[half..]
+	mut b_l := []u32{}
+	mut b_h := []u32{}
+	if half <= operand_b.len {
+		b_l = operand_b[0..half]
+		b_h = operand_b[half..]
+	} else {
+		b_l = unsafe { operand_b }
+		// b_h = []u32{}
+	}
+
+	// use storage for p_1 to avoid allocation and copy later
+	multiply_digit_array(a_h, b_h, mut storage)
+
+	mut p_3 := []u32{len: a_l.len + b_l.len + 1, init: 0}
+	multiply_digit_array(a_l, b_l, mut p_3)
+
+	mut tmp_1 := []u32{len: util.imax(a_h.len, a_l.len) + 1, init: 0}
+	mut tmp_2 := []u32{len: util.imax(b_h.len, b_l.len) + 1, init: 0}
+	add_digit_array(a_h, a_l, mut tmp_1)
+	add_digit_array(b_h, b_l, mut tmp_2)
+
+	mut p_2 := []u32{len: operand_a.len + operand_b.len + 1, init: 0}
+	multiply_digit_array(tmp_1, tmp_2, mut p_2)
+	subtract_in_place(mut p_2, storage) // p_1
+	subtract_in_place(mut p_2, p_3)
+
+	// return p_1.lshift(2 * u32(half * 32)) + p_2.lshift(u32(half * 32)) + p_3
+	lshift_byte_in_place(mut storage, 2 * half)
+	lshift_byte_in_place(mut p_2, half)
+	add_in_place(mut storage, p_2)
+	add_in_place(mut storage, p_3)
+
+	for storage.len > 0 && storage.last() == 0 {
+		storage.delete_last()
+	}
+}
+
+[inline]
+fn pow2(k int) Integer {
+	mut ret := []u32{len: (k >> 5) + 1, init: 0}
+	bit_set(mut ret, k)
+	return Integer{
+		signum: 1
+		digits: ret
+	}
+}
+
+// optimized left shift of full byte(s) in place. byte_nb must be positive
+fn lshift_byte_in_place(mut a []u32, byte_nb int) {
+	a_len := a.len
+	// control or allocate capacity
+	for _ in a_len .. a_len + byte_nb {
+		a << u32(0)
+	}
+	for index := a_len - 1; index >= 0; index-- {
+		a[index + byte_nb] = a[index]
+	}
+	for index in 0 .. byte_nb {
+		a[index] = u32(0)
+	}
+}
+
+// operand b can be greater than operand a
+// the capacity of both array is supposed to be sufficient
+[inline]
+fn add_in_place(mut a []u32, b []u32) {
+	len_a := a.len
+	len_b := b.len
+	max := util.imax(len_a, len_b)
+	min := util.imin(len_a, len_b)
+	mut carry := u64(0)
+	for index in 0 .. min {
+		partial := carry + a[index] + b[index]
+		a[index] = u32(partial)
+		carry = u32(partial >> 32)
+	}
+	if len_a >= len_b {
+		for index in min .. max {
+			partial := carry + a[index]
+			a[index] = u32(partial)
+			carry = u32(partial >> 32)
+		}
+	} else {
+		for index in min .. max {
+			partial := carry + b[index]
+			a << u32(partial)
+			carry = u32(partial >> 32)
+		}
+	}
+}
+
+// a := a - b supposed a >= b
+fn subtract_in_place(mut a []u32, b []u32) {
+	len_a := a.len
+	len_b := b.len
+	max := util.imax(len_a, len_b)
+	min := util.imin(len_a, len_b)
+	mut carry := u32(0)
+	mut new_carry := u32(0)
+	for index in 0 .. min {
+		if a[index] < (b[index] + carry) {
+			new_carry = 1
+		} else {
+			new_carry = 0
+		}
+		a[index] -= (b[index] + carry)
+		carry = new_carry
+	}
+	if len_a >= len_b {
+		for index in min .. max {
+			if a[index] < carry {
+				new_carry = 1
+			} else {
+				new_carry = 0
+			}
+			a[index] -= carry
+			carry = new_carry
+		}
+	} else { // if len.b > len.a return zero
+		for a.len > 0 {
+			a.delete_last()
+		}
+	}
+}
diff --git a/vlib/math/big/special_array_ops_test.v b/vlib/math/big/special_array_ops_test.v
new file mode 100644
index 0000000000..7d381742c8
--- /dev/null
+++ b/vlib/math/big/special_array_ops_test.v
@@ -0,0 +1,153 @@
+module big
+
+fn test_add_in_place() {
+	mut a := [u32(1), 2, 3]
+	mut b := [u32(5), 6, 7]
+	add_in_place(mut a, b)
+	assert a == [u32(6), 8, 10]
+
+	a = [u32(11), 10, 11, 12]
+	b = [u32(1), 2]
+	add_in_place(mut a, b)
+	assert a == [u32(12), 12, 11, 12]
+
+	a = []u32{cap: 4}
+	a << u32(1)
+	a << u32(2)
+	b = [u32(3), 4, 5, 6]
+	add_in_place(mut a, b)
+	assert a == [u32(4), 6, 5, 6]
+
+	a = [u32(0x3ce9124b), 0x1438]
+	b = [u32(0xdb166062)]
+	add_in_place(mut a, b)
+	assert a == [u32(0x17ff72ad), 0x1439]
+}
+
+fn test_lshift_byte_in_place() {
+	mut a := [u32(5), 6, 7, 8]
+	lshift_byte_in_place(mut a, 2)
+	assert a == [u32(0), 0, 5, 6, 7, 8]
+}
+
+fn test_multiply_karatsuba_01() {
+	mut a := [u32(3)]
+	mut b := []u32{}
+	mut c := []u32{len: a.len + b.len, init: 0}
+	karatsuba_multiply_digit_array(a, b, mut c)
+	assert c == []u32{}
+
+	a = []u32{}
+	b = [u32(4)]
+	c = []u32{len: a.len + b.len, init: 0}
+	karatsuba_multiply_digit_array(a, b, mut c)
+	assert c == []u32{}
+
+	a = [u32(3)]
+	b = [u32(1)]
+	c = []u32{len: a.len + b.len, init: 0}
+	karatsuba_multiply_digit_array(a, b, mut c)
+	assert c == a
+
+	a = [u32(1)]
+	b = [u32(5)]
+	c = []u32{len: a.len + b.len, init: 0}
+	karatsuba_multiply_digit_array(a, b, mut c)
+	assert c == b
+
+	a = [u32(1234)]
+	b = [u32(567)]
+	c = []u32{len: a.len + b.len + 1, init: 0}
+	karatsuba_multiply_digit_array(a, b, mut c)
+	assert c == [u32(699_678)]
+
+	a = [u32(0x17ff72ad), 0x1439]
+	b = [u32(0x30df2ea6)]
+	c = []u32{len: a.len + b.len + 1, init: 0}
+	karatsuba_multiply_digit_array(a, b, mut c)
+	assert c == [u32(0xcaf2722e), 0x55eb2c5a, 0x3dc]
+
+	a_operand := integer_from_string('95484736384949495947362') or { panic('error') }
+	b_operand := integer_from_string('39474638493') or { panic('error') }
+	c = []u32{len: a_operand.digits.len + b_operand.digits.len, init: 0}
+	karatsuba_multiply_digit_array(a_operand.digits, b_operand.digits, mut c)
+	expected := integer_from_string('3769225450395285038584683507005466') or { panic('error') }
+	assert c == expected.digits
+}
+
+fn test_multiply_karatsuba_02() {
+	a := integer_from_string('53575430359313366047421252453000090528070240585276680372187519418517552556246806124659918940784792906379733645877657341259357264284615702179922887873492874019672838874121154927105373025311855709389770910765') or {
+		panic('error')
+	}
+	b := integer_from_string('977091076523237491790970633699383779582771973038531457285598238843271083830214915826312193418602834034688531898668229388286706296786321423078510899614439367') or {
+		panic('error')
+	}
+	mut c := []u32{len: a.digits.len + b.digits.len + 1, init: 0}
+	karatsuba_multiply_digit_array(a.digits, b.digits, mut c)
+	expected := integer_from_string('52348074924977237255285644820010078601114587486470740900886892189662650320988400136613780986308710610258879824881256666730655821800564143426560480113864123642197317383052431412305975584645367703594190956925565749714310612399025459615546540332117815550470167143256687163102859337019449165214274088466835988832405507818643018779158891710706073875995722420460085755') or {
+		panic('error')
+	}
+}
+
+fn test_newton_divide_03() {
+	a := [u32(0), 4]
+	b := [u32(0), 1]
+	mut q := []u32{cap: a.len - b.len + 1}
+	mut r := []u32{cap: a.len}
+
+	newton_divide_array_by_array(a, b, mut q, mut r)
+	assert q == [u32(4)]
+	assert r == []u32{len: 0}
+}
+
+fn test_newton_divide_04() {
+	a := [u32(2), 4]
+	b := [u32(0), 1]
+	mut q := []u32{cap: a.len - b.len + 1}
+	mut r := []u32{cap: a.len}
+
+	newton_divide_array_by_array(a, b, mut q, mut r)
+	assert q == [u32(4)]
+	assert r == [u32(2)]
+}
+
+fn test_newton_divide_05() {
+	a := [u32(2), 4, 5]
+	b := [u32(0), 1]
+	mut q := []u32{cap: a.len - b.len + 1}
+	mut r := []u32{cap: a.len}
+
+	newton_divide_array_by_array(a, b, mut q, mut r)
+	assert q == [u32(4), 5]
+	assert r == [u32(2)]
+}
+
+fn test_newton_divide_06() {
+	a := [u32(2), 4, 5, 3]
+	b := [u32(0), 0x8000]
+	mut q := []u32{cap: a.len - b.len + 1}
+	mut r := []u32{cap: a.len}
+
+	newton_divide_array_by_array(a, b, mut q, mut r)
+	assert q == [u32(0xa0000), 0x60000]
+	assert r == [u32(2), 4]
+}
+
+fn test_newton_divide_07() {
+	a := integer_from_string('52348074924977237255285644820010078601114587486470740900886892189662650320988400136613780986308710610258879824881256666730655821800564143426560480113864123642197317383052431412305975584645367703594190956925565749714310612399025459615546540332117815550470167143256687163102859337019449165214274088466835988832405507818643018779158891710706073875995722420460085757') or {
+		panic('error')
+	}
+	b := integer_from_string('977091076523237491790970633699383779582771973038531457285598238843271083830214915826312193418602834034688531898668229388286706296786321423078510899614439367') or {
+		panic('error')
+	}
+	mut q := []u32{cap: a.digits.len - b.digits.len + 1}
+	mut r := []u32{cap: a.digits.len}
+
+	newton_divide_array_by_array(a.digits, b.digits, mut q, mut r)
+	quotient := Integer{
+		signum: 1
+		digits: q
+	}
+	assert quotient.str() == '53575430359313366047421252453000090528070240585276680372187519418517552556246806124659918940784792906379733645877657341259357264284615702179922887873492874019672838874121154927105373025311855709389770910765'
+	assert r == [u32(2)]
+}