From 273154c1aef5e0342a38ca0105982e10f3c6efd5 Mon Sep 17 00:00:00 2001 From: Vincent Laisney Date: Thu, 16 Sep 2021 18:31:07 +0200 Subject: [PATCH] math.big: add Newton and Karatsuba algorithms (#11487) --- vlib/math/big/array_ops.v | 27 ++- vlib/math/big/division_array_ops.v | 45 ++--- vlib/math/big/division_array_ops_test.v | 19 +- vlib/math/big/special_array_ops.v | 228 ++++++++++++++++++++++++ vlib/math/big/special_array_ops_test.v | 153 ++++++++++++++++ 5 files changed, 434 insertions(+), 38 deletions(-) create mode 100644 vlib/math/big/special_array_ops.v create mode 100644 vlib/math/big/special_array_ops_test.v diff --git a/vlib/math/big/array_ops.v b/vlib/math/big/array_ops.v index bff4c5d450..92816fe884 100644 --- a/vlib/math/big/array_ops.v +++ b/vlib/math/big/array_ops.v @@ -109,10 +109,24 @@ fn subtract_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) { } } +const karatsuba_multiplication_limit = 1_000_000 + +// set limit to choose algorithm + +[inline] +fn multiply_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) { + if operand_a.len >= big.karatsuba_multiplication_limit + || operand_b.len >= big.karatsuba_multiplication_limit { + karatsuba_multiply_digit_array(operand_a, operand_b, mut storage) + } else { + simple_multiply_digit_array(operand_a, operand_b, mut storage) + } +} + // Multiplies the unsigned (non-negative) integers represented in a and b and the product is // stored in storage. It assumes that storage has length equal to the sum of lengths // of a and b. Length refers to length of array, that is, digit count. -fn multiply_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) { +fn simple_multiply_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) { for b_index in 0 .. operand_b.len { mut carry := u64(0) for a_index in 0 .. operand_a.len { @@ -240,6 +254,17 @@ fn divide_array_by_digit(operand_a []u32, divisor u32, mut quotient []u32, mut r } } +const newton_division_limit = 10_000 + +[inline] +fn divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, mut remainder []u32) { + if operand_a.len >= big.newton_division_limit { + newton_divide_array_by_array(operand_a, operand_b, mut quotient, mut remainder) + } else { + binary_divide_array_by_array(operand_a, operand_b, mut quotient, mut remainder) + } +} + // Shifts the contents of the original array by the given amount of bits to the left. // This function assumes that the amount is less than 32. The storage is expected to // allocated with zeroes. diff --git a/vlib/math/big/division_array_ops.v b/vlib/math/big/division_array_ops.v index b00cda2afa..16b6801bf0 100644 --- a/vlib/math/big/division_array_ops.v +++ b/vlib/math/big/division_array_ops.v @@ -4,7 +4,7 @@ import math.bits // suppose operand_a bigger than operand_b and both not null. // Both quotient and remaider are allocated but of length 0 -fn divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, mut remainder []u32) { +fn binary_divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, mut remainder []u32) { for index in 0 .. operand_a.len { remainder << operand_a[index] } @@ -39,12 +39,12 @@ fn divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, m for bit_idx := int(bit_offset); bit_idx >= 0; bit_idx-- { if greater_equal_from_end(remainder, divisor) { bit_set(mut quotient, bit_idx) - subtract_in_place(mut remainder, divisor) + subtract_align_last_byte_in_place(mut remainder, divisor) } rshift_in_place(mut divisor, 1) } - // ajust + // adjust if lead_zer_remainder > lead_zer_divisor { // rshift_in_place(mut quotient, lead_zer_remainder - lead_zer_divisor) rshift_in_place(mut remainder, lead_zer_remainder - lead_zer_divisor) @@ -61,7 +61,7 @@ fn divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, m // quicker than BitField.set_bit [inline] fn bit_set(mut a []u32, n int) { - byte_offset := n / 32 + byte_offset := n >> 5 mask := u32(1) << u32(n % 32) assert a.len >= byte_offset a[byte_offset] |= mask @@ -83,6 +83,25 @@ fn greater_equal_from_end(a []u32, b []u32) bool { return true } +// a := a - b supposed a >= b +// attention the b operand is align with the a operand before the subtraction +[inline] +fn subtract_align_last_byte_in_place(mut a []u32, b []u32) { + mut carry := u32(0) + mut new_carry := u32(0) + offset := a.len - b.len + for index := a.len - b.len; index < a.len; index++ { + if a[index] < (b[index - offset] + carry) { + new_carry = 1 + } else { + new_carry = 0 + } + a[index] -= (b[index - offset] + carry) + carry = new_carry + } + assert carry == 0 +} + // logical left shift // there is no overflow. We know that the last bits are zero // and that n <= 32 @@ -114,24 +133,6 @@ fn rshift_in_place(mut a []u32, n u32) { } } -// a := a - b supposed a >= b -[inline] -fn subtract_in_place(mut a []u32, b []u32) { - mut carry := u32(0) - mut new_carry := u32(0) - offset := a.len - b.len - for index := a.len - b.len; index < a.len; index++ { - if a[index] < (b[index - offset] + carry) { - new_carry = 1 - } else { - new_carry = 0 - } - a[index] -= (b[index - offset] + carry) - carry = new_carry - } - assert carry == 0 -} - // for assert [inline] fn left_align_p(a u32, b u32) bool { diff --git a/vlib/math/big/division_array_ops_test.v b/vlib/math/big/division_array_ops_test.v index 91c158dd9e..0d9b13a14d 100644 --- a/vlib/math/big/division_array_ops_test.v +++ b/vlib/math/big/division_array_ops_test.v @@ -34,15 +34,15 @@ fn test_rshift_in_place() { assert c == [u32(0x00ffffff), 0xf0f0f0f0, 1, 0x3fffffff, 1] } -fn test_subtract_in_place() { +fn test_subtract_align_last_byte_in_place() { mut a := [u32(2), 2, 2, 2, 2] mut b := [u32(1), 1, 2, 1, 1] - subtract_in_place(mut a, b) + subtract_align_last_byte_in_place(mut a, b) assert a == [u32(1), 1, 0, 1, 1] a = [u32(0), 0, 0, 0, 1] b = [u32(0), 0, 1] - subtract_in_place(mut a, b) + subtract_align_last_byte_in_place(mut a, b) assert a == [u32(0), 0, 0, 0, 0] a = [u32(0), 0, 0, 0, 1, 13] @@ -51,7 +51,7 @@ fn test_subtract_in_place() { mut d := [u32(0), 0, 0] d << b // to have same length subtract_digit_array(a, d, mut c) - subtract_in_place(mut a, b) + subtract_align_last_byte_in_place(mut a, b) assert a == [u32(0), 0, 0, u32(-1), 0, 12] assert c == a } @@ -126,17 +126,6 @@ fn test_divide_digit_array_06() { assert r == [u32(2), 4] } -// For debugging -fn integer_from_u32_array(a []u32) Integer { - mut res := integer_from_i64(0) - mut multiplicand := integer_from_u32(1) - for i in 0 .. a.len { - res += integer_from_u32(a[i]) * multiplicand - multiplicand = multiplicand.lshift(32) - } - return res -} - fn test_many_divisions() { for _ in 0 .. 100 { a := random_number(30) diff --git a/vlib/math/big/special_array_ops.v b/vlib/math/big/special_array_ops.v new file mode 100644 index 0000000000..9ef71c28aa --- /dev/null +++ b/vlib/math/big/special_array_ops.v @@ -0,0 +1,228 @@ +module big + +import math.bits +import math.util +import strings + +// suppose operand_a bigger than operand_b and both not null. +// Both quotient and remaider are already allocated but of length 0 +fn newton_divide_array_by_array(operand_a []u32, operand_b []u32, mut quotient []u32, mut remainder []u32) { + // tranform back to Integers (on the stack without allocation) + a := Integer{ + signum: 1 + digits: operand_a + } + b := Integer{ + signum: 1 + digits: operand_b + } + + k := bit_length(a) + bit_length(b) // a*b < 2**k + mut x := integer_from_int(2) // 0 < x < 2**(k+1)/b // initial guess for convergence + // https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division + // use 48/17 - 32/17.D (divisor) + initial_guess := (((integer_from_int(48) - (integer_from_int(32) * b)) * integer_from_i64(0x0f0f0f0f0f0f0f0f)).rshift(64)).neg() // / 17 == 0x11 + if initial_guess > zero_int { + x = initial_guess + } + mut lastx := integer_from_int(0) + pow2_k_plus_1 := pow2(k + 1) // outside of the loop to optimize allocatio + for lastx != x { // main loop + lastx = x + x = (x * (pow2_k_plus_1 - (x * b))).rshift(u32(k)) + } + if x * b < pow2(k) { + x.inc() + } + mut q := (a * x).rshift(u32(k)) + // possible adjustments. see literature + if q * b > a { + q.dec() + } + mut r := a - (q * b) + if r >= b { + q.inc() + r -= b + } + quotient = q.digits + remainder = r.digits + + for remainder.len > 0 && remainder.last() == 0 { + remainder.delete_last() + } +} + +[inline] +fn bit_length(a Integer) int { + return a.digits.len * 32 - bits.leading_zeros_32(a.digits.last()) +} + +[inline] +fn debug_u32_str(a []u32) string { + mut sb := strings.new_builder(30) + sb.write_string('[') + mut first := true + for i in 0 .. a.len { + if !first { + sb.write_string(', ') + } + sb.write_string('0x${a[i].hex()}') + first = false + } + sb.write_string(']') + return sb.str() +} + +// karatsuba algorithm for multiplication +// possible optimisations: +// - transform one or all the recurrences in loops +fn karatsuba_multiply_digit_array(operand_a []u32, operand_b []u32, mut storage []u32) { + // base case necessary to end recursion + if operand_a.len == 0 || operand_b.len == 0 { + for storage.len > 0 { + storage.delete_last() + } + return + } + + if operand_a.len < operand_b.len { + multiply_digit_array(operand_b, operand_a, mut storage) + return + } + + if operand_b.len == 1 { + multiply_array_by_digit(operand_a, operand_b[0], mut storage) + return + } + // karatsuba + // thanks to the base cases we can pass zero-length arrays to the mult func + half := util.imax(operand_a.len, operand_b.len) / 2 + if half <= 0 { + panic('Unreachable. Both array have 1 length and multiply_array_by_digit should have been called') + } + a_l := operand_a[0..half] + a_h := operand_a[half..] + mut b_l := []u32{} + mut b_h := []u32{} + if half <= operand_b.len { + b_l = operand_b[0..half] + b_h = operand_b[half..] + } else { + b_l = unsafe { operand_b } + // b_h = []u32{} + } + + // use storage for p_1 to avoid allocation and copy later + multiply_digit_array(a_h, b_h, mut storage) + + mut p_3 := []u32{len: a_l.len + b_l.len + 1, init: 0} + multiply_digit_array(a_l, b_l, mut p_3) + + mut tmp_1 := []u32{len: util.imax(a_h.len, a_l.len) + 1, init: 0} + mut tmp_2 := []u32{len: util.imax(b_h.len, b_l.len) + 1, init: 0} + add_digit_array(a_h, a_l, mut tmp_1) + add_digit_array(b_h, b_l, mut tmp_2) + + mut p_2 := []u32{len: operand_a.len + operand_b.len + 1, init: 0} + multiply_digit_array(tmp_1, tmp_2, mut p_2) + subtract_in_place(mut p_2, storage) // p_1 + subtract_in_place(mut p_2, p_3) + + // return p_1.lshift(2 * u32(half * 32)) + p_2.lshift(u32(half * 32)) + p_3 + lshift_byte_in_place(mut storage, 2 * half) + lshift_byte_in_place(mut p_2, half) + add_in_place(mut storage, p_2) + add_in_place(mut storage, p_3) + + for storage.len > 0 && storage.last() == 0 { + storage.delete_last() + } +} + +[inline] +fn pow2(k int) Integer { + mut ret := []u32{len: (k >> 5) + 1, init: 0} + bit_set(mut ret, k) + return Integer{ + signum: 1 + digits: ret + } +} + +// optimized left shift of full byte(s) in place. byte_nb must be positive +fn lshift_byte_in_place(mut a []u32, byte_nb int) { + a_len := a.len + // control or allocate capacity + for _ in a_len .. a_len + byte_nb { + a << u32(0) + } + for index := a_len - 1; index >= 0; index-- { + a[index + byte_nb] = a[index] + } + for index in 0 .. byte_nb { + a[index] = u32(0) + } +} + +// operand b can be greater than operand a +// the capacity of both array is supposed to be sufficient +[inline] +fn add_in_place(mut a []u32, b []u32) { + len_a := a.len + len_b := b.len + max := util.imax(len_a, len_b) + min := util.imin(len_a, len_b) + mut carry := u64(0) + for index in 0 .. min { + partial := carry + a[index] + b[index] + a[index] = u32(partial) + carry = u32(partial >> 32) + } + if len_a >= len_b { + for index in min .. max { + partial := carry + a[index] + a[index] = u32(partial) + carry = u32(partial >> 32) + } + } else { + for index in min .. max { + partial := carry + b[index] + a << u32(partial) + carry = u32(partial >> 32) + } + } +} + +// a := a - b supposed a >= b +fn subtract_in_place(mut a []u32, b []u32) { + len_a := a.len + len_b := b.len + max := util.imax(len_a, len_b) + min := util.imin(len_a, len_b) + mut carry := u32(0) + mut new_carry := u32(0) + for index in 0 .. min { + if a[index] < (b[index] + carry) { + new_carry = 1 + } else { + new_carry = 0 + } + a[index] -= (b[index] + carry) + carry = new_carry + } + if len_a >= len_b { + for index in min .. max { + if a[index] < carry { + new_carry = 1 + } else { + new_carry = 0 + } + a[index] -= carry + carry = new_carry + } + } else { // if len.b > len.a return zero + for a.len > 0 { + a.delete_last() + } + } +} diff --git a/vlib/math/big/special_array_ops_test.v b/vlib/math/big/special_array_ops_test.v new file mode 100644 index 0000000000..7d381742c8 --- /dev/null +++ b/vlib/math/big/special_array_ops_test.v @@ -0,0 +1,153 @@ +module big + +fn test_add_in_place() { + mut a := [u32(1), 2, 3] + mut b := [u32(5), 6, 7] + add_in_place(mut a, b) + assert a == [u32(6), 8, 10] + + a = [u32(11), 10, 11, 12] + b = [u32(1), 2] + add_in_place(mut a, b) + assert a == [u32(12), 12, 11, 12] + + a = []u32{cap: 4} + a << u32(1) + a << u32(2) + b = [u32(3), 4, 5, 6] + add_in_place(mut a, b) + assert a == [u32(4), 6, 5, 6] + + a = [u32(0x3ce9124b), 0x1438] + b = [u32(0xdb166062)] + add_in_place(mut a, b) + assert a == [u32(0x17ff72ad), 0x1439] +} + +fn test_lshift_byte_in_place() { + mut a := [u32(5), 6, 7, 8] + lshift_byte_in_place(mut a, 2) + assert a == [u32(0), 0, 5, 6, 7, 8] +} + +fn test_multiply_karatsuba_01() { + mut a := [u32(3)] + mut b := []u32{} + mut c := []u32{len: a.len + b.len, init: 0} + karatsuba_multiply_digit_array(a, b, mut c) + assert c == []u32{} + + a = []u32{} + b = [u32(4)] + c = []u32{len: a.len + b.len, init: 0} + karatsuba_multiply_digit_array(a, b, mut c) + assert c == []u32{} + + a = [u32(3)] + b = [u32(1)] + c = []u32{len: a.len + b.len, init: 0} + karatsuba_multiply_digit_array(a, b, mut c) + assert c == a + + a = [u32(1)] + b = [u32(5)] + c = []u32{len: a.len + b.len, init: 0} + karatsuba_multiply_digit_array(a, b, mut c) + assert c == b + + a = [u32(1234)] + b = [u32(567)] + c = []u32{len: a.len + b.len + 1, init: 0} + karatsuba_multiply_digit_array(a, b, mut c) + assert c == [u32(699_678)] + + a = [u32(0x17ff72ad), 0x1439] + b = [u32(0x30df2ea6)] + c = []u32{len: a.len + b.len + 1, init: 0} + karatsuba_multiply_digit_array(a, b, mut c) + assert c == [u32(0xcaf2722e), 0x55eb2c5a, 0x3dc] + + a_operand := integer_from_string('95484736384949495947362') or { panic('error') } + b_operand := integer_from_string('39474638493') or { panic('error') } + c = []u32{len: a_operand.digits.len + b_operand.digits.len, init: 0} + karatsuba_multiply_digit_array(a_operand.digits, b_operand.digits, mut c) + expected := integer_from_string('3769225450395285038584683507005466') or { panic('error') } + assert c == expected.digits +} + +fn test_multiply_karatsuba_02() { + a := integer_from_string('53575430359313366047421252453000090528070240585276680372187519418517552556246806124659918940784792906379733645877657341259357264284615702179922887873492874019672838874121154927105373025311855709389770910765') or { + panic('error') + } + b := integer_from_string('977091076523237491790970633699383779582771973038531457285598238843271083830214915826312193418602834034688531898668229388286706296786321423078510899614439367') or { + panic('error') + } + mut c := []u32{len: a.digits.len + b.digits.len + 1, init: 0} + karatsuba_multiply_digit_array(a.digits, b.digits, mut c) + expected := integer_from_string('52348074924977237255285644820010078601114587486470740900886892189662650320988400136613780986308710610258879824881256666730655821800564143426560480113864123642197317383052431412305975584645367703594190956925565749714310612399025459615546540332117815550470167143256687163102859337019449165214274088466835988832405507818643018779158891710706073875995722420460085755') or { + panic('error') + } +} + +fn test_newton_divide_03() { + a := [u32(0), 4] + b := [u32(0), 1] + mut q := []u32{cap: a.len - b.len + 1} + mut r := []u32{cap: a.len} + + newton_divide_array_by_array(a, b, mut q, mut r) + assert q == [u32(4)] + assert r == []u32{len: 0} +} + +fn test_newton_divide_04() { + a := [u32(2), 4] + b := [u32(0), 1] + mut q := []u32{cap: a.len - b.len + 1} + mut r := []u32{cap: a.len} + + newton_divide_array_by_array(a, b, mut q, mut r) + assert q == [u32(4)] + assert r == [u32(2)] +} + +fn test_newton_divide_05() { + a := [u32(2), 4, 5] + b := [u32(0), 1] + mut q := []u32{cap: a.len - b.len + 1} + mut r := []u32{cap: a.len} + + newton_divide_array_by_array(a, b, mut q, mut r) + assert q == [u32(4), 5] + assert r == [u32(2)] +} + +fn test_newton_divide_06() { + a := [u32(2), 4, 5, 3] + b := [u32(0), 0x8000] + mut q := []u32{cap: a.len - b.len + 1} + mut r := []u32{cap: a.len} + + newton_divide_array_by_array(a, b, mut q, mut r) + assert q == [u32(0xa0000), 0x60000] + assert r == [u32(2), 4] +} + +fn test_newton_divide_07() { + a := integer_from_string('52348074924977237255285644820010078601114587486470740900886892189662650320988400136613780986308710610258879824881256666730655821800564143426560480113864123642197317383052431412305975584645367703594190956925565749714310612399025459615546540332117815550470167143256687163102859337019449165214274088466835988832405507818643018779158891710706073875995722420460085757') or { + panic('error') + } + b := integer_from_string('977091076523237491790970633699383779582771973038531457285598238843271083830214915826312193418602834034688531898668229388286706296786321423078510899614439367') or { + panic('error') + } + mut q := []u32{cap: a.digits.len - b.digits.len + 1} + mut r := []u32{cap: a.digits.len} + + newton_divide_array_by_array(a.digits, b.digits, mut q, mut r) + quotient := Integer{ + signum: 1 + digits: q + } + assert quotient.str() == '53575430359313366047421252453000090528070240585276680372187519418517552556246806124659918940784792906379733645877657341259357264284615702179922887873492874019672838874121154927105373025311855709389770910765' + assert r == [u32(2)] +}