From 3647fb4defc161570ffbfe0179adcca0e8c0187a Mon Sep 17 00:00:00 2001 From: Subhomoy Haldar Date: Sun, 22 May 2022 15:51:52 +0530 Subject: [PATCH] rand: move dist functions to top module and PRNG interface; minor cleanup (#14481) --- vlib/rand/buffer/buffer.v | 10 +++ vlib/rand/config/config.v | 35 ++++++++ vlib/rand/dist/README.md | 10 --- vlib/rand/dist/dist.v | 85 ------------------- vlib/rand/{dist => }/dist_test.v | 11 ++- vlib/rand/mini_math.v | 130 ++++++++++++++++++++++++++++++ vlib/rand/mt19937/mt19937.v | 8 +- vlib/rand/musl/musl_rng.v | 6 +- vlib/rand/pcg32/pcg32.v | 8 +- vlib/rand/rand.v | 109 ++++++++++++++++++++----- vlib/rand/splitmix64/splitmix64.v | 2 + vlib/rand/sys/system_rng.c.v | 8 +- vlib/rand/wyrand/wyrand.v | 2 + 13 files changed, 290 insertions(+), 134 deletions(-) create mode 100644 vlib/rand/buffer/buffer.v delete mode 100644 vlib/rand/dist/README.md delete mode 100644 vlib/rand/dist/dist.v rename vlib/rand/{dist => }/dist_test.v (91%) create mode 100644 vlib/rand/mini_math.v diff --git a/vlib/rand/buffer/buffer.v b/vlib/rand/buffer/buffer.v new file mode 100644 index 0000000000..b501436c8e --- /dev/null +++ b/vlib/rand/buffer/buffer.v @@ -0,0 +1,10 @@ +// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module buffer + +pub struct PRNGBuffer { +mut: + bytes_left int + buffer u64 +} diff --git a/vlib/rand/config/config.v b/vlib/rand/config/config.v index 9baeeaa0be..815f110f47 100644 --- a/vlib/rand/config/config.v +++ b/vlib/rand/config/config.v @@ -1,3 +1,6 @@ +// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. module config import rand.seed @@ -12,3 +15,35 @@ pub struct PRNGConfigStruct { pub: seed_ []u32 = seed.time_seed_array(2) } + +// Configuration struct for generating normally distributed floats. The default value for +// `mu` is 0 and the default value for `sigma` is 1. +[params] +pub struct NormalConfigStruct { +pub: + mu f64 = 0.0 + sigma f64 = 1.0 +} + +// Configuration struct for the shuffle functions. +// The start index is inclusive and the end index is exclusive. +// Set the end to 0 to shuffle until the end of the array. +[params] +pub struct ShuffleConfigStruct { +pub: + start int + end int +} + +// validate_for is a helper function for validating the configuration struct for the given array. +pub fn (config ShuffleConfigStruct) validate_for(a []T) ? { + if config.start < 0 || config.start >= a.len { + return error("argument 'config.start' must be in range [0, a.len)") + } + if config.end < 0 || config.end > a.len { + return error("argument 'config.end' must be in range [0, a.len]") + } + if config.end != 0 && config.end <= config.start { + return error("argument 'config.end' must be greater than 'config.start'") + } +} diff --git a/vlib/rand/dist/README.md b/vlib/rand/dist/README.md deleted file mode 100644 index 85b7177927..0000000000 --- a/vlib/rand/dist/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Non-Uniform Distribution Functions - -This module contains functions for sampling from non-uniform distributions. - -All implementations of the `rand.PRNG` interface generate numbers from uniform -distributions. This library exists to allow the generation of pseudorandom numbers -sampled from non-uniform distributions. Additionally, it allows the user to use any -PRNG of their choice. This is because the default RNG can be reassigned to a different -generator. It can either be one of the pre-existing one (which are well-tested and -recommended) or a custom user-defined one. See `rand.set_rng()`. \ No newline at end of file diff --git a/vlib/rand/dist/dist.v b/vlib/rand/dist/dist.v deleted file mode 100644 index f028d166df..0000000000 --- a/vlib/rand/dist/dist.v +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved. -// Use of this source code is governed by an MIT license -// that can be found in the LICENSE file. -module dist - -import math -import rand - -fn check_probability_range(p f64) { - if p < 0 || p > 1 { - panic('$p is not a valid probability value.') - } -} - -// bernoulli returns true with a probability p. Note that 0 <= p <= 1. -pub fn bernoulli(p f64) bool { - check_probability_range(p) - return rand.f64() <= p -} - -// binomial returns the number of successful trials out of n when the -// probability of success for each trial is p. -pub fn binomial(n int, p f64) int { - check_probability_range(p) - mut count := 0 - for _ in 0 .. n { - if bernoulli(p) { - count++ - } - } - return count -} - -// Configuration struct for the `normal_pair` function. The default value for -// `mu` is 0 and the default value for `sigma` is 1. -pub struct NormalConfigStruct { - mu f64 = 0.0 - sigma f64 = 1.0 -} - -// normal_pair returns a pair of normally distributed random numbers with the mean mu -// and standard deviation sigma. If not specified, mu is 0 and sigma is 1. Intended usage is -// `x, y := normal_pair(mu: mean, sigma: stdev)`, or `x, y := normal_pair()`. -pub fn normal_pair(config NormalConfigStruct) (f64, f64) { - if config.sigma <= 0 { - panic('The standard deviation has to be positive.') - } - // This is an implementation of the Marsaglia polar method - // See: https://doi.org/10.1137%2F1006063 - // Also: https://en.wikipedia.org/wiki/Marsaglia_polar_method - for { - u := rand.f64_in_range(-1, 1) or { 0.0 } - v := rand.f64_in_range(-1, 1) or { 0.0 } - - s := u * u + v * v - if s >= 1 || s == 0 { - continue - } - t := math.sqrt(-2 * math.log(s) / s) - x := config.mu + config.sigma * t * u - y := config.mu + config.sigma * t * v - return x, y - } - return config.mu, config.mu -} - -// normal returns a normally distributed random number with the mean mu and standard deviation -// sigma. If not specified, mu is 0 and sigma is 1. Intended usage is -// `x := normal(mu: mean, sigma: etdev)` or `x := normal()`. -// **NOTE:** If you are generating a lot of normal variates, use `the normal_pair` function -// instead. This function discards one of the two variates generated by the `normal_pair` function. -pub fn normal(config NormalConfigStruct) f64 { - x, _ := normal_pair(config) - return x -} - -// exponential returns an exponentially distributed random number with the rate paremeter -// lambda. It is expected that lambda is positive. -pub fn exponential(lambda f64) f64 { - if lambda <= 0 { - panic('The rate (lambda) must be positive.') - } - // Use the inverse transform sampling method - return -math.log(rand.f64()) / lambda -} diff --git a/vlib/rand/dist/dist_test.v b/vlib/rand/dist_test.v similarity index 91% rename from vlib/rand/dist/dist_test.v rename to vlib/rand/dist_test.v index d2589da809..96afb8b25d 100644 --- a/vlib/rand/dist/dist_test.v +++ b/vlib/rand/dist_test.v @@ -1,6 +1,5 @@ import math import rand -import rand.dist const ( // The sample size to be used @@ -20,7 +19,7 @@ fn test_bernoulli() { for p in ps { mut successes := 0 for _ in 0 .. count { - if dist.bernoulli(p) { + if rand.bernoulli(p) or { false } { successes++ } } @@ -43,7 +42,7 @@ fn test_binomial() { mut sum := 0 mut var := 0.0 for _ in 0 .. count { - x := dist.binomial(n, p) + x := rand.binomial(n, p) or { 0 } sum += x dist := (x - np) var += dist * dist @@ -68,7 +67,7 @@ fn test_normal_pair() { mut sum := 0.0 mut var := 0.0 for _ in 0 .. count { - x, y := dist.normal_pair(mu: mu, sigma: sigma) + x, y := rand.normal_pair(mu: mu, sigma: sigma) or { 0.0, 0.0 } sum += x + y dist_x := x - mu dist_y := y - mu @@ -95,7 +94,7 @@ fn test_normal() { mut sum := 0.0 mut var := 0.0 for _ in 0 .. count { - x := dist.normal(mu: mu, sigma: sigma) + x := rand.normal(mu: mu, sigma: sigma) or { 0.0 } sum += x dist := x - mu var += dist * dist @@ -120,7 +119,7 @@ fn test_exponential() { mut sum := 0.0 mut var := 0.0 for _ in 0 .. count { - x := dist.exponential(lambda) + x := rand.exponential(lambda) sum += x dist := x - mu var += dist * dist diff --git a/vlib/rand/mini_math.v b/vlib/rand/mini_math.v new file mode 100644 index 0000000000..518141a819 --- /dev/null +++ b/vlib/rand/mini_math.v @@ -0,0 +1,130 @@ +// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module rand + +// NOTE: mini_math.v exists, so that we can avoid `import math`, +// just for the math.log and math.sqrt functions needed for the +// non uniform random number redistribution functions. +// Importing math is relatively heavy, both in terms of compilation +// speed (more source to process), and in terms of increases in the +// generated executable sizes (if the rest of the program does not use +// math already; many programs do not need math, for example the +// compiler itself does not, while needing random number generation. + +const sqrt2 = 1.41421356237309504880168872420969807856967187537694807317667974 + +[inline] +fn msqrt(a f64) f64 { + if a == 0 { + return a + } + mut x := a + z, ex := frexp(x) + w := x + // approximate square root of number between 0.5 and 1 + // relative error of approximation = 7.47e-3 + x = 4.173075996388649989089e-1 + 5.9016206709064458299663e-1 * z // adjust for odd powers of 2 + if (ex & 1) != 0 { + x *= rand.sqrt2 + } + x = scalbn(x, ex >> 1) + // newton iterations + x = 0.5 * (x + w / x) + x = 0.5 * (x + w / x) + x = 0.5 * (x + w / x) + return x +} + +// a simplified approximation (without the edge cases), see math.log +fn mlog(a f64) f64 { + ln2_lo := 1.90821492927058770002e-10 + ln2_hi := 0.693147180369123816490 + l1 := 0.6666666666666735130 + l2 := 0.3999999999940941908 + l3 := 0.2857142874366239149 + l4 := 0.2222219843214978396 + l5 := 0.1818357216161805012 + l6 := 0.1531383769920937332 + l7 := 0.1479819860511658591 + x := a + mut f1, mut ki := frexp(x) + if f1 < rand.sqrt2 / 2 { + f1 *= 2 + ki-- + } + f := f1 - 1 + k := f64(ki) + s := f / (2 + f) + s2 := s * s + s4 := s2 * s2 + t1 := s2 * (l1 + s4 * (l3 + s4 * (l5 + s4 * l7))) + t2 := s4 * (l2 + s4 * (l4 + s4 * l6)) + r := t1 + t2 + hfsq := 0.5 * f * f + return k * ln2_hi - ((hfsq - (s * (hfsq + r) + k * ln2_lo)) - f) +} + +fn frexp(x f64) (f64, int) { + mut y := f64_bits(x) + ee := int((y >> 52) & 0x7ff) + if ee == 0 { + if x != 0.0 { + x1p64 := f64_from_bits(u64(0x43f0000000000000)) + z, e_ := frexp(x * x1p64) + return z, e_ - 64 + } + return x, 0 + } else if ee == 0x7ff { + return x, 0 + } + e_ := ee - 0x3fe + y &= u64(0x800fffffffffffff) + y |= u64(0x3fe0000000000000) + return f64_from_bits(y), e_ +} + +fn scalbn(x f64, n_ int) f64 { + mut n := n_ + x1p1023 := f64_from_bits(u64(0x7fe0000000000000)) + x1p53 := f64_from_bits(u64(0x4340000000000000)) + x1p_1022 := f64_from_bits(u64(0x0010000000000000)) + + mut y := x + if n > 1023 { + y *= x1p1023 + n -= 1023 + if n > 1023 { + y *= x1p1023 + n -= 1023 + if n > 1023 { + n = 1023 + } + } + } else if n < -1022 { + /* + make sure final n < -53 to avoid double + rounding in the subnormal range + */ + y *= x1p_1022 * x1p53 + n += 1022 - 53 + if n < -1022 { + y *= x1p_1022 * x1p53 + n += 1022 - 53 + if n < -1022 { + n = -1022 + } + } + } + return y * f64_from_bits(u64((0x3ff + n)) << 52) +} + +[inline] +fn f64_from_bits(b u64) f64 { + return *unsafe { &f64(&b) } +} + +[inline] +fn f64_bits(f f64) u64 { + return *unsafe { &u64(&f) } +} diff --git a/vlib/rand/mt19937/mt19937.v b/vlib/rand/mt19937/mt19937.v index e493b4201a..9b5ec86129 100644 --- a/vlib/rand/mt19937/mt19937.v +++ b/vlib/rand/mt19937/mt19937.v @@ -3,6 +3,7 @@ // that can be found in the LICENSE file. module mt19937 +import rand.buffer import rand.seed /* @@ -60,11 +61,10 @@ const ( // MT19937RNG is generator that uses the Mersenne Twister algorithm with period 2^19937. // **NOTE**: The RNG is not seeded when instantiated so remember to seed it before use. pub struct MT19937RNG { + buffer.PRNGBuffer mut: - state []u64 = get_first_state(seed.time_seed_array(2)) - mti int = mt19937.nn - bytes_left int - buffer u64 + state []u64 = get_first_state(seed.time_seed_array(2)) + mti int = mt19937.nn } fn get_first_state(seed_data []u32) []u64 { diff --git a/vlib/rand/musl/musl_rng.v b/vlib/rand/musl/musl_rng.v index 22ef39b95e..6c72dc50f8 100644 --- a/vlib/rand/musl/musl_rng.v +++ b/vlib/rand/musl/musl_rng.v @@ -4,15 +4,15 @@ module musl import rand.seed +import rand.buffer pub const seed_len = 1 // MuslRNG ported from https://git.musl-libc.org/cgit/musl/tree/src/prng/rand_r.c pub struct MuslRNG { + buffer.PRNGBuffer mut: - state u32 = seed.time_seed_32() - bytes_left int - buffer u32 + state u32 = seed.time_seed_32() } // seed sets the current random state based on `seed_data`. diff --git a/vlib/rand/pcg32/pcg32.v b/vlib/rand/pcg32/pcg32.v index dfe1b2c032..5b75f64f0b 100644 --- a/vlib/rand/pcg32/pcg32.v +++ b/vlib/rand/pcg32/pcg32.v @@ -4,6 +4,7 @@ module pcg32 import rand.seed +import rand.buffer pub const seed_len = 4 @@ -11,11 +12,10 @@ pub const seed_len = 4 // https://github.com/imneme/pcg-c-basic/blob/master/pcg_basic.c, and // https://github.com/imneme/pcg-c-basic/blob/master/pcg_basic.h pub struct PCG32RNG { + buffer.PRNGBuffer mut: - state u64 = u64(0x853c49e6748fea9b) ^ seed.time_seed_64() - inc u64 = u64(0xda3e39cb94b95bdb) ^ seed.time_seed_64() - bytes_left int - buffer u32 + state u64 = u64(0x853c49e6748fea9b) ^ seed.time_seed_64() + inc u64 = u64(0xda3e39cb94b95bdb) ^ seed.time_seed_64() } // seed seeds the PCG32RNG with 4 `u32` values. diff --git a/vlib/rand/rand.v b/vlib/rand/rand.v index 3544d71189..5fa5128d53 100644 --- a/vlib/rand/rand.v +++ b/vlib/rand/rand.v @@ -274,34 +274,79 @@ pub fn (mut rng PRNG) ascii(len int) string { return internal_string_from_set(mut rng, rand.ascii_chars, len) } -// Configuration struct for the shuffle functions. -// The start index is inclusive and the end index is exclusive. -// Set the end to 0 to shuffle until the end of the array. -[params] -pub struct ShuffleConfigStruct { -pub: - start int - end int +// bernoulli returns true with a probability p. Note that 0 <= p <= 1. +pub fn (mut rng PRNG) bernoulli(p f64) ?bool { + if p < 0 || p > 1 { + return error('$p is not a valid probability value.') + } + return rng.f64() <= p } -fn (config ShuffleConfigStruct) validate_for(a []T) ? { - if config.start < 0 || config.start >= a.len { - return error("argument 'config.start' must be in range [0, a.len)") +// normal returns a normally distributed pseudorandom f64 in range `[0, 1)`. +// NOTE: Use normal_pair() instead if you're generating a lot of normal variates. +pub fn (mut rng PRNG) normal(conf config.NormalConfigStruct) ?f64 { + x, _ := rng.normal_pair(conf)? + return x +} + +// normal_pair returns a pair of normally distributed pseudorandom f64 in range `[0, 1)`. +pub fn (mut rng PRNG) normal_pair(conf config.NormalConfigStruct) ?(f64, f64) { + if conf.sigma <= 0 { + return error('Standard deviation must be positive') } - if config.end < 0 || config.end > a.len { - return error("argument 'config.end' must be in range [0, a.len]") + // This is an implementation of the Marsaglia polar method + // See: https://doi.org/10.1137%2F1006063 + // Also: https://en.wikipedia.org/wiki/Marsaglia_polar_method + for { + u := rng.f64_in_range(-1, 1) or { 0.0 } + v := rng.f64_in_range(-1, 1) or { 0.0 } + + s := u * u + v * v + if s >= 1 || s == 0 { + continue + } + t := msqrt(-2 * mlog(s) / s) + x := conf.mu + conf.sigma * t * u + y := conf.mu + conf.sigma * t * v + return x, y } + return error('Implementation error. Please file an issue.') +} + +// binomial returns the number of successful trials out of n when the +// probability of success for each trial is p. +pub fn (mut rng PRNG) binomial(n int, p f64) ?int { + if p < 0 || p > 1 { + return error('$p is not a valid probability value.') + } + mut count := 0 + for _ in 0 .. n { + if rng.bernoulli(p)! { + count++ + } + } + return count +} + +// exponential returns an exponentially distributed random number with the rate paremeter +// lambda. It is expected that lambda is positive. +pub fn (mut rng PRNG) exponential(lambda f64) f64 { + if lambda <= 0 { + panic('The rate (lambda) must be positive.') + } + // Use the inverse transform sampling method + return -mlog(rng.f64()) / lambda } // shuffle randomly permutates the elements in `a`. The range for shuffling is // optional and the entire array is shuffled by default. Leave the end as 0 to // shuffle all elements until the end. [direct_array_access] -pub fn (mut rng PRNG) shuffle(mut a []T, config ShuffleConfigStruct) ? { +pub fn (mut rng PRNG) shuffle(mut a []T, config config.ShuffleConfigStruct) ? { config.validate_for(a)? new_end := if config.end == 0 { a.len } else { config.end } for i in config.start .. new_end { - x := rng.int_in_range(i, new_end) or { config.start } + x := rng.int_in_range(i, new_end) or { config.start + i } // swap a_i := a[i] a[i] = a[x] @@ -311,7 +356,7 @@ pub fn (mut rng PRNG) shuffle(mut a []T, config ShuffleConfigStruct) ? { // shuffle_clone returns a random permutation of the elements in `a`. // The permutation is done on a fresh clone of `a`, so `a` remains unchanged. -pub fn (mut rng PRNG) shuffle_clone(a []T, config ShuffleConfigStruct) ?[]T { +pub fn (mut rng PRNG) shuffle_clone(a []T, config config.ShuffleConfigStruct) ?[]T { mut res := a.clone() rng.shuffle(mut res, config)? return res @@ -541,13 +586,13 @@ pub fn ascii(len int) string { // shuffle randomly permutates the elements in `a`. The range for shuffling is // optional and the entire array is shuffled by default. Leave the end as 0 to // shuffle all elements until the end. -pub fn shuffle(mut a []T, config ShuffleConfigStruct) ? { +pub fn shuffle(mut a []T, config config.ShuffleConfigStruct) ? { default_rng.shuffle(mut a, config)? } // shuffle_clone returns a random permutation of the elements in `a`. // The permutation is done on a fresh clone of `a`, so `a` remains unchanged. -pub fn shuffle_clone(a []T, config ShuffleConfigStruct) ?[]T { +pub fn shuffle_clone(a []T, config config.ShuffleConfigStruct) ?[]T { return default_rng.shuffle_clone(a, config) } @@ -563,3 +608,31 @@ pub fn choose(array []T, k int) ?[]T { pub fn sample(array []T, k int) []T { return default_rng.sample(array, k) } + +// bernoulli returns true with a probability p. Note that 0 <= p <= 1. +pub fn bernoulli(p f64) ?bool { + return default_rng.bernoulli(p) +} + +// normal returns a normally distributed pseudorandom f64 in range `[0, 1)`. +// NOTE: Use normal_pair() instead if you're generating a lot of normal variates. +pub fn normal(conf config.NormalConfigStruct) ?f64 { + return default_rng.normal(conf) +} + +// normal_pair returns a pair of normally distributed pseudorandom f64 in range `[0, 1)`. +pub fn normal_pair(conf config.NormalConfigStruct) ?(f64, f64) { + return default_rng.normal_pair(conf) +} + +// binomial returns the number of successful trials out of n when the +// probability of success for each trial is p. +pub fn binomial(n int, p f64) ?int { + return default_rng.binomial(n, p) +} + +// exponential returns an exponentially distributed random number with the rate paremeter +// lambda. It is expected that lambda is positive. +pub fn exponential(lambda f64) f64 { + return default_rng.exponential(lambda) +} diff --git a/vlib/rand/splitmix64/splitmix64.v b/vlib/rand/splitmix64/splitmix64.v index 116781427b..e0bc2c1a11 100644 --- a/vlib/rand/splitmix64/splitmix64.v +++ b/vlib/rand/splitmix64/splitmix64.v @@ -4,11 +4,13 @@ module splitmix64 import rand.seed +import rand.buffer pub const seed_len = 2 // SplitMix64RNG ported from http://xoshiro.di.unimi.it/splitmix64.c pub struct SplitMix64RNG { + buffer.PRNGBuffer mut: state u64 = seed.time_seed_64() bytes_left int diff --git a/vlib/rand/sys/system_rng.c.v b/vlib/rand/sys/system_rng.c.v index 3b47748910..4292811781 100644 --- a/vlib/rand/sys/system_rng.c.v +++ b/vlib/rand/sys/system_rng.c.v @@ -4,6 +4,7 @@ module sys import math.bits +import rand.buffer import rand.seed // Implementation note: @@ -36,10 +37,9 @@ fn calculate_iterations_for(bits int) int { // SysRNG is the PRNG provided by default in the libc implementiation that V uses. pub struct SysRNG { + buffer.PRNGBuffer mut: - seed u32 = seed.time_seed_32() - buffer int - bytes_left int + seed u32 = seed.time_seed_32() } // r.seed() sets the seed of the accepting SysRNG to the given data. @@ -71,7 +71,7 @@ pub fn (mut r SysRNG) u8() u8 { r.buffer >>= 8 return value } - r.buffer = r.default_rand() + r.buffer = u64(r.default_rand()) r.bytes_left = sys.rand_bytesize - 1 value := u8(r.buffer) r.buffer >>= 8 diff --git a/vlib/rand/wyrand/wyrand.v b/vlib/rand/wyrand/wyrand.v index 6a0e41abb1..7c5c08c6ee 100644 --- a/vlib/rand/wyrand/wyrand.v +++ b/vlib/rand/wyrand/wyrand.v @@ -4,6 +4,7 @@ module wyrand import hash +import rand.buffer import rand.seed // Redefinition of some constants that we will need for pseudorandom number generation. @@ -16,6 +17,7 @@ pub const seed_len = 2 // WyRandRNG is a RNG based on the WyHash hashing algorithm. pub struct WyRandRNG { + buffer.PRNGBuffer mut: state u64 = seed.time_seed_64() bytes_left int