rand: move dist functions to top module and PRNG interface; minor cleanup (#14481)

2022-05-22 15:51:52 +05:30 · 2022-05-22 15:51:52 +05:30 · 3647fb4def
parent 64a686f41f
commit 3647fb4def
13 changed files with 290 additions and 134 deletions
--- a/vlib/rand/buffer/buffer.v
+++ b/vlib/rand/buffer/buffer.v
@ -0,0 +1,10 @@
+// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module buffer
+
+pub struct PRNGBuffer {
+mut:
+	bytes_left int
+	buffer     u64
+}
--- a/vlib/rand/config/config.v
+++ b/vlib/rand/config/config.v
@ -1,3 +1,6 @@
+// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
 module config

 import rand.seed
@ -12,3 +15,35 @@ pub struct PRNGConfigStruct {
 pub:
 	seed_ []u32 = seed.time_seed_array(2)
 }
+
+// Configuration struct for generating normally distributed floats. The default value for
+// `mu` is 0 and the default value for `sigma` is 1.
+[params]
+pub struct NormalConfigStruct {
+pub:
+	mu    f64 = 0.0
+	sigma f64 = 1.0
+}
+
+// Configuration struct for the shuffle functions.
+// The start index is inclusive and the end index is exclusive.
+// Set the end to 0 to shuffle until the end of the array.
+[params]
+pub struct ShuffleConfigStruct {
+pub:
+	start int
+	end   int
+}
+
+// validate_for is a helper function for validating the configuration struct for the given array.
+pub fn (config ShuffleConfigStruct) validate_for<T>(a []T) ? {
+	if config.start < 0 || config.start >= a.len {
+		return error("argument 'config.start' must be in range [0, a.len)")
+	}
+	if config.end < 0 || config.end > a.len {
+		return error("argument 'config.end' must be in range [0, a.len]")
+	}
+	if config.end != 0 && config.end <= config.start {
+		return error("argument 'config.end' must be greater than 'config.start'")
+	}
+}
--- a/vlib/rand/dist/README.md
+++ b/vlib/rand/dist/README.md
@ -1,10 +0,0 @@
-# Non-Uniform Distribution Functions
-
-This module contains functions for sampling from non-uniform distributions.
-
-All implementations of the `rand.PRNG` interface generate numbers from uniform
-distributions. This library exists to allow the generation of pseudorandom numbers
-sampled from non-uniform distributions. Additionally, it allows the user to use any
-PRNG of their choice. This is because the default RNG can be reassigned to a different
-generator. It can either be one of the pre-existing one (which are well-tested and
-recommended) or a custom user-defined one. See `rand.set_rng()`.
--- a/vlib/rand/dist/dist.v
+++ b/vlib/rand/dist/dist.v
@ -1,85 +0,0 @@
-// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
-// Use of this source code is governed by an MIT license
-// that can be found in the LICENSE file.
-module dist
-
-import math
-import rand
-
-fn check_probability_range(p f64) {
-	if p < 0 || p > 1 {
-		panic('$p is not a valid probability value.')
-	}
-}
-
-// bernoulli returns true with a probability p. Note that 0 <= p <= 1.
-pub fn bernoulli(p f64) bool {
-	check_probability_range(p)
-	return rand.f64() <= p
-}
-
-// binomial returns the number of successful trials out of n when the
-// probability of success for each trial is p.
-pub fn binomial(n int, p f64) int {
-	check_probability_range(p)
-	mut count := 0
-	for _ in 0 .. n {
-		if bernoulli(p) {
-			count++
-		}
-	}
-	return count
-}
-
-// Configuration struct for the `normal_pair` function. The default value for
-// `mu` is 0 and the default value for `sigma` is 1.
-pub struct NormalConfigStruct {
-	mu    f64 = 0.0
-	sigma f64 = 1.0
-}
-
-// normal_pair returns a pair of normally distributed random numbers with the mean mu
-// and standard deviation sigma. If not specified, mu is 0 and sigma is 1. Intended usage is
-// `x, y := normal_pair(mu: mean, sigma: stdev)`, or `x, y := normal_pair()`.
-pub fn normal_pair(config NormalConfigStruct) (f64, f64) {
-	if config.sigma <= 0 {
-		panic('The standard deviation has to be positive.')
-	}
-	// This is an implementation of the Marsaglia polar method
-	// See: https://doi.org/10.1137%2F1006063
-	// Also: https://en.wikipedia.org/wiki/Marsaglia_polar_method
-	for {
-		u := rand.f64_in_range(-1, 1) or { 0.0 }
-		v := rand.f64_in_range(-1, 1) or { 0.0 }
-
-		s := u * u + v * v
-		if s >= 1 || s == 0 {
-			continue
-		}
-		t := math.sqrt(-2 * math.log(s) / s)
-		x := config.mu + config.sigma * t * u
-		y := config.mu + config.sigma * t * v
-		return x, y
-	}
-	return config.mu, config.mu
-}
-
-// normal returns a normally distributed random number with the mean mu and standard deviation
-// sigma. If not specified, mu is 0 and sigma is 1. Intended usage is
-// `x := normal(mu: mean, sigma: etdev)` or `x := normal()`.
-// **NOTE:** If you are generating a lot of normal variates, use `the normal_pair` function
-// instead. This function discards one of the two variates generated by the `normal_pair` function.
-pub fn normal(config NormalConfigStruct) f64 {
-	x, _ := normal_pair(config)
-	return x
-}
-
-// exponential returns an exponentially distributed random number with the rate paremeter
-// lambda. It is expected that lambda is positive.
-pub fn exponential(lambda f64) f64 {
-	if lambda <= 0 {
-		panic('The rate (lambda) must be positive.')
-	}
-	// Use the inverse transform sampling method
-	return -math.log(rand.f64()) / lambda
-}
--- a/vlib/rand/dist/dist_test.v
+++ b/vlib/rand/dist/dist_test.v
@ -1,6 +1,5 @@
 import math
 import rand
-import rand.dist

 const (
 	// The sample size to be used
@ -20,7 +19,7 @@ fn test_bernoulli() {
 		for p in ps {
 			mut successes := 0
 			for _ in 0 .. count {
-				if dist.bernoulli(p) {
+				if rand.bernoulli(p) or { false } {
 					successes++
 				}
 			}
@ -43,7 +42,7 @@ fn test_binomial() {
 				mut sum := 0
 				mut var := 0.0
 				for _ in 0 .. count {
-					x := dist.binomial(n, p)
+					x := rand.binomial(n, p) or { 0 }
 					sum += x
 					dist := (x - np)
 					var += dist * dist
@ -68,7 +67,7 @@ fn test_normal_pair() {
 				mut sum := 0.0
 				mut var := 0.0
 				for _ in 0 .. count {
-					x, y := dist.normal_pair(mu: mu, sigma: sigma)
+					x, y := rand.normal_pair(mu: mu, sigma: sigma) or { 0.0, 0.0 }
 					sum += x + y
 					dist_x := x - mu
 					dist_y := y - mu
@ -95,7 +94,7 @@ fn test_normal() {
 				mut sum := 0.0
 				mut var := 0.0
 				for _ in 0 .. count {
-					x := dist.normal(mu: mu, sigma: sigma)
+					x := rand.normal(mu: mu, sigma: sigma) or { 0.0 }
 					sum += x
 					dist := x - mu
 					var += dist * dist
@ -120,7 +119,7 @@ fn test_exponential() {
 			mut sum := 0.0
 			mut var := 0.0
 			for _ in 0 .. count {
-				x := dist.exponential(lambda)
+				x := rand.exponential(lambda)
 				sum += x
 				dist := x - mu
 				var += dist * dist
--- a/vlib/rand/mini_math.v
+++ b/vlib/rand/mini_math.v
@ -0,0 +1,130 @@
+// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module rand
+
+// NOTE: mini_math.v exists, so that we can avoid `import math`,
+// just for the math.log and math.sqrt functions needed for the
+// non uniform random number redistribution functions.
+// Importing math is relatively heavy, both in terms of compilation
+// speed (more source to process), and in terms of increases in the
+// generated executable sizes (if the rest of the program does not use
+// math already; many programs do not need math, for example the
+// compiler itself does not, while needing random number generation.
+
+const sqrt2 = 1.41421356237309504880168872420969807856967187537694807317667974
+
+[inline]
+fn msqrt(a f64) f64 {
+	if a == 0 {
+		return a
+	}
+	mut x := a
+	z, ex := frexp(x)
+	w := x
+	// approximate square root of number between 0.5 and 1
+	// relative error of approximation = 7.47e-3
+	x = 4.173075996388649989089e-1 + 5.9016206709064458299663e-1 * z // adjust for odd powers of 2
+	if (ex & 1) != 0 {
+		x *= rand.sqrt2
+	}
+	x = scalbn(x, ex >> 1)
+	// newton iterations
+	x = 0.5 * (x + w / x)
+	x = 0.5 * (x + w / x)
+	x = 0.5 * (x + w / x)
+	return x
+}
+
+// a simplified approximation (without the edge cases), see math.log
+fn mlog(a f64) f64 {
+	ln2_lo := 1.90821492927058770002e-10
+	ln2_hi := 0.693147180369123816490
+	l1 := 0.6666666666666735130
+	l2 := 0.3999999999940941908
+	l3 := 0.2857142874366239149
+	l4 := 0.2222219843214978396
+	l5 := 0.1818357216161805012
+	l6 := 0.1531383769920937332
+	l7 := 0.1479819860511658591
+	x := a
+	mut f1, mut ki := frexp(x)
+	if f1 < rand.sqrt2 / 2 {
+		f1 *= 2
+		ki--
+	}
+	f := f1 - 1
+	k := f64(ki)
+	s := f / (2 + f)
+	s2 := s * s
+	s4 := s2 * s2
+	t1 := s2 * (l1 + s4 * (l3 + s4 * (l5 + s4 * l7)))
+	t2 := s4 * (l2 + s4 * (l4 + s4 * l6))
+	r := t1 + t2
+	hfsq := 0.5 * f * f
+	return k * ln2_hi - ((hfsq - (s * (hfsq + r) + k * ln2_lo)) - f)
+}
+
+fn frexp(x f64) (f64, int) {
+	mut y := f64_bits(x)
+	ee := int((y >> 52) & 0x7ff)
+	if ee == 0 {
+		if x != 0.0 {
+			x1p64 := f64_from_bits(u64(0x43f0000000000000))
+			z, e_ := frexp(x * x1p64)
+			return z, e_ - 64
+		}
+		return x, 0
+	} else if ee == 0x7ff {
+		return x, 0
+	}
+	e_ := ee - 0x3fe
+	y &= u64(0x800fffffffffffff)
+	y |= u64(0x3fe0000000000000)
+	return f64_from_bits(y), e_
+}
+
+fn scalbn(x f64, n_ int) f64 {
+	mut n := n_
+	x1p1023 := f64_from_bits(u64(0x7fe0000000000000))
+	x1p53 := f64_from_bits(u64(0x4340000000000000))
+	x1p_1022 := f64_from_bits(u64(0x0010000000000000))
+
+	mut y := x
+	if n > 1023 {
+		y *= x1p1023
+		n -= 1023
+		if n > 1023 {
+			y *= x1p1023
+			n -= 1023
+			if n > 1023 {
+				n = 1023
+			}
+		}
+	} else if n < -1022 {
+		/*
+		make sure final n < -53 to avoid double
+	rounding in the subnormal range
+		*/
+		y *= x1p_1022 * x1p53
+		n += 1022 - 53
+		if n < -1022 {
+			y *= x1p_1022 * x1p53
+			n += 1022 - 53
+			if n < -1022 {
+				n = -1022
+			}
+		}
+	}
+	return y * f64_from_bits(u64((0x3ff + n)) << 52)
+}
+
+[inline]
+fn f64_from_bits(b u64) f64 {
+	return *unsafe { &f64(&b) }
+}
+
+[inline]
+fn f64_bits(f f64) u64 {
+	return *unsafe { &u64(&f) }
+}
--- a/vlib/rand/mt19937/mt19937.v
+++ b/vlib/rand/mt19937/mt19937.v
@ -3,6 +3,7 @@
 // that can be found in the LICENSE file.
 module mt19937

+import rand.buffer
 import rand.seed

 /*
@ -60,11 +61,10 @@ const (
 // MT19937RNG is generator that uses the Mersenne Twister algorithm with period 2^19937.
 // **NOTE**: The RNG is not seeded when instantiated so remember to seed it before use.
 pub struct MT19937RNG {
+	buffer.PRNGBuffer
 mut:
-	state      []u64 = get_first_state(seed.time_seed_array(2))
-	mti        int   = mt19937.nn
-	bytes_left int
-	buffer     u64
+	state []u64 = get_first_state(seed.time_seed_array(2))
+	mti   int   = mt19937.nn
 }

 fn get_first_state(seed_data []u32) []u64 {
--- a/vlib/rand/musl/musl_rng.v
+++ b/vlib/rand/musl/musl_rng.v
@ -4,15 +4,15 @@
 module musl

 import rand.seed
+import rand.buffer

 pub const seed_len = 1

 // MuslRNG ported from https://git.musl-libc.org/cgit/musl/tree/src/prng/rand_r.c
 pub struct MuslRNG {
+	buffer.PRNGBuffer
 mut:
-	state      u32 = seed.time_seed_32()
-	bytes_left int
-	buffer     u32
+	state u32 = seed.time_seed_32()
 }

 // seed sets the current random state based on `seed_data`.
--- a/vlib/rand/pcg32/pcg32.v
+++ b/vlib/rand/pcg32/pcg32.v
@ -4,6 +4,7 @@
 module pcg32

 import rand.seed
+import rand.buffer

 pub const seed_len = 4

@ -11,11 +12,10 @@ pub const seed_len = 4
 // https://github.com/imneme/pcg-c-basic/blob/master/pcg_basic.c, and
 // https://github.com/imneme/pcg-c-basic/blob/master/pcg_basic.h
 pub struct PCG32RNG {
+	buffer.PRNGBuffer
 mut:
-	state      u64 = u64(0x853c49e6748fea9b) ^ seed.time_seed_64()
-	inc        u64 = u64(0xda3e39cb94b95bdb) ^ seed.time_seed_64()
-	bytes_left int
-	buffer     u32
+	state u64 = u64(0x853c49e6748fea9b) ^ seed.time_seed_64()
+	inc   u64 = u64(0xda3e39cb94b95bdb) ^ seed.time_seed_64()
 }

 // seed seeds the PCG32RNG with 4 `u32` values.
--- a/vlib/rand/rand.v
+++ b/vlib/rand/rand.v
@ -274,34 +274,79 @@ pub fn (mut rng PRNG) ascii(len int) string {
 	return internal_string_from_set(mut rng, rand.ascii_chars, len)
 }

-// Configuration struct for the shuffle functions.
-// The start index is inclusive and the end index is exclusive.
-// Set the end to 0 to shuffle until the end of the array.
-[params]
-pub struct ShuffleConfigStruct {
-pub:
-	start int
-	end   int
+// bernoulli returns true with a probability p. Note that 0 <= p <= 1.
+pub fn (mut rng PRNG) bernoulli(p f64) ?bool {
+	if p < 0 || p > 1 {
+		return error('$p is not a valid probability value.')
+	}
+	return rng.f64() <= p
 }

-fn (config ShuffleConfigStruct) validate_for<T>(a []T) ? {
-	if config.start < 0 || config.start >= a.len {
-		return error("argument 'config.start' must be in range [0, a.len)")
+// normal returns a normally distributed pseudorandom f64 in range `[0, 1)`.
+// NOTE: Use normal_pair() instead if you're generating a lot of normal variates.
+pub fn (mut rng PRNG) normal(conf config.NormalConfigStruct) ?f64 {
+	x, _ := rng.normal_pair(conf)?
+	return x
+}
+
+// normal_pair returns a pair of normally distributed pseudorandom f64 in range `[0, 1)`.
+pub fn (mut rng PRNG) normal_pair(conf config.NormalConfigStruct) ?(f64, f64) {
+	if conf.sigma <= 0 {
+		return error('Standard deviation must be positive')
 	}
-	if config.end < 0 || config.end > a.len {
-		return error("argument 'config.end' must be in range [0, a.len]")
+	// This is an implementation of the Marsaglia polar method
+	// See: https://doi.org/10.1137%2F1006063
+	// Also: https://en.wikipedia.org/wiki/Marsaglia_polar_method
+	for {
+		u := rng.f64_in_range(-1, 1) or { 0.0 }
+		v := rng.f64_in_range(-1, 1) or { 0.0 }
+
+		s := u * u + v * v
+		if s >= 1 || s == 0 {
+			continue
+		}
+		t := msqrt(-2 * mlog(s) / s)
+		x := conf.mu + conf.sigma * t * u
+		y := conf.mu + conf.sigma * t * v
+		return x, y
 	}
+	return error('Implementation error. Please file an issue.')
+}
+
+// binomial returns the number of successful trials out of n when the
+// probability of success for each trial is p.
+pub fn (mut rng PRNG) binomial(n int, p f64) ?int {
+	if p < 0 || p > 1 {
+		return error('$p is not a valid probability value.')
+	}
+	mut count := 0
+	for _ in 0 .. n {
+		if rng.bernoulli(p)! {
+			count++
+		}
+	}
+	return count
+}
+
+// exponential returns an exponentially distributed random number with the rate paremeter
+// lambda. It is expected that lambda is positive.
+pub fn (mut rng PRNG) exponential(lambda f64) f64 {
+	if lambda <= 0 {
+		panic('The rate (lambda) must be positive.')
+	}
+	// Use the inverse transform sampling method
+	return -mlog(rng.f64()) / lambda
 }

 // shuffle randomly permutates the elements in `a`. The range for shuffling is
 // optional and the entire array is shuffled by default. Leave the end as 0 to
 // shuffle all elements until the end.
 [direct_array_access]
-pub fn (mut rng PRNG) shuffle<T>(mut a []T, config ShuffleConfigStruct) ? {
+pub fn (mut rng PRNG) shuffle<T>(mut a []T, config config.ShuffleConfigStruct) ? {
 	config.validate_for(a)?
 	new_end := if config.end == 0 { a.len } else { config.end }
 	for i in config.start .. new_end {
-		x := rng.int_in_range(i, new_end) or { config.start }
+		x := rng.int_in_range(i, new_end) or { config.start + i }
 		// swap
 		a_i := a[i]
 		a[i] = a[x]
@ -311,7 +356,7 @@ pub fn (mut rng PRNG) shuffle<T>(mut a []T, config ShuffleConfigStruct) ? {

 // shuffle_clone returns a random permutation of the elements in `a`.
 // The permutation is done on a fresh clone of `a`, so `a` remains unchanged.
-pub fn (mut rng PRNG) shuffle_clone<T>(a []T, config ShuffleConfigStruct) ?[]T {
+pub fn (mut rng PRNG) shuffle_clone<T>(a []T, config config.ShuffleConfigStruct) ?[]T {
 	mut res := a.clone()
 	rng.shuffle(mut res, config)?
 	return res
@ -541,13 +586,13 @@ pub fn ascii(len int) string {
 // shuffle randomly permutates the elements in `a`. The range for shuffling is
 // optional and the entire array is shuffled by default. Leave the end as 0 to
 // shuffle all elements until the end.
-pub fn shuffle<T>(mut a []T, config ShuffleConfigStruct) ? {
+pub fn shuffle<T>(mut a []T, config config.ShuffleConfigStruct) ? {
 	default_rng.shuffle(mut a, config)?
 }

 // shuffle_clone returns a random permutation of the elements in `a`.
 // The permutation is done on a fresh clone of `a`, so `a` remains unchanged.
-pub fn shuffle_clone<T>(a []T, config ShuffleConfigStruct) ?[]T {
+pub fn shuffle_clone<T>(a []T, config config.ShuffleConfigStruct) ?[]T {
 	return default_rng.shuffle_clone(a, config)
 }

@ -563,3 +608,31 @@ pub fn choose<T>(array []T, k int) ?[]T {
 pub fn sample<T>(array []T, k int) []T {
 	return default_rng.sample(array, k)
 }
+
+// bernoulli returns true with a probability p. Note that 0 <= p <= 1.
+pub fn bernoulli(p f64) ?bool {
+	return default_rng.bernoulli(p)
+}
+
+// normal returns a normally distributed pseudorandom f64 in range `[0, 1)`.
+// NOTE: Use normal_pair() instead if you're generating a lot of normal variates.
+pub fn normal(conf config.NormalConfigStruct) ?f64 {
+	return default_rng.normal(conf)
+}
+
+// normal_pair returns a pair of normally distributed pseudorandom f64 in range `[0, 1)`.
+pub fn normal_pair(conf config.NormalConfigStruct) ?(f64, f64) {
+	return default_rng.normal_pair(conf)
+}
+
+// binomial returns the number of successful trials out of n when the
+// probability of success for each trial is p.
+pub fn binomial(n int, p f64) ?int {
+	return default_rng.binomial(n, p)
+}
+
+// exponential returns an exponentially distributed random number with the rate paremeter
+// lambda. It is expected that lambda is positive.
+pub fn exponential(lambda f64) f64 {
+	return default_rng.exponential(lambda)
+}
--- a/vlib/rand/splitmix64/splitmix64.v
+++ b/vlib/rand/splitmix64/splitmix64.v
@ -4,11 +4,13 @@
 module splitmix64

 import rand.seed
+import rand.buffer

 pub const seed_len = 2

 // SplitMix64RNG ported from http://xoshiro.di.unimi.it/splitmix64.c
 pub struct SplitMix64RNG {
+	buffer.PRNGBuffer
 mut:
 	state      u64 = seed.time_seed_64()
 	bytes_left int
--- a/vlib/rand/sys/system_rng.c.v
+++ b/vlib/rand/sys/system_rng.c.v
@ -4,6 +4,7 @@
 module sys

 import math.bits
+import rand.buffer
 import rand.seed

 // Implementation note:
@ -36,10 +37,9 @@ fn calculate_iterations_for(bits int) int {

 // SysRNG is the PRNG provided by default in the libc implementiation that V uses.
 pub struct SysRNG {
+	buffer.PRNGBuffer
 mut:
-	seed       u32 = seed.time_seed_32()
-	buffer     int
-	bytes_left int
+	seed u32 = seed.time_seed_32()
 }

 // r.seed() sets the seed of the accepting SysRNG to the given data.
@ -71,7 +71,7 @@ pub fn (mut r SysRNG) u8() u8 {
 		r.buffer >>= 8
 		return value
 	}
-	r.buffer = r.default_rand()
+	r.buffer = u64(r.default_rand())
 	r.bytes_left = sys.rand_bytesize - 1
 	value := u8(r.buffer)
 	r.buffer >>= 8
--- a/vlib/rand/wyrand/wyrand.v
+++ b/vlib/rand/wyrand/wyrand.v
@ -4,6 +4,7 @@
 module wyrand

 import hash
+import rand.buffer
 import rand.seed

 // Redefinition of some constants that we will need for pseudorandom number generation.
@ -16,6 +17,7 @@ pub const seed_len = 2

 // WyRandRNG is a RNG based on the WyHash hashing algorithm.
 pub struct WyRandRNG {
+	buffer.PRNGBuffer
 mut:
 	state      u64 = seed.time_seed_64()
 	bytes_left int