diff --git a/vlib/rand/util/util.v b/vlib/rand/util/util.v
index 9f52a8f5a6..920e2adf90 100644
--- a/vlib/rand/util/util.v
+++ b/vlib/rand/util/util.v
@@ -3,4 +3,49 @@
 // that can be found in the LICENSE file.
 module util
 
+import rand
+import arrays
 
+// sample_nr returns a sample of `k` elements of `array`, drawn without replacement.
+// No index is picked twice, so `k` must not exceed `array.len`. If the array
+// itself holds repeated values, the sample may still contain repeats.
+// It panics when `k` is negative or greater than `array.len`.
+pub fn sample_nr<T>(array []T, k int) []T {
+	n := array.len
+	if k < 0 {
+		panic('Cannot sample a negative number of elements ($k).')
+	}
+	if k > n {
+		panic('Cannot sample $k elements without replacement from a $n-element array.')
+	}
+	mut results := []T{len: k}
+	mut indices := []int{len: n}
+	// Initialize with all indices
+	for i, mut v in indices {
+		v = i
+	}
+	// Randomize the index list; only its first k entries are read below.
+	arrays.shuffle(mut indices, k)
+	for i in 0 .. k {
+		results[i] = array[indices[i]]
+	}
+	return results
+}
+
+// sample_r returns a sample of `k` elements of `array`, drawn with replacement.
+// Elements can repeat, and `k` may exceed `array.len`.
+// It panics when `k` is negative, or when `k > 0` and the array is empty.
+pub fn sample_r<T>(array []T, k int) []T {
+	n := array.len
+	if k < 0 {
+		panic('Cannot sample a negative number of elements ($k).')
+	}
+	if n == 0 && k > 0 {
+		panic('Cannot sample $k elements from an empty array.')
+	}
+	mut results := []T{len: k}
+	for i in 0 .. k {
+		results[i] = array[rand.intn(n)]
+	}
+	return results
+}
diff --git a/vlib/rand/util/util_test.v b/vlib/rand/util/util_test.v
new file mode 100644
index 0000000000..947be117b9
--- /dev/null
+++ b/vlib/rand/util/util_test.v
@@ -0,0 +1,33 @@
+import rand.util
+
+// test_sample_nr checks that a sample without replacement has the requested length and no repeated element.
+fn test_sample_nr() {
+	lengths := [1, 3, 4, 5, 6, 7]
+	a := ['one', 'two', 'three', 'four', 'five', 'six', 'seven']
+	for length in lengths {
+		b := util.sample_nr(a, length)
+		assert b.len == length
+		for element in b {
+			assert element in a
+			// make sure every element occurs once
+			mut count := 0
+			for e in b {
+				if e == element {
+					count++
+				}
+			}
+			assert count == 1
+		}
+	}
+}
+
+// test_sample_r checks that a sample with replacement may exceed the array size and only holds array elements.
+fn test_sample_r() {
+	k := 20
+	a := ['heads', 'tails']
+	b := util.sample_r(a, k)
+	assert b.len == k
+	for element in b {
+		assert element in a
+	}
+}