rand: further bridge gap b/w rand module and PRNG interface (#13719)

pull/13724/head
Subhomoy Haldar 2022-03-12 13:11:12 +05:30 committed by GitHub
parent a3e9409196
commit 40504e8600
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 142 additions and 52 deletions

View File

@ -1,12 +1,14 @@
module rand
import time
const clock_seq_hi_and_reserved_valid_values = [`8`, `9`, `a`, `b`]
// uuid_v4 generates a random (v4) UUID.
// The random data comes from `default_rng`, which is handed over to
// `internal_uuid_v4`; that helper builds the returned string.
// See https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)
pub fn uuid_v4() string {
return internal_uuid_v4(mut default_rng)
}
fn internal_uuid_v4(mut rng PRNG) string {
buflen := 36
mut buf := unsafe { malloc_noscan(37) }
mut i_buf := 0
@ -14,7 +16,7 @@ pub fn uuid_v4() string {
mut d := byte(0)
for i_buf < buflen {
mut c := 0
x = default_rng.u64()
x = rng.u64()
// do most of the bit manipulation at once:
x &= 0x0F0F0F0F0F0F0F0F
x += 0x3030303030303030
@ -54,18 +56,7 @@ pub fn uuid_v4() string {
const ulid_encoding = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'
// ulid generates a Unique Lexicographically sortable IDentifier.
// It reads the current UTC time as Unix milliseconds and forwards that
// timestamp to `ulid_at_millisecond`, whose result is returned unchanged.
// See https://github.com/ulid/spec .
// Note: ULIDs can leak timing information, if you make them public, because
// you can infer the rate at which some resource is being created, like
// users or business transactions.
// (https://news.ycombinator.com/item?id=14526173)
pub fn ulid() string {
return ulid_at_millisecond(u64(time.utc().unix_time_milli()))
}
// ulid_at_millisecond does the same as `ulid` but takes a custom Unix millisecond timestamp via `unix_time_milli`.
pub fn ulid_at_millisecond(unix_time_milli u64) string {
fn internal_ulid_at_millisecond(mut rng PRNG, unix_time_milli u64) string {
buflen := 26
mut buf := unsafe { malloc_noscan(27) }
mut t := unix_time_milli
@ -78,7 +69,7 @@ pub fn ulid_at_millisecond(unix_time_milli u64) string {
i--
}
// first rand set
mut x := default_rng.u64()
mut x := rng.u64()
i = 10
for i < 19 {
unsafe {
@ -88,7 +79,7 @@ pub fn ulid_at_millisecond(unix_time_milli u64) string {
i++
}
// second rand set
x = default_rng.u64()
x = rng.u64()
for i < 26 {
unsafe {
buf[i] = rand.ulid_encoding[x & 0x1F]
@ -102,8 +93,7 @@ pub fn ulid_at_millisecond(unix_time_milli u64) string {
}
}
// string_from_set returns a string of length `len` containing random characters sampled from the given `charset`
pub fn string_from_set(charset string, len int) string {
fn internal_string_from_set(mut rng PRNG, charset string, len int) string {
if len == 0 {
return ''
}
@ -119,21 +109,6 @@ pub fn string_from_set(charset string, len int) string {
return unsafe { buf.vstring_with_len(len) }
}
// string returns a string of length `len` containing random characters in range `[a-zA-Z]`.
// It is a shorthand for calling `string_from_set` with the `english_letters` charset.
pub fn string(len int) string {
return string_from_set(english_letters, len)
}
// hex returns a hexadecimal number of length `len` containing random characters in range `[a-f0-9]`.
// It is a shorthand for calling `string_from_set` with the `hex_chars` charset.
pub fn hex(len int) string {
return string_from_set(hex_chars, len)
}
// ascii returns a random string of the printable ASCII characters with length `len`.
// It is a shorthand for calling `string_from_set` with the `ascii_chars` charset.
pub fn ascii(len int) string {
return string_from_set(ascii_chars, len)
}
fn deinit() {
unsafe {
default_rng.free() // free the implementation

View File

@ -1,16 +1,14 @@
module rand
import time
// init initializes the default RNG.
// It assigns the instance returned by `new_default()` to `default_rng`.
// NOTE(review): presumably invoked automatically as the module initializer - confirm.
fn init() {
default_rng = new_default()
}
pub fn string(len int) string {
fn internal_string_from_set(mut rng PRNG, charset string, len int) string {
result := ''
#
#const characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
#const characters = charset.str;
#const charactersLength = characters.length;
#for (let i = 0;i < len.val;i++)
#result.str += characters.charAt(Math.random() * charactersLength);
@ -22,18 +20,7 @@ const (
ulid_encoding = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'
)
// ulid generates a Unique Lexicographically sortable IDentifier.
// It reads the current UTC time as Unix milliseconds and forwards that
// timestamp to `ulid_at_millisecond`, whose result is returned unchanged.
// See https://github.com/ulid/spec .
// Note: ULIDs can leak timing information, if you make them public, because
// you can infer the rate at which some resource is being created, like
// users or business transactions.
// (https://news.ycombinator.com/item?id=14526173)
pub fn ulid() string {
return ulid_at_millisecond(u64(time.utc().unix_time_milli()))
}
// ulid_at_millisecond does the same as `ulid` but takes a custom Unix millisecond timestamp via `unix_time_milli`.
pub fn ulid_at_millisecond(unix_time_milli u64) string {
fn internal_ulid_at_millisecond(mut rng PRNG, unix_time_milli u64) string {
mut buf := []byte{cap: 27}
mut t := unix_time_milli
mut i := 9
@ -43,7 +30,7 @@ pub fn ulid_at_millisecond(unix_time_milli u64) string {
i--
}
mut x := default_rng.u64()
mut x := rng.u64()
i = 10
for i < 19 {
buf[i] = rand.ulid_encoding[int(x & 0x1f)]
@ -52,7 +39,7 @@ pub fn ulid_at_millisecond(unix_time_milli u64) string {
i++
}
x = default_rng.u64()
x = rng.u64()
for i < 26 {
buf[i] = rand.ulid_encoding[int(x & 0x1f)]
x = x >> 5

View File

@ -8,6 +8,7 @@ import math.bits
import rand.config
import rand.constants
import rand.wyrand
import time
// PRNG is a common interface for all PRNGs that can be used seamlessly with the rand
// module's API. It defines all the methods that a PRNG (in the vlib or custom made) must
@ -36,6 +37,11 @@ pub fn (mut rng PRNG) bytes(bytes_needed int) ?[]byte {
return buffer
}
// read fills in `buf` with a maximum of `buf.len` random bytes.
// The filling itself is done by `read_internal`, which receives this PRNG
// as the source of randomness together with the buffer to overwrite.
pub fn (mut rng PRNG) read(mut buf []byte) {
read_internal(mut rng, mut buf)
}
// u32n returns a uniformly distributed pseudorandom 32-bit unsigned `u32` in range `[0, max)`.
[inline]
pub fn (mut rng PRNG) u32n(max u32) ?u32 {
@ -233,6 +239,41 @@ pub fn (mut rng PRNG) f64_in_range(min f64, max f64) ?f64 {
return min + rng.f64n(max - min) ?
}
// ulid generates a Unique Lexicographically sortable IDentifier.
// The timestamp is the current UTC time in Unix milliseconds; it is passed,
// together with this PRNG, to `internal_ulid_at_millisecond`.
// See https://github.com/ulid/spec .
// Note: ULIDs can leak timing information, if you make them public, because
// you can infer the rate at which some resource is being created, like
// users or business transactions.
// (https://news.ycombinator.com/item?id=14526173)
pub fn (mut rng PRNG) ulid() string {
return internal_ulid_at_millisecond(mut rng, u64(time.utc().unix_time_milli()))
}
// ulid_at_millisecond does the same as `ulid` but takes a custom Unix millisecond timestamp via `unix_time_milli`.
// It forwards this PRNG and the given timestamp to `internal_ulid_at_millisecond`.
pub fn (mut rng PRNG) ulid_at_millisecond(unix_time_milli u64) string {
return internal_ulid_at_millisecond(mut rng, unix_time_milli)
}
// string_from_set returns a string of length `len` containing random characters sampled from the given `charset`.
// It forwards this PRNG, `charset` and `len` to `internal_string_from_set`.
pub fn (mut rng PRNG) string_from_set(charset string, len int) string {
return internal_string_from_set(mut rng, charset, len)
}
// string returns a string of length `len` containing random characters in range `[a-zA-Z]`.
// It calls `internal_string_from_set` with this PRNG and the `rand.english_letters` charset.
pub fn (mut rng PRNG) string(len int) string {
return internal_string_from_set(mut rng, rand.english_letters, len)
}
// hex returns a hexadecimal number of length `len` containing random characters in range `[a-f0-9]`.
// It calls `internal_string_from_set` with this PRNG and the `rand.hex_chars` charset.
pub fn (mut rng PRNG) hex(len int) string {
return internal_string_from_set(mut rng, rand.hex_chars, len)
}
// ascii returns a random string of the printable ASCII characters with length `len`.
// It calls `internal_string_from_set` with this PRNG and the `rand.ascii_chars` charset.
pub fn (mut rng PRNG) ascii(len int) string {
return internal_string_from_set(mut rng, rand.ascii_chars, len)
}
__global default_rng &PRNG
// new_default returns a new instance of the default RNG. If the seed is not provided, the current time will be used to seed the instance.
@ -391,3 +432,38 @@ const (
hex_chars = 'abcdef0123456789'
ascii_chars = '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz{|}~'
)
// ulid generates a Unique Lexicographically sortable IDentifier.
// The timestamp is the current UTC time in Unix milliseconds; it is passed,
// together with `default_rng`, to `internal_ulid_at_millisecond`.
// See https://github.com/ulid/spec .
// Note: ULIDs can leak timing information, if you make them public, because
// you can infer the rate at which some resource is being created, like
// users or business transactions.
// (https://news.ycombinator.com/item?id=14526173)
pub fn ulid() string {
return internal_ulid_at_millisecond(mut default_rng, u64(time.utc().unix_time_milli()))
}
// ulid_at_millisecond does the same as `ulid` but takes a custom Unix millisecond timestamp via `unix_time_milli`.
// It forwards `default_rng` and the given timestamp to `internal_ulid_at_millisecond`.
pub fn ulid_at_millisecond(unix_time_milli u64) string {
return internal_ulid_at_millisecond(mut default_rng, unix_time_milli)
}
// string_from_set returns a string of length `len` containing random characters sampled from the given `charset`.
// It forwards `default_rng`, `charset` and `len` to `internal_string_from_set`.
pub fn string_from_set(charset string, len int) string {
return internal_string_from_set(mut default_rng, charset, len)
}
// string returns a string of length `len` containing random characters in range `[a-zA-Z]`.
// It is a shorthand for calling `string_from_set` with the `rand.english_letters` charset.
pub fn string(len int) string {
return string_from_set(rand.english_letters, len)
}
// hex returns a hexadecimal number of length `len` containing random characters in range `[a-f0-9]`.
// It is a shorthand for calling `string_from_set` with the `rand.hex_chars` charset.
pub fn hex(len int) string {
return string_from_set(rand.hex_chars, len)
}
// ascii returns a random string of the printable ASCII characters with length `len`.
// It is a shorthand for calling `string_from_set` with the `rand.ascii_chars` charset.
pub fn ascii(len int) string {
return string_from_set(rand.ascii_chars, len)
}

View File

@ -21,6 +21,28 @@ fn test_rand_bytes() ? {
dump(differences)
}
// test_prng_rand_bytes asks the current PRNG for 0 to 99 bytes and checks that
// the hex encoding of every result has twice as many characters as the number
// of bytes requested. It then counts how often the first 8 hex characters of a
// non-empty result differ from those of the previous non-empty result.
fn test_prng_rand_bytes() ? {
	mut generator := rand.get_current_rng()
	mut hex_samples := []string{}
	for wanted in 0 .. 100 {
		encoded := generator.bytes(wanted) ?.hex()
		// empty results carry no information for the comparison below
		if encoded.len > 0 {
			hex_samples << encoded
		}
		assert encoded.len == wanted * 2
	}
	mut differences := 0
	for pos in 1 .. hex_samples.len {
		// `#[..]` is used so that samples shorter than 8 characters are compared as they are
		if hex_samples[pos]#[0..8] != hex_samples[pos - 1]#[0..8] {
			differences++
		}
	}
	assert differences > 95 // normally around 98
	dump(differences)
}
fn test_rand_read() ? {
max := 50
mut a := []byte{len: max}
@ -49,3 +71,33 @@ fn test_rand_read() ? {
dump(differences)
assert differences > 11700 // normally around 11758
}
// test_prng_rand_read checks that `read`, called on the current PRNG with a
// sub-slice of a buffer, leaves the bytes before and after that sub-slice
// zeroed. It also counts how often the sub-slice itself stops being all zeros.
fn test_prng_rand_read() ? {
	max := 50
	mut generator := rand.get_current_rng()
	mut buffer := []byte{len: max}
	mut differences := 0
	for lo in 1 .. max {
		zero_head := '00'.repeat(lo)
		for hi in lo + 1 .. max {
			zero_window := '00'.repeat(hi - lo)
			zero_tail := '00'.repeat(max - hi)
			// reset the whole buffer before this window is exercised
			for pos in 0 .. max {
				buffer[pos] = 0
			}
			assert buffer[lo..hi].hex() == zero_window
			for _ in 0 .. 10 {
				generator.read(mut buffer[lo..hi])
				// everything outside of the window has to stay untouched
				assert buffer[0..lo].hex() == zero_head
				assert buffer[hi..].hex() == zero_tail
				window_changed := buffer[lo..hi].hex() != zero_window
				if window_changed {
					differences++
				}
			}
		}
	}
	dump(differences)
	assert differences > 11700 // normally around 11758
}