encoding: add base58 support (#11288)

2021-08-23 22:22:46 -05:00 · 2021-08-23 22:22:46 -05:00 · 83e2a84c21
parent d78e7e3b2b
commit 83e2a84c21
3 changed files with 335 additions and 0 deletions
--- a/vlib/encoding/base58/alphabet.v
+++ b/vlib/encoding/base58/alphabet.v
@ -0,0 +1,65 @@
 module base58
 // alphabets maps a short name to a preconfigured base58 `Alphabet`.
 // The map is built by `init_alphabets`, which registers the keys
 // 'btc', 'flickr' and 'ripple'.
 pub const alphabets = init_alphabets()
 // init_alphabets builds the preconfigured `Alphabet`s ('btc', 'flickr' and 'ripple')
 // and returns them keyed by name.
 // It is meant to be temporary: per the original author's note, initialising the
 // `alphabets` const without going through a function caused a C error at the time.
 fn init_alphabets() map[string]Alphabet {
 	bitcoin := new_alphabet('123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz') or {
 		panic(@MOD + '.' + @FN + ': this should never happen')
 	}
 	flickr := new_alphabet('123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ') or {
 		panic(@MOD + '.' + @FN + ': this should never happen')
 	}
 	ripple := new_alphabet('rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz') or {
 		panic(@MOD + '.' + @FN + ': this should never happen')
 	}
 	return {
 		'btc':    bitcoin
 		'flickr': flickr
 		'ripple': ripple
 	}
 }
 // Alphabet is the series of characters that an input
 // will be encoded to and a decode table.
 struct Alphabet {
 mut:
 	// decode has 128 slots, all initialised to -1; `new_alphabet` stores the
 	// position of each alphabet character at the index of its byte value
 	decode []i8   = []i8{len: 128, init: -1}
 	// encode has 58 slots; `new_alphabet` copies the alphabet's bytes into it
 	encode []byte = []byte{len: 58}
 }
 // str returns the Alphabet's encode table (its 58 characters) as a string
 pub fn (alphabet Alphabet) str() string {
 	return alphabet.encode.bytestr()
 }
 // new_alphabet instantiates an Alphabet object based on
 // the provided characters.
 // It returns an error when `str` is not exactly 58 bytes long, contains a
 // byte outside of the ascii range, or contains the same character twice.
 pub fn new_alphabet(str string) ?Alphabet {
 	if str.len != 58 {
 		return error(@MOD + '.' + @FN + ': string must be 58 characters in length')
 	}
 	mut ret := Alphabet{}
 	copy(ret.encode, str.bytes())
 	for i, b in ret.encode {
 		// the decode table only has 128 slots, so a byte above 127 would
 		// index out of range; reject it instead
 		if b > 127 {
 			return error(@MOD + '.' + @FN + ': string must only contain ascii characters')
 		}
 		// a slot that is already filled means this character was seen before
 		if ret.decode[b] != -1 {
 			return error(@MOD + '.' + @FN + ': string must not contain repeating characters')
 		}
 		ret.decode[b] = i8(i)
 	}
 	return ret
 }
--- a/vlib/encoding/base58/base58.v
+++ b/vlib/encoding/base58/base58.v
@ -0,0 +1,181 @@
 // algorithm is adapted from https://github.com/mr-tron/base58 under the MIT license
 module base58
 import math
 // encode_int encodes a positive `int` to a base58 string with the Bitcoin alphabet.
 // Errors from `encode_int_walpha` are passed on to the caller.
 pub fn encode_int(input int) ?string {
 	btc := alphabets['btc']
 	return encode_int_walpha(input, btc)
 }
 // encode_int_walpha encodes a positive `int` to a base58 string with a custom alphabet.
 // It returns an error when `input` is zero or negative.
 pub fn encode_int_walpha(input int, alphabet Alphabet) ?string {
 	if input <= 0 {
 		return error(@MOD + '.' + @FN + ': input must be greater than zero')
 	}
 	// digits are produced least significant first and reversed at the end
 	mut digits := []byte{}
 	for rest := input; rest > 0; rest /= 58 {
 		// the remainder is always below 58, so it fits in an i8 index
 		digits << alphabet.encode[i8(rest % 58)]
 	}
 	return digits.reverse().bytestr()
 }
 // encode encodes the bytes of `input` to base58 with the Bitcoin alphabet
 pub fn encode(input string) string {
 	btc := alphabets['btc']
 	return encode_walpha(input, btc)
 }
 // encode_walpha encodes the bytes of `input` to base58 with a custom alphabet.
 // An empty input yields an empty string. Every leading zero byte of the input
 // is emitted as the first character of the alphabet.
 pub fn encode_walpha(input string, alphabet Alphabet) string {
 	if input.len == 0 {
 		return ''
 	}
 	bin := input.bytes()
 	mut sz := bin.len
 	// count the leading zero bytes, they are carried over one-to-one
 	mut zcount := 0
 	for zcount < sz && bin[zcount] == 0 {
 		zcount++
 	}
 	// It is crucial to make this as short as possible, especially for
 	// the usual case of Bitcoin addresses.
 	// 555 / 406 is an integer approximation of log(256) / log(58);
 	// the trailing + 1 rounds the estimate up.
 	sz = zcount + (sz - zcount) * 555 / 406 + 1
 	mut out := []byte{len: sz}
 	mut i := 0
 	mut carry := u32(0)
 	// every index of `out` at or below `high` is still zero
 	mut high := sz - 1
 	for b in bin {
 		i = sz - 1
 		// multiply the base58 number held in `out` by 256 and add `b`
 		for carry = u32(b); i > high || carry != 0; i-- {
 			carry = carry + 256 * u32(out[i])
 			out[i] = byte(carry % 58)
 			carry /= 58
 		}
 		// remember how far this pass reached, so that the next pass can stop
 		// there as soon as its carry is used up
 		high = i
 	}
 	// determine additional "zero-gap" in the buffer, aside from zcount
 	for i = zcount; i < sz && out[i] == 0; i++ {}
 	// now encode the values with actual alphabet in-place
 	val := out[i - zcount..]
 	sz = val.len
 	for i = 0; i < sz; i++ {
 		out[i] = alphabet.encode[val[i]]
 	}
 	return out[..sz].bytestr()
 }
 // decode_int decodes a base58 string to an `int` with the Bitcoin alphabet.
 // Errors from `decode_int_walpha` are passed on to the caller.
 pub fn decode_int(input string) ?int {
 	btc := alphabets['btc']
 	return decode_int_walpha(input, btc)
 }
 // decode_int_walpha decodes base58 string to an integer with custom alphabet.
 // An empty input decodes to 0. It returns an error when the input contains a
 // character that is not part of the alphabet, or when the decoded value does
 // not fit in an `int`.
 pub fn decode_int_walpha(input string, alphabet Alphabet) ?int {
 	// accumulate in 64 bits so that an overflow of `int` can be detected
 	mut total := i64(0)
 	for ch in input {
 		// the decode table only covers the ascii range; anything above it
 		// cannot be part of the alphabet
 		if ch > 127 || alphabet.decode[ch] == -1 {
 			return error(@MOD + '.' + @FN +
 				': input string contains values not found in the provided alphabet')
 		}
 		// most significant digit first: shift by one base58 digit, then add
 		total = total * 58 + i64(alphabet.decode[ch])
 		if total > math.max_i32 {
 			return error(@MOD + '.' + @FN + ': input string is too large to fit in an int')
 		}
 	}
 	return int(total)
 }
 // decode decodes a base58 string with the Bitcoin alphabet.
 // Errors from `decode_walpha` are passed on to the caller.
 pub fn decode(str string) ?string {
 	btc := alphabets['btc']
 	return decode_walpha(str, btc)
 }
 // decode_walpha decodes base58 string using custom alphabet.
 // An empty input decodes to an empty string. It returns an error when the
 // input contains a byte outside of the ascii range or a character that is
 // not part of the alphabet.
 pub fn decode_walpha(str string, alphabet Alphabet) ?string {
 	if str.len == 0 {
 		return ''
 	}
 	zero := alphabet.encode[0]
 	b58sz := str.len
 	// count the leading "zero" characters of the input
 	mut zcount := 0
 	for i := 0; i < b58sz && str[i] == zero; i++ {
 		zcount++
 	}
 	mut t := u64(0)
 	mut c := u64(0)
 	// the 32-bit algorithm stretches the result up to 2x
 	mut binu := []byte{len: 2 * ((b58sz * 406 / 555) + 1)}
 	mut outi := []u32{len: (b58sz + 3) / 4}
 	for _, r in str {
 		// the decode table only has 128 slots; the input comes from the
 		// caller, so report a bad byte as an error instead of panicking
 		if r > 127 {
 			return error(@MOD + '.' + @FN +
 				': high-bit set on invalid digit; outside of ascii range ($r)')
 		}
 		if alphabet.decode[r] == -1 {
 			return error(@MOD + '.' + @FN + ': invalid base58 digit ($r)')
 		}
 		c = u64(alphabet.decode[r])
 		// multiply the number held in `outi` by 58 and add the new digit,
 		// carrying from the last word towards the first
 		for j := outi.len - 1; j >= 0; j-- {
 			t = u64(outi[j]) * 58 + c
 			c = t >> 32
 			outi[j] = u32(t & 0xffffffff)
 		}
 	}
 	// initial mask depend on b58sz, on further loops it always starts at 24 bits
 	mut mask := (u32(b58sz % 4) * 8)
 	if mask == 0 {
 		mask = 32
 	}
 	mask -= 8
 	mut out_len := 0
 	for j := 0; j < outi.len; j++ {
 		for mask < 32 { // loop relies on u32 overflow: 0 - 8 wraps above 32
 			binu[out_len] = byte(outi[j] >> mask)
 			mask -= 8
 			out_len++
 		}
 		mask = 24
 	}
 	// find the most significant byte post-decode, if any
 	for msb := zcount; msb < binu.len; msb++ {
 		if binu[msb] > 0 {
 			return binu[msb - zcount..out_len].bytestr()
 		}
 	}
 	// it's all zeroes
 	return binu[..out_len].bytestr()
 }
--- a/vlib/encoding/base58/base58_test.v
+++ b/vlib/encoding/base58/base58_test.v
@ -0,0 +1,89 @@
 module base58
 // main runs every test of this file by hand.
 // A failing test aborts with its error instead of being silently ignored.
 fn main() {
 	test_encode_int() or { panic(err) }
 	test_decode_int() or { panic(err) }
 	test_encode_string()
 	test_decode_string() or { panic(err) }
 	test_fails() or { panic(err) }
 }
 // test_encode_int checks `encode_int` with the Bitcoin alphabet,
 // then runs the custom alphabet variant of the test.
 fn test_encode_int() ? {
 	// 0x24 should be 'd' in base58
 	encoded := encode_int(0x24) ?
 	assert encoded == 'd'
 	test_encode_int_walpha() ?
 }
 // test_encode_int_walpha checks `encode_int_walpha` with a custom alphabet
 fn test_encode_int_walpha() ? {
 	// random alphabet
 	custom := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
 		panic(@MOD + '.' + @FN + ': this should never happen')
 	}
 	// 0x24 should be '_' in base58 with our custom alphabet
 	encoded := encode_int_walpha(0x24, custom) ?
 	assert encoded == '_'
 }
 // test_decode_int checks `decode_int` with the Bitcoin alphabet,
 // then runs the custom alphabet variant of the test.
 fn test_decode_int() ? {
 	// 'd' should decode to 0x24
 	decoded := decode_int('d') ?
 	assert decoded == 0x24
 	test_decode_int_walpha() ?
 }
 // test_decode_int_walpha checks `decode_int_walpha` with a custom alphabet
 fn test_decode_int_walpha() ? {
 	custom := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
 		panic(@MOD + '.' + @FN + ': this should never happen')
 	}
 	// '_' should decode to 0x24 with our custom alphabet
 	decoded := decode_int_walpha('_', custom) ?
 	assert decoded == 0x24
 }
 // test_encode_string checks `encode` and `encode_walpha` on the same input
 fn test_encode_string() {
 	plain := 'lorem ipsum'
 	// with the Bitcoin alphabet
 	btc_encoded := encode(plain)
 	assert btc_encoded == 'TtaR6twpTGu8VpY'
 	// with our custom alphabet
 	custom := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
 		panic(@MOD + '.' + @FN + ': this should never happen')
 	}
 	custom_encoded := encode_walpha(plain, custom)
 	assert custom_encoded == '0P7yfPSL0pQh2L5'
 }
 // test_decode_string checks `decode` and `decode_walpha` against the
 // encoded forms of 'lorem ipsum'
 fn test_decode_string() ? {
 	// with the Bitcoin alphabet
 	btc_decoded := decode('TtaR6twpTGu8VpY') ?
 	assert btc_decoded == 'lorem ipsum'
 	// with our custom alphabet
 	custom := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
 		panic(@MOD + '.' + @FN + ': this should never happen')
 	}
 	custom_decoded := decode_walpha('0P7yfPSL0pQh2L5', custom) ?
 	assert custom_decoded == 'lorem ipsum'
 }
 // test_fails makes sure that invalid inputs are rejected.
 // It returns an error as soon as one of the calls unexpectedly succeeds.
 fn test_fails() ? {
 	// negative numbers and zero can not be encoded
 	for rejected in [-238, 0] {
 		if z := encode_int(rejected) {
 			return error(@MOD + '.' + @FN + ': expected encode_int to fail, got $z')
 		}
 	}
 	// '!' is not part of the Bitcoin alphabet
 	not_base58 := '!'
 	if z := decode_int(not_base58) {
 		return error(@MOD + '.' + @FN + ': expected decode_int to fail, got $z')
 	}
 	if z := decode(not_base58) {
 		return error(@MOD + '.' + @FN + ': expected decode to fail, got $z')
 	}
 	// repeating character
 	if abc := new_alphabet('aaaaafghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') {
 		return error(@MOD + '.' + @FN + ': expected new_alphabet to fail, got $abc')
 	}
 	// more than 58 characters long
 	if abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUVWXYZ') {
 		return error(@MOD + '.' + @FN + ': expected new_alphabet to fail, got $abc')
 	}
 }