From 83e2a84c21d34869248d57140e94442a1db0e514 Mon Sep 17 00:00:00 2001
From: Adam Oates <31167933+islonely@users.noreply.github.com>
Date: Mon, 23 Aug 2021 22:22:46 -0500
Subject: [PATCH] encoding: add base58 support (#11288)

---
 vlib/encoding/base58/alphabet.v    |  64 ++++++++++
 vlib/encoding/base58/base58.v      | 188 +++++++++++++++++++++++++++++
 vlib/encoding/base58/base58_test.v |  91 ++++++++++++++
 3 files changed, 343 insertions(+)
 create mode 100644 vlib/encoding/base58/alphabet.v
 create mode 100644 vlib/encoding/base58/base58.v
 create mode 100644 vlib/encoding/base58/base58_test.v

diff --git a/vlib/encoding/base58/alphabet.v b/vlib/encoding/base58/alphabet.v
new file mode 100644
index 0000000000..44d4fc30f9
--- /dev/null
+++ b/vlib/encoding/base58/alphabet.v
@@ -0,0 +1,64 @@
+module base58
+
+// alphabets is a map of common base58 alphabets
+pub const alphabets = init_alphabets()
+
+// init_alphabets instantiates the preconfigured `Alphabet`s and returns them as `map[string]Alphabet`.
+// This is a temporary function. Setting const alphabets directly to the value returned in this
+// function causes a C error right now.
+fn init_alphabets() map[string]Alphabet {
+	return {
+		'btc': new_alphabet('123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz') or {
+			panic(@MOD + '.' + @FN + ': this should never happen')
+		}
+		'flickr': new_alphabet('123456789abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ') or {
+			panic(@MOD + '.' + @FN + ': this should never happen')
+		}
+		'ripple': new_alphabet('rpshnaf39wBUDNEGHJKLM4PQRST7VWXYZ2bcdeCg65jkm8oFqi1tuvAxyz') or {
+			panic(@MOD + '.' + @FN + ': this should never happen')
+		}
+	}
+}
+
+// Alphabet is the series of characters that an input
+// will be encoded to and a decode table.
+struct Alphabet {
+mut:
+	decode []i8 = []i8{len: 128, init: -1}
+	encode []byte = []byte{len: 58}
+}
+
+// str returns an Alphabet encode table byte array as a string
+pub fn (alphabet Alphabet) str() string {
+	return alphabet.encode.bytestr()
+}
+
+// new_alphabet instantiates an Alphabet object based on
+// the provided characters. It returns an error unless `str`
+// consists of exactly 58 distinct ascii characters.
+pub fn new_alphabet(str string) ?Alphabet {
+	if str.len != 58 {
+		return error(@MOD + '.' + @FN + ': string must be 58 characters in length')
+	}
+
+	mut ret := Alphabet{}
+	copy(ret.encode, str.bytes())
+
+	mut distinct := 0
+	for i, b in ret.encode {
+		// the decode table only has 128 entries, one per 7-bit ascii value
+		if b > 127 {
+			return error(@MOD + '.' + @FN + ': string must only contain ascii characters')
+		}
+		if ret.decode[b] == -1 {
+			distinct++
+		}
+		ret.decode[b] = i8(i)
+	}
+
+	if distinct != 58 {
+		return error(@MOD + '.' + @FN + ': string must not contain repeating characters')
+	}
+
+	return ret
+}
diff --git a/vlib/encoding/base58/base58.v b/vlib/encoding/base58/base58.v
new file mode 100644
index 0000000000..fb2ff72027
--- /dev/null
+++ b/vlib/encoding/base58/base58.v
@@ -0,0 +1,188 @@
+// algorithm is adapted from https://github.com/mr-tron/base58 under the MIT license
+
+module base58
+
+import math
+
+// encode_int encodes a positive integer to base58 string with Bitcoin alphabet
+pub fn encode_int(input int) ?string {
+	return encode_int_walpha(input, alphabets['btc'])
+}
+
+// encode_int_walpha encodes a positive integer to base58 string with custom alphabet
+pub fn encode_int_walpha(input int, alphabet Alphabet) ?string {
+	if input <= 0 {
+		return error(@MOD + '.' + @FN + ': input must be greater than zero')
+	}
+
+	mut buffer := []byte{}
+
+	mut i := input
+	for i > 0 {
+		remainder := i % 58
+		buffer << alphabet.encode[i8(remainder)]
+		// This needs to be casted so byte inputs can
+		// be used. i8 because remainder will never be
+		// over 58.
+		i = i / 58
+	}
+
+	return buffer.reverse().bytestr()
+}
+
+// encode encodes byte array to base58 with Bitcoin alphabet
+pub fn encode(input string) string {
+	return encode_walpha(input, alphabets['btc'])
+}
+
+// encode_walpha encodes byte array to base58 with custom alphabet
+pub fn encode_walpha(input string, alphabet Alphabet) string {
+	if input.len == 0 {
+		return ''
+	}
+
+	bin := input.bytes()
+	mut sz := bin.len
+
+	mut zcount := 0
+	for zcount < sz && bin[zcount] == 0 {
+		zcount++
+	}
+
+	// It is crucial to make this as short as possible, especially for
+	// the usual case of Bitcoin addresses
+	sz = zcount + (sz - zcount) * 555 / 406 + 1
+	// integer simplification of
+	// ceil(log(256)/log(58))
+
+	mut out := []byte{len: sz}
+	mut i := 0
+	mut high := 0
+	mut carry := u32(0)
+
+	high = sz - 1
+	for b in bin {
+		i = sz - 1
+		for carry = u32(b); i > high || carry != 0; i-- {
+			carry = carry + 256 * u32(out[i])
+			out[i] = byte(carry % 58)
+			carry /= 58
+		}
+		// everything at or below index i is still zero, so the next pass
+		// only has to rescale the digits above it (plus any carry that spills over)
+		high = i
+	}
+
+	// determine additional "zero-gap" in the buffer, aside from zcount
+	for i = zcount; i < sz && out[i] == 0; i++ {}
+
+	// now encode the values with actual alphabet in-place
+	val := out[i - zcount..]
+	sz = val.len
+	for i = 0; i < sz; i++ {
+		out[i] = alphabet.encode[val[i]]
+	}
+
+	return out[..sz].bytestr()
+}
+
+// decode_int decodes base58 string to an integer with Bitcoin alphabet
+pub fn decode_int(input string) ?int {
+	return decode_int_walpha(input, alphabets['btc'])
+}
+
+// decode_int_walpha decodes base58 string to an integer with custom alphabet.
+// It returns an error when `input` contains a byte that is not part of
+// `alphabet`, or when the decoded value does not fit in an `int`.
+pub fn decode_int_walpha(input string, alphabet Alphabet) ?int {
+	mut total := 0 // to hold the results
+	for ch in input {
+		// the decode table only covers ascii; -1 marks bytes outside the alphabet
+		if ch > 127 || alphabet.decode[ch] == -1 {
+			return error(@MOD + '.' + @FN +
+				': input string contains values not found in the provided alphabet')
+		}
+
+		digit := int(alphabet.decode[ch])
+		// refuse inputs that would overflow `total` instead of silently wrapping
+		if total > (math.max_i32 - digit) / 58 {
+			return error(@MOD + '.' + @FN + ': input string does not fit in an int')
+		}
+		total = total * 58 + digit
+	}
+
+	return total
+}
+
+// decode decodes base58 string using the Bitcoin alphabet
+pub fn decode(str string) ?string {
+	return decode_walpha(str, alphabets['btc'])
+}
+
+// decode_walpha decodes base58 string using custom alphabet
+pub fn decode_walpha(str string, alphabet Alphabet) ?string {
+	if str.len == 0 {
+		return ''
+	}
+
+	zero := alphabet.encode[0]
+	b58sz := str.len
+
+	mut zcount := 0
+	for i := 0; i < b58sz && str[i] == zero; i++ {
+		zcount++
+	}
+
+	mut t := u64(0)
+	mut c := u64(0)
+
+	// the 32-bit algorithm stretches the result up to 2x
+	mut binu := []byte{len: 2 * ((b58sz * 406 / 555) + 1)}
+	mut outi := []u32{len: (b58sz + 3) / 4}
+
+	for _, r in str {
+		// the decode table only covers ascii, so reject high-bit bytes before indexing it
+		if r > 127 {
+			return error(@MOD + '.' + @FN +
+				': high-bit set on invalid digit; outside of ascii range ($r)')
+		}
+		if alphabet.decode[r] == -1 {
+			return error(@MOD + '.' + @FN + ': invalid base58 digit ($r)')
+		}
+
+		c = u64(alphabet.decode[r])
+
+		for j := outi.len - 1; j >= 0; j-- {
+			t = u64(outi[j]) * 58 + c
+			c = t >> 32
+			outi[j] = u32(t & 0xffffffff)
+		}
+	}
+
+	// initial mask depend on b58sz, on further loops it always starts at 24 bits
+	mut mask := (u32(b58sz % 4) * 8)
+	if mask == 0 {
+		mask = 32
+	}
+	mask -= 8
+
+	mut out_len := 0
+	for j := 0; j < outi.len; j++ {
+		for mask < 32 { // loop relies on u32 overflow
+			binu[out_len] = byte(outi[j] >> mask)
+			mask -= 8
+			out_len++
+		}
+		mask = 24
+	}
+
+	// find the most significant byte post-decode, if any
+	for msb := zcount; msb < binu.len; msb++ {
+		if binu[msb] > 0 {
+			return binu[msb - zcount..out_len].bytestr()
+		}
+	}
+
+	// it's all zeroes
+	return binu[..out_len].bytestr()
+}
diff --git a/vlib/encoding/base58/base58_test.v b/vlib/encoding/base58/base58_test.v
new file mode 100644
index 0000000000..5cbd37b5bd
--- /dev/null
+++ b/vlib/encoding/base58/base58_test.v
@@ -0,0 +1,91 @@
+module base58
+
+fn main() {
+	test_encode_int() or {}
+	test_decode_int() or {}
+	test_encode_string()
+	test_fails() or {}
+}
+
+fn test_encode_int() ? {
+	a := 0x24 // should be 'd' in base58
+	assert encode_int(a) ? == 'd'
+
+	test_encode_int_walpha() ?
+}
+
+fn test_encode_int_walpha() ? {
+	// random alphabet
+	abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
+		panic(@MOD + '.' + @FN + ': this should never happen')
+	}
+	a := 0x24 // should be '_' in base58 with our custom alphabet
+	assert encode_int_walpha(a, abc) ? == '_'
+}
+
+fn test_decode_int() ? {
+	a := 'd'
+	assert decode_int(a) ? == 0x24
+
+	test_decode_int_walpha() ?
+}
+
+fn test_decode_int_walpha() ? {
+	abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
+		panic(@MOD + '.' + @FN + ': this should never happen')
+	}
+	a := '_'
+	assert decode_int_walpha(a, abc) ? == 0x24
+}
+
+fn test_encode_string() {
+	// should be 'TtaR6twpTGu8VpY' in base58 and '0P7yfPSL0pQh2L5' with our custom alphabet
+	a := 'lorem ipsum'
+	assert encode(a) == 'TtaR6twpTGu8VpY'
+	// regression: a byte that produces no carry must still rescale the higher digits
+	assert encode('t1') == '9qr'
+
+	abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
+		panic(@MOD + '.' + @FN + ': this should never happen')
+	}
+	assert encode_walpha(a, abc) == '0P7yfPSL0pQh2L5'
+}
+
+fn test_decode_string() ? {
+	a := 'TtaR6twpTGu8VpY'
+	assert decode(a) ? == 'lorem ipsum'
+
+	abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') or {
+		panic(@MOD + '.' + @FN + ': this should never happen')
+	}
+	b := '0P7yfPSL0pQh2L5'
+	assert decode_walpha(b, abc) ? == 'lorem ipsum'
+}
+
+fn test_fails() ? {
+	a := -238
+	b := 0
+	if z := encode_int(a) {
+		return error(@MOD + '.' + @FN + ': expected encode_int to fail, got $z')
+	}
+	if z := encode_int(b) {
+		return error(@MOD + '.' + @FN + ': expected encode_int to fail, got $z')
+	}
+
+	c := '!'
+	if z := decode_int(c) {
+		return error(@MOD + '.' + @FN + ': expected decode_int to fail, got $z')
+	}
+	if z := decode(c) {
+		return error(@MOD + '.' + @FN + ': expected decode to fail, got $z')
+	}
+
+	// repeating character
+	if abc := new_alphabet('aaaaafghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUV') {
+		return error(@MOD + '.' + @FN + ': expected new_alphabet to fail, got $abc')
+	}
+	// more than 58 characters long
+	if abc := new_alphabet('abcdefghij\$lmnopqrstuvwxyz0123456789_ABCDEFGHIJLMNOPQRSTUVWXYZ') {
+		return error(@MOD + '.' + @FN + ': expected new_alphabet to fail, got $abc')
+	}
+}