From 2d05c906d57669a99d86a70a358b84ebfe045050 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Sat, 26 Oct 2019 18:20:36 +0300 Subject: [PATCH] base64: some optimizations * Add a test for the base64 encoding/decoding of long strings (i.e. mainly memory allocation). * Make vlib/encoding/base64/base64_memory_test.v resemble more test.v from https://github.com/kostya/benchmarks . * base64: some optimizations, also add base64.encode_in_buffer and base64.decode_in_buffer . * Fix tests passing static strings. * Reduce time needed for base64_memory_test.v . * Optimize encoding.base64.Index access too (it is static), which speeds up decoding. --- vlib/encoding/base64/base64.v | 112 +++++++++++++++------- vlib/encoding/base64/base64_memory_test.v | 32 +++++++ vlib/encoding/base64/base64_test.v | 12 ++- 3 files changed, 120 insertions(+), 36 deletions(-) create mode 100644 vlib/encoding/base64/base64_memory_test.v diff --git a/vlib/encoding/base64/base64.v b/vlib/encoding/base64/base64.v index db1fd66170..b3c844116b 100644 --- a/vlib/encoding/base64/base64.v +++ b/vlib/encoding/base64/base64.v @@ -5,16 +5,50 @@ module base64 const ( - Index = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + Index = [int(0), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 63, 62, 62, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 63, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, - 47, 48, 49, 50, 51] + 47, 48, 49, 50, 51]!! + + EndingTable = [0, 2, 1] + EncodingTable = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' ) +/** + * decode - expects a base64 encoded string. Returns its decoded version. + * @param data - the encoded input string. + * @return the decoded version of the input string data. 
+ * NB: if you need to decode many strings repeatedly, take a look at decode_in_buffer too. + */ pub fn decode(data string) string { + buffer := malloc( 3 * ((data.len + 3) / 4) ) // decode_in_buffer stores whole 3-byte groups, so round up for unpadded input + return tos(buffer, decode_in_buffer(data, mut buffer) ) +} + +/** + * encode - expects a string. Returns its base64 encoded version. + * @param data - the input string. + * @return the base64 encoded version of the input string. + * NB: base64 encoding returns a string that is ~ 4/3 larger than the input. + * NB: if you need to encode many strings repeatedly, take a look at encode_in_buffer too. + */ +pub fn encode(data string) string { + buffer := malloc( 4 * ((data.len + 2) / 3) ) + return tos(buffer, encode_in_buffer(data, mut buffer)) +} + +/** + * decode_in_buffer - expects a string reference, and a buffer in which to store its decoded version. + * @param data - a reference/pointer to the input string that will be decoded. + * @param buffer - a reference/pointer to the buffer that will hold the result. + * The buffer should be large enough (i.e. 3 * ((data.len + 3) / 4) bytes, or larger) to hold the decoded data. + * @return the actual size of the decoded data in the buffer. + * NB: this function does NOT allocate new memory, and is suitable for handling very large strings. 
+ */ +pub fn decode_in_buffer(data &string, buffer mut byteptr) int { mut padding := 0 if data.ends_with('=') { if data.ends_with('==') { @@ -29,52 +63,67 @@ pub fn decode(data string) string { mut i := 0 mut j := 0 - mut str := malloc(output_length) + mut b := &byte(0) + mut d := &byte(0) + unsafe{ + d = byteptr(data.str) + b = byteptr(buffer) + } for i < input_length { mut char_a := 0 mut char_b := 0 mut char_c := 0 mut char_d := 0 - if i < input_length { - char_a = Index[int(data[i])] + char_a = Index[d[i]] i++ } if i < input_length { - char_b = Index[int(data[i])] + char_b = Index[d[i]] i++ } if i < input_length { - char_c = Index[int(data[i])] + char_c = Index[d[i]] i++ } if i < input_length { - char_d = Index[int(data[i])] + char_d = Index[d[i]] i++ } decoded_bytes := (char_a << 18) | (char_b << 12) | (char_c << 6) | (char_d << 0) - str[j] = decoded_bytes >> 16 - str[j+1] = (decoded_bytes >> 8) & 0xff - str[j+2] = (decoded_bytes >> 0) & 0xff - + b[j] = decoded_bytes >> 16 + b[j+1] = (decoded_bytes >> 8) & 0xff + b[j+2] = (decoded_bytes >> 0) & 0xff j += 3 } - return tos(str, output_length) + return output_length } -const ( - EncodingTable = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -) - -pub fn encode(data string) string { +/** + * encode_in_buffer - expects a string reference, and a buffer in which to store its base64 encoded version. + * @param data - a reference/pointer to the input string. + * @param buffer - a reference/pointer to the buffer that will hold the result. + * The buffer should be large enough (i.e. 4/3 of the data.len, or larger) to hold the encoded data. + * @return the actual size of the encoded data in the buffer. + * NB: this function does NOT allocate new memory, and is suitable for handling very large strings. 
+ */ +pub fn encode_in_buffer(data &string, buffer mut byteptr) int { input_length := data.len output_length := 4 * ((input_length + 2) / 3) mut i := 0 mut j := 0 - mut str := malloc(output_length) + + mut d := &byte(0) + mut b := &byte(0) + mut etable := &byte(0) + unsafe{ + d = &byte(data.str) + b = &byte(buffer) + etable = &byte(EncodingTable.str) + } for i < input_length { mut octet_a := 0 @@ -82,31 +131,30 @@ pub fn encode(data string) string { mut octet_c := 0 if i < input_length { - octet_a = int(data[i]) + octet_a = int(d[i]) i++ } if i < input_length { - octet_b = int(data[i]) + octet_b = int(d[i]) i++ } if i < input_length { - octet_c = int(data[i]) + octet_c = int(d[i]) i++ } - triple := ((octet_a << 0x10) + (octet_b << 0x08) + octet_c) + triple := ((int(octet_a) << 0x10) + (int(octet_b) << 0x08) + int(octet_c)) - str[j+0] = EncodingTable[(triple >> 3 * 6) & 63] // 63 is 0x3F - str[j+1] = EncodingTable[(triple >> 2 * 6) & 63] - str[j+2] = EncodingTable[(triple >> 1 * 6) & 63] - str[j+3] = EncodingTable[(triple >> 0 * 6) & 63] + b[j] = etable[ (triple >> 3 * 6) & 63 ] // 63 is 0x3F + b[j+1] = etable[ (triple >> 2 * 6) & 63 ] + b[j+2] = etable[ (triple >> 1 * 6) & 63 ] + b[j+3] = etable[ (triple >> 0 * 6) & 63 ] j += 4 } - mod_table := [0, 2, 1] - for i = 0; i < mod_table[input_length % 3]; i++ { - str[output_length - 1 - i] = `=` + padding_length := EndingTable[input_length % 3] + for i = 0; i < padding_length; i++ { + b[output_length - 1 - i] = `=` } - - return tos(str, output_length) + return output_length } diff --git a/vlib/encoding/base64/base64_memory_test.v b/vlib/encoding/base64/base64_memory_test.v new file mode 100644 index 0000000000..c7e71f1644 --- /dev/null +++ b/vlib/encoding/base64/base64_memory_test.v @@ -0,0 +1,32 @@ +import encoding.base64 + +fn test_long_encoding(){ + repeats := 1000 + input_size := 3000 + + s_original := 'a'.repeat(input_size) + s_encoded := base64.encode(s_original) + s_decoded := base64.decode(s_encoded) + + assert 
s_encoded.len > s_original.len + assert s_original == s_decoded + + mut s := 0 + + ebuffer := malloc( s_encoded.len ) + for i := 0; i < repeats; i++ { + resultsize := base64.encode_in_buffer(s_original, mut ebuffer) + s += resultsize + assert resultsize == s_encoded.len + } + + dbuffer := malloc( s_decoded.len ) + for i := 0; i < repeats; i++ { + resultsize := base64.decode_in_buffer(s_encoded, mut dbuffer) + s += resultsize + assert resultsize == s_decoded.len + } + + println( 'Final s: $s' ) + // assert s == 39147008 +} diff --git a/vlib/encoding/base64/base64_test.v b/vlib/encoding/base64/base64_test.v index e3257dc36a..1898ccb38b 100644 --- a/vlib/encoding/base64/base64_test.v +++ b/vlib/encoding/base64/base64_test.v @@ -31,11 +31,16 @@ const ( TestPair{'asure.', 'YXN1cmUu'}, TestPair{'sure.', 'c3VyZS4='}, ] + + man_pair = TestPair{ + 'Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.', + 'TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=' + } + ) fn test_decode() { - assert base64.decode('TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=') - == 'Man is distinguished, not only by his reason, but by this singular passion from other 
animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.' + assert base64.decode(man_pair.encoded) == man_pair.decoded // Test for incorrect padding. assert base64.decode('aGk') == 'hi' @@ -52,8 +57,7 @@ fn test_decode() { } fn test_encode() { - assert base64.encode('Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.') - == 'TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=' + assert base64.encode(man_pair.decoded) == man_pair.encoded for i, p in pairs { got := base64.encode(p.decoded)