base64: some optimizations

* Add a test for the base64 encoding/decoding of long strings (i.e. mainly memory allocation).

* Make vlib/encoding/base64/base64_memory_test.v more closely resemble test.v from https://github.com/kostya/benchmarks .

* base64: some optimizations, also add base64.encode_in_buffer and base64.decode_in_buffer .

* Fix tests passing static strings.

* Reduce time needed for base64_memory_test.v .

* Optimize encoding.base64.Index access too (it is static), which speeds up decoding.
pull/2558/head
Delyan Angelov 2019-10-26 18:20:36 +03:00 committed by Alexander Medvednikov
parent 272b0aec82
commit 2d05c906d5
3 changed files with 120 additions and 36 deletions

View File

@ -5,16 +5,50 @@
module base64 module base64
const ( const (
Index = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Index = [int(0), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62, 63, 62, 62, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 62, 63, 62, 62, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0,
0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 63, 0, 26, 27, 28, 29, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 63, 0, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
47, 48, 49, 50, 51] 47, 48, 49, 50, 51]!!
EndingTable = [0, 2, 1]
EncodingTable = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
) )
/**
* decode - expects a base64 encoded string. Returns its decoded version.
* @param data - the encoded input string.
* @return the decoded version of the input string data.
* An empty input yields an empty string, without allocating.
* NB: if you need to decode many strings repeatedly, take a look at decode_in_buffer too.
*/
pub fn decode(data string) string {
	if data.len == 0 {
		return ''
	}
	// decode_in_buffer stores 3 bytes for every started group of 4 input
	// characters, so round the group count UP. Sizing the buffer as
	// data.len * 3 / 4 is too small for unpadded input such as 'aGk'
	// (2 bytes allocated, 3 written). The extra byte holds a terminating zero.
	groups := (data.len + 3) / 4
	mut buffer := malloc(groups * 3 + 1)
	n := decode_in_buffer(data, mut buffer)
	// NOTE(review): assumes the returned size never exceeds groups * 3 - confirm
	// against the output_length computation in decode_in_buffer.
	buffer[n] = 0
	return tos(buffer, n)
}
/**
* encode - expects a string. Returns its base64 encoded version.
* @param data - the input string.
* @return the base64 encoded version of the input string.
* An empty input yields an empty string, without allocating.
* NB: base64 encoding returns a string that is ~ 4/3 larger than the input.
* NB: if you need to encode many strings repeatedly, take a look at encode_in_buffer too.
*/
pub fn encode(data string) string {
	if data.len == 0 {
		return ''
	}
	// 4 output characters for every started group of 3 input bytes.
	size := 4 * ((data.len + 2) / 3)
	// One extra byte, so the resulting string data is zero terminated.
	mut buffer := malloc(size + 1)
	n := encode_in_buffer(data, mut buffer)
	buffer[n] = 0
	return tos(buffer, n)
}
/**
* decode_in_buffer - expects a string reference, and a buffer in which to store its decoded version.
* @param data - a reference/pointer to the input string that will be decoded.
* @param buffer - a reference/pointer to the buffer that will hold the result.
* The buffer should be large enough (i.e. 3/4 of the data.len, or larger) to hold the decoded data.
* @return the actual size of the decoded data in the buffer.
* NB: this function does NOT allocate new memory, and is suitable for handling very large strings.
*/
pub fn decode_in_buffer(data &string, buffer mut byteptr) int {
mut padding := 0 mut padding := 0
if data.ends_with('=') { if data.ends_with('=') {
if data.ends_with('==') { if data.ends_with('==') {
@ -29,52 +63,67 @@ pub fn decode(data string) string {
mut i := 0 mut i := 0
mut j := 0 mut j := 0
mut str := malloc(output_length) mut b := &byte(0)
mut d := &byte(0)
unsafe{
d = byteptr(data.str)
b = byteptr(buffer)
}
for i < input_length { for i < input_length {
mut char_a := 0 mut char_a := 0
mut char_b := 0 mut char_b := 0
mut char_c := 0 mut char_c := 0
mut char_d := 0 mut char_d := 0
if i < input_length { if i < input_length {
char_a = Index[int(data[i])] char_a = Index[d[i]]
i++ i++
} }
if i < input_length { if i < input_length {
char_b = Index[int(data[i])] char_b = Index[d[i]]
i++ i++
} }
if i < input_length { if i < input_length {
char_c = Index[int(data[i])] char_c = Index[d[i]]
i++ i++
} }
if i < input_length { if i < input_length {
char_d = Index[int(data[i])] char_d = Index[d[i]]
i++ i++
} }
decoded_bytes := (char_a << 18) | (char_b << 12) | (char_c << 6) | (char_d << 0) decoded_bytes := (char_a << 18) | (char_b << 12) | (char_c << 6) | (char_d << 0)
str[j] = decoded_bytes >> 16 b[j] = decoded_bytes >> 16
str[j+1] = (decoded_bytes >> 8) & 0xff b[j+1] = (decoded_bytes >> 8) & 0xff
str[j+2] = (decoded_bytes >> 0) & 0xff b[j+2] = (decoded_bytes >> 0) & 0xff
j += 3 j += 3
} }
return tos(str, output_length) return output_length
} }
const ( /**
EncodingTable = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' * encode_in_buffer - expects a string reference, and a buffer in which to store its base64 encoded version.
) * @param data - a reference/pointer to the input string.
* @param buffer - a reference/pointer to the buffer that will hold the result.
pub fn encode(data string) string { * The buffer should be large enough (i.e. 4/3 of the data.len, or larger) to hold the encoded data.
* @return the actual size of the encoded data in the buffer.
* NB: this function does NOT allocate new memory, and is suitable for handling very large strings.
*/
pub fn encode_in_buffer(data &string, buffer mut byteptr) int {
input_length := data.len input_length := data.len
output_length := 4 * ((input_length + 2) / 3) output_length := 4 * ((input_length + 2) / 3)
mut i := 0 mut i := 0
mut j := 0 mut j := 0
mut str := malloc(output_length)
mut d := &byte(0)
mut b := &byte(0)
mut etable := &byte(0)
unsafe{
d = &byte(data.str)
b = &byte(buffer)
etable = &byte(EncodingTable.str)
}
for i < input_length { for i < input_length {
mut octet_a := 0 mut octet_a := 0
@ -82,31 +131,30 @@ pub fn encode(data string) string {
mut octet_c := 0 mut octet_c := 0
if i < input_length { if i < input_length {
octet_a = int(data[i]) octet_a = int(d[i])
i++ i++
} }
if i < input_length { if i < input_length {
octet_b = int(data[i]) octet_b = int(d[i])
i++ i++
} }
if i < input_length { if i < input_length {
octet_c = int(data[i]) octet_c = int(d[i])
i++ i++
} }
triple := ((octet_a << 0x10) + (octet_b << 0x08) + octet_c) triple := ((int(octet_a) << 0x10) + (int(octet_b) << 0x08) + int(octet_c))
str[j+0] = EncodingTable[(triple >> 3 * 6) & 63] // 63 is 0x3F b[j] = etable[ (triple >> 3 * 6) & 63 ] // 63 is 0x3F
str[j+1] = EncodingTable[(triple >> 2 * 6) & 63] b[j+1] = etable[ (triple >> 2 * 6) & 63 ]
str[j+2] = EncodingTable[(triple >> 1 * 6) & 63] b[j+2] = etable[ (triple >> 1 * 6) & 63 ]
str[j+3] = EncodingTable[(triple >> 0 * 6) & 63] b[j+3] = etable[ (triple >> 0 * 6) & 63 ]
j += 4 j += 4
} }
mod_table := [0, 2, 1] padding_length := EndingTable[input_length % 3]
for i = 0; i < mod_table[input_length % 3]; i++ { for i = 0; i < padding_length; i++ {
str[output_length - 1 - i] = `=` b[output_length - 1 - i] = `=`
} }
return output_length
return tos(str, output_length)
} }

View File

@ -0,0 +1,32 @@
import encoding.base64
// test_long_encoding round-trips a long string through base64.encode and
// base64.decode, then repeatedly encodes/decodes it into preallocated
// buffers, checking the size reported by every call.
fn test_long_encoding() {
	repeats := 1000
	input_size := 3000

	s_original := 'a'.repeat(input_size)
	s_encoded := base64.encode(s_original)
	s_decoded := base64.decode(s_encoded)

	assert s_encoded.len > s_original.len
	assert s_original == s_decoded

	// s accumulates the sizes returned by all the *_in_buffer calls.
	mut s := 0

	ebuffer := malloc( s_encoded.len )
	for i := 0; i < repeats; i++ {
		resultsize := base64.encode_in_buffer(s_original, mut ebuffer)
		s += resultsize
		assert resultsize == s_encoded.len
	}

	dbuffer := malloc( s_decoded.len )
	for i := 0; i < repeats; i++ {
		resultsize := base64.decode_in_buffer(s_encoded, mut dbuffer)
		s += resultsize
		assert resultsize == s_decoded.len
	}

	println( 'Final s: $s' )
	// Derived from the parameters above, so it stays valid when
	// repeats or input_size are changed.
	assert s == repeats * (s_encoded.len + s_decoded.len)
}

View File

@ -31,11 +31,16 @@ const (
TestPair{'asure.', 'YXN1cmUu'}, TestPair{'asure.', 'YXN1cmUu'},
TestPair{'sure.', 'c3VyZS4='}, TestPair{'sure.', 'c3VyZS4='},
] ]
man_pair = TestPair{
'Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.',
'TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4='
}
) )
fn test_decode() { fn test_decode() {
assert base64.decode('TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=') assert base64.decode(man_pair.encoded) == man_pair.decoded
== 'Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.'
// Test for incorrect padding. // Test for incorrect padding.
assert base64.decode('aGk') == 'hi' assert base64.decode('aGk') == 'hi'
@ -52,8 +57,7 @@ fn test_decode() {
} }
fn test_encode() { fn test_encode() {
assert base64.encode('Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.') assert base64.encode(man_pair.decoded) == man_pair.encoded
== 'TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4='
for i, p in pairs { for i, p in pairs {
got := base64.encode(p.decoded) got := base64.encode(p.decoded)