base64: some optimizations
* Add a test for the base64 encoding/decoding of long strings (i.e. mainly memory allocation). * Make vlib/encoding/base64/base64_memory_test.v more closely resemble test.v from https://github.com/kostya/benchmarks. * base64: some optimizations; also add base64.encode_in_buffer and base64.decode_in_buffer. * Fix tests passing static strings. * Reduce the time needed for base64_memory_test.v. * Optimize access to encoding.base64.Index too (it is static), which speeds up decoding. pull/2558/head
parent
272b0aec82
commit
2d05c906d5
|
@ -5,16 +5,50 @@
|
||||||
module base64
|
module base64
|
||||||
|
|
||||||
const (
|
const (
|
||||||
Index = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
Index = [int(0), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
62, 63, 62, 62, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0,
|
62, 63, 62, 62, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
|
0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
|
||||||
17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 63, 0, 26, 27, 28, 29,
|
17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 63, 0, 26, 27, 28, 29,
|
||||||
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
|
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
|
||||||
47, 48, 49, 50, 51]
|
47, 48, 49, 50, 51]!!
|
||||||
|
|
||||||
|
EndingTable = [0, 2, 1]
|
||||||
|
EncodingTable = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* decode - expects a base64 encoded string. Returns its decoded version.
|
||||||
|
* @param data - the encoded input string.
|
||||||
|
* @return the decoded version of the input string data.
|
||||||
|
* NB: if you need to decode many strings repeatedly, take a look at decode_in_buffer too.
|
||||||
|
*/
|
||||||
pub fn decode(data string) string {
|
pub fn decode(data string) string {
|
||||||
|
buffer := malloc( data.len * 3 / 4 )
|
||||||
|
return tos(buffer, decode_in_buffer(data, mut buffer) )
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* encode - expects a string. Returns its base64 encoded version.
|
||||||
|
* @param data - the input string.
|
||||||
|
* @return the base64 encoded version of the input string.
|
||||||
|
* NB: base64 encoding returns a string that is roughly 4/3 the size of the input (about one third larger).
|
||||||
|
* NB: if you need to encode many strings repeatedly, take a look at encode_in_buffer too.
|
||||||
|
*/
|
||||||
|
pub fn encode(data string) string {
|
||||||
|
buffer := malloc( 4 * ((data.len + 2) / 3) )
|
||||||
|
return tos(buffer, encode_in_buffer(data, mut buffer))
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* decode_in_buffer - expects a string reference, and a buffer in which to store its decoded version.
|
||||||
|
* @param data - a reference/pointer to the input string that will be decoded.
|
||||||
|
* @param buffer - a reference/pointer to the buffer that will hold the result.
|
||||||
|
* The buffer should be large enough (i.e. 3/4 of the data.len, or larger) to hold the decoded data.
|
||||||
|
* @return the actual size of the decoded data in the buffer.
|
||||||
|
* NB: this function does NOT allocate new memory, and is suitable for handling very large strings.
|
||||||
|
*/
|
||||||
|
pub fn decode_in_buffer(data &string, buffer mut byteptr) int {
|
||||||
mut padding := 0
|
mut padding := 0
|
||||||
if data.ends_with('=') {
|
if data.ends_with('=') {
|
||||||
if data.ends_with('==') {
|
if data.ends_with('==') {
|
||||||
|
@ -29,52 +63,67 @@ pub fn decode(data string) string {
|
||||||
|
|
||||||
mut i := 0
|
mut i := 0
|
||||||
mut j := 0
|
mut j := 0
|
||||||
mut str := malloc(output_length)
|
mut b := &byte(0)
|
||||||
|
mut d := &byte(0)
|
||||||
|
unsafe{
|
||||||
|
d = byteptr(data.str)
|
||||||
|
b = byteptr(buffer)
|
||||||
|
}
|
||||||
|
|
||||||
for i < input_length {
|
for i < input_length {
|
||||||
mut char_a := 0
|
mut char_a := 0
|
||||||
mut char_b := 0
|
mut char_b := 0
|
||||||
mut char_c := 0
|
mut char_c := 0
|
||||||
mut char_d := 0
|
mut char_d := 0
|
||||||
|
|
||||||
if i < input_length {
|
if i < input_length {
|
||||||
char_a = Index[int(data[i])]
|
char_a = Index[d[i]]
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
if i < input_length {
|
if i < input_length {
|
||||||
char_b = Index[int(data[i])]
|
char_b = Index[d[i]]
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
if i < input_length {
|
if i < input_length {
|
||||||
char_c = Index[int(data[i])]
|
char_c = Index[d[i]]
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
if i < input_length {
|
if i < input_length {
|
||||||
char_d = Index[int(data[i])]
|
char_d = Index[d[i]]
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
decoded_bytes := (char_a << 18) | (char_b << 12) | (char_c << 6) | (char_d << 0)
|
decoded_bytes := (char_a << 18) | (char_b << 12) | (char_c << 6) | (char_d << 0)
|
||||||
str[j] = decoded_bytes >> 16
|
b[j] = decoded_bytes >> 16
|
||||||
str[j+1] = (decoded_bytes >> 8) & 0xff
|
b[j+1] = (decoded_bytes >> 8) & 0xff
|
||||||
str[j+2] = (decoded_bytes >> 0) & 0xff
|
b[j+2] = (decoded_bytes >> 0) & 0xff
|
||||||
|
|
||||||
j += 3
|
j += 3
|
||||||
}
|
}
|
||||||
return tos(str, output_length)
|
return output_length
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
/**
|
||||||
EncodingTable = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
|
* encode_in_buffer - expects a string reference, and a buffer in which to store its base64 encoded version.
|
||||||
)
|
* @param data - a reference/pointer to the input string.
|
||||||
|
* @param buffer - a reference/pointer to the buffer that will hold the result.
|
||||||
pub fn encode(data string) string {
|
* The buffer should be large enough (i.e. at least 4 * ((data.len + 2) / 3) bytes, which is roughly 4/3 of data.len rounded up to a multiple of 4) to hold the encoded data.
|
||||||
|
* @return the actual size of the encoded data in the buffer.
|
||||||
|
* NB: this function does NOT allocate new memory, and is suitable for handling very large strings.
|
||||||
|
*/
|
||||||
|
pub fn encode_in_buffer(data &string, buffer mut byteptr) int {
|
||||||
input_length := data.len
|
input_length := data.len
|
||||||
output_length := 4 * ((input_length + 2) / 3)
|
output_length := 4 * ((input_length + 2) / 3)
|
||||||
|
|
||||||
mut i := 0
|
mut i := 0
|
||||||
mut j := 0
|
mut j := 0
|
||||||
mut str := malloc(output_length)
|
|
||||||
|
mut d := &byte(0)
|
||||||
|
mut b := &byte(0)
|
||||||
|
mut etable := &byte(0)
|
||||||
|
unsafe{
|
||||||
|
d = &byte(data.str)
|
||||||
|
b = &byte(buffer)
|
||||||
|
etable = &byte(EncodingTable.str)
|
||||||
|
}
|
||||||
|
|
||||||
for i < input_length {
|
for i < input_length {
|
||||||
mut octet_a := 0
|
mut octet_a := 0
|
||||||
|
@ -82,31 +131,30 @@ pub fn encode(data string) string {
|
||||||
mut octet_c := 0
|
mut octet_c := 0
|
||||||
|
|
||||||
if i < input_length {
|
if i < input_length {
|
||||||
octet_a = int(data[i])
|
octet_a = int(d[i])
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
if i < input_length {
|
if i < input_length {
|
||||||
octet_b = int(data[i])
|
octet_b = int(d[i])
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
if i < input_length {
|
if i < input_length {
|
||||||
octet_c = int(data[i])
|
octet_c = int(d[i])
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
triple := ((octet_a << 0x10) + (octet_b << 0x08) + octet_c)
|
triple := ((int(octet_a) << 0x10) + (int(octet_b) << 0x08) + int(octet_c))
|
||||||
|
|
||||||
str[j+0] = EncodingTable[(triple >> 3 * 6) & 63] // 63 is 0x3F
|
b[j] = etable[ (triple >> 3 * 6) & 63 ] // 63 is 0x3F
|
||||||
str[j+1] = EncodingTable[(triple >> 2 * 6) & 63]
|
b[j+1] = etable[ (triple >> 2 * 6) & 63 ]
|
||||||
str[j+2] = EncodingTable[(triple >> 1 * 6) & 63]
|
b[j+2] = etable[ (triple >> 1 * 6) & 63 ]
|
||||||
str[j+3] = EncodingTable[(triple >> 0 * 6) & 63]
|
b[j+3] = etable[ (triple >> 0 * 6) & 63 ]
|
||||||
j += 4
|
j += 4
|
||||||
}
|
}
|
||||||
|
|
||||||
mod_table := [0, 2, 1]
|
padding_length := EndingTable[input_length % 3]
|
||||||
for i = 0; i < mod_table[input_length % 3]; i++ {
|
for i = 0; i < padding_length; i++ {
|
||||||
str[output_length - 1 - i] = `=`
|
b[output_length - 1 - i] = `=`
|
||||||
}
|
}
|
||||||
|
return output_length
|
||||||
return tos(str, output_length)
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
import encoding.base64
|
||||||
|
|
||||||
|
fn test_long_encoding(){
|
||||||
|
repeats := 1000
|
||||||
|
input_size := 3000
|
||||||
|
|
||||||
|
s_original := 'a'.repeat(input_size)
|
||||||
|
s_encoded := base64.encode(s_original)
|
||||||
|
s_decoded := base64.decode(s_encoded)
|
||||||
|
|
||||||
|
assert s_encoded.len > s_original.len
|
||||||
|
assert s_original == s_decoded
|
||||||
|
|
||||||
|
mut s := 0
|
||||||
|
|
||||||
|
ebuffer := malloc( s_encoded.len )
|
||||||
|
for i := 0; i < repeats; i++ {
|
||||||
|
resultsize := base64.encode_in_buffer(s_original, mut ebuffer)
|
||||||
|
s += resultsize
|
||||||
|
assert resultsize == s_encoded.len
|
||||||
|
}
|
||||||
|
|
||||||
|
dbuffer := malloc( s_decoded.len )
|
||||||
|
for i := 0; i < repeats; i++ {
|
||||||
|
resultsize := base64.decode_in_buffer(s_encoded, mut dbuffer)
|
||||||
|
s += resultsize
|
||||||
|
assert resultsize == s_decoded.len
|
||||||
|
}
|
||||||
|
|
||||||
|
println( 'Final s: $s' )
|
||||||
|
// assert s == 39147008
|
||||||
|
}
|
|
@ -31,11 +31,16 @@ const (
|
||||||
TestPair{'asure.', 'YXN1cmUu'},
|
TestPair{'asure.', 'YXN1cmUu'},
|
||||||
TestPair{'sure.', 'c3VyZS4='},
|
TestPair{'sure.', 'c3VyZS4='},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
man_pair = TestPair{
|
||||||
|
'Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.',
|
||||||
|
'TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4='
|
||||||
|
}
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
||||||
fn test_decode() {
|
fn test_decode() {
|
||||||
assert base64.decode('TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=')
|
assert base64.decode(man_pair.encoded) == man_pair.decoded
|
||||||
== 'Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.'
|
|
||||||
|
|
||||||
// Test for incorrect padding.
|
// Test for incorrect padding.
|
||||||
assert base64.decode('aGk') == 'hi'
|
assert base64.decode('aGk') == 'hi'
|
||||||
|
@ -52,8 +57,7 @@ fn test_decode() {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_encode() {
|
fn test_encode() {
|
||||||
assert base64.encode('Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.')
|
assert base64.encode(man_pair.decoded) == man_pair.encoded
|
||||||
== 'TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4='
|
|
||||||
|
|
||||||
for i, p in pairs {
|
for i, p in pairs {
|
||||||
got := base64.encode(p.decoded)
|
got := base64.encode(p.decoded)
|
||||||
|
|
Loading…
Reference in New Issue