compress: add a new module `compress.gzip` too (#14686)
parent
7b25957a26
commit
3a90d8ef14
|
@ -1,4 +1,8 @@
|
||||||
## Description:
|
## Description:
|
||||||
|
|
||||||
`compress` is a namespace for (multiple) compression algorithms supported by V.
|
`compress` is a namespace for (multiple) compression algorithms supported by V.
|
||||||
At the moment, only `compress.zlib` and `compress.deflate` are implemented.
|
|
||||||
|
At the moment, the following compression algorithms are implemented:
|
||||||
|
- `compress.deflate`
|
||||||
|
- `compress.gzip`
|
||||||
|
- `compress.zlib`
|
||||||
|
|
|
@ -9,7 +9,7 @@ fn C.tdefl_compress_mem_to_heap(source_buf voidptr, source_buf_len usize, out_le
|
||||||
fn C.tinfl_decompress_mem_to_heap(source_buf voidptr, source_buf_len usize, out_len &usize, flags int) voidptr
|
fn C.tinfl_decompress_mem_to_heap(source_buf voidptr, source_buf_len usize, out_len &usize, flags int) voidptr
|
||||||
|
|
||||||
// compresses an array of bytes based on providing flags and returns the compressed bytes in a new array
|
// compresses an array of bytes based on providing flags and returns the compressed bytes in a new array
|
||||||
// see `gzip.compress([]u8)` and `zlib.compress([]u8)` for default implementations.
|
// NB: this is a low-level API; prefer a high-level implementation such as zlib or gzip.
|
||||||
[manualfree]
|
[manualfree]
|
||||||
pub fn compress(data []u8, flags int) ?[]u8 {
|
pub fn compress(data []u8, flags int) ?[]u8 {
|
||||||
if u64(data.len) > compress.max_size {
|
if u64(data.len) > compress.max_size {
|
||||||
|
@ -28,7 +28,7 @@ pub fn compress(data []u8, flags int) ?[]u8 {
|
||||||
}
|
}
|
||||||
|
|
||||||
// decompresses an array of bytes based on providing flags and returns the decompressed bytes in a new array
|
// decompresses an array of bytes based on providing flags and returns the decompressed bytes in a new array
|
||||||
// see `gzip.decompress([]u8)` and `zlib.decompress([]u8)` for default implementations.
|
// NB: this is a low-level API; prefer a high-level implementation such as zlib or gzip.
|
||||||
[manualfree]
|
[manualfree]
|
||||||
pub fn decompress(data []u8, flags int) ?[]u8 {
|
pub fn decompress(data []u8, flags int) ?[]u8 {
|
||||||
mut out_len := usize(0)
|
mut out_len := usize(0)
|
||||||
|
|
|
@ -3,9 +3,6 @@
|
||||||
`compress.deflate` is a module that assists in the compression and
|
`compress.deflate` is a module that assists in the compression and
|
||||||
decompression of binary data using `deflate` compression
|
decompression of binary data using `deflate` compression
|
||||||
|
|
||||||
NOTE: To decompress gzip, discard first 10 bytes of compressed bytes
|
|
||||||
then use `compress.deflate.decompress`. (Header validation won't be
|
|
||||||
performed in this case)
|
|
||||||
|
|
||||||
## Examples:
|
## Examples:
|
||||||
|
|
||||||
|
|
|
@ -2,15 +2,14 @@ module deflate
|
||||||
|
|
||||||
import compress
|
import compress
|
||||||
|
|
||||||
// compresses an array of bytes using gzip and returns the compressed bytes in a new array
|
// compresses an array of bytes using deflate and returns the compressed bytes in a new array
|
||||||
// Example: compressed := gzip.compress(b)?
|
// Example: compressed := deflate.compress(b)?
|
||||||
pub fn compress(data []u8) ?[]u8 {
|
pub fn compress(data []u8) ?[]u8 {
|
||||||
return compress.compress(data, 0)
|
return compress.compress(data, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// decompresses an array of bytes using zlib and returns the decompressed bytes in a new array
|
// decompresses an array of bytes using deflate and returns the decompressed bytes in a new array
|
||||||
// Example: decompressed := zlib.decompress(b)?
|
// Example: decompressed := deflate.decompress(b)?
|
||||||
[manualfree]
|
|
||||||
pub fn decompress(data []u8) ?[]u8 {
|
pub fn decompress(data []u8) ?[]u8 {
|
||||||
return compress.decompress(data, 0)
|
return compress.decompress(data, 0)
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
## Description:
|
||||||
|
|
||||||
|
`compress.gzip` is a module that assists in the compression and
|
||||||
|
decompression of binary data using `gzip` compression.
|
||||||
|
|
||||||
|
|
||||||
|
## Examples:
|
||||||
|
|
||||||
|
```v
|
||||||
|
import compress.gzip
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
uncompressed := 'Hello world!'
|
||||||
|
compressed := gzip.compress(uncompressed.bytes())?
|
||||||
|
decompressed := gzip.decompress(compressed)?
|
||||||
|
assert decompressed == uncompressed.bytes()
|
||||||
|
}
|
||||||
|
```
|
|
@ -0,0 +1,114 @@
|
||||||
|
// [rfc1952](https://datatracker.ietf.org/doc/html/rfc1952) compliant
|
||||||
|
// gzip compression/decompression
|
||||||
|
|
||||||
|
module gzip
|
||||||
|
|
||||||
|
import compress
|
||||||
|
import hash.crc32
|
||||||
|
|
||||||
|
// compresses an array of bytes using gzip and returns the compressed bytes in a new array
// The result is a fixed 10-byte header, the raw deflate stream produced by
// `compress.compress(data, 0)`, and an 8-byte trailer that carries the CRC-32
// and the length of the uncompressed input.
// NOTE(review): the trailer is written most-significant byte first, which is
// what `decompress` in this module reads back. RFC 1952 stores multi-byte
// fields least-significant byte first - confirm before exchanging data with
// other gzip tools.
// Example: compressed := gzip.compress(b)?
pub fn compress(data []u8) ?[]u8 {
	deflated := compress.compress(data, 0)?
	crc := crc32.sum(data)
	size := data.len

	// header, 10 bytes
	mut packet := [
		u8(0x1f), // magic numbers (1F 8B)
		0x8b,
		0x08, // compression method: deflate
		0x00, // header flags
		0x00, // 4-byte timestamp, 0 = no timestamp (00 00 00 00)
		0x00,
		0x00,
		0x00,
		0x00, // extra flags
		0xff, // operating system id (0xff = unknown)
	]

	// payload
	packet << deflated

	// trailer, 8 bytes: checksum followed by the input length
	packet << [u8(crc >> 24), u8(crc >> 16), u8(crc >> 8), u8(crc)]
	packet << [u8(size >> 24), u8(size >> 16), u8(size >> 8), u8(size)]
	return packet
}
|
||||||
|
|
||||||
|
// decompresses an array of bytes using gzip and returns the decompressed bytes in a new array
// The header is validated (magic numbers, compression method, reserved flag
// bits, optional header checksum), optional header fields are skipped, and
// the trailer's length and CRC-32 are checked against the decompressed data.
// NOTE(review): this parser mirrors the layout written by `compress` in this
// module and exercised by the module tests, which differs from RFC 1952 in
// several places: the flag bits are read from data[4] (the RFC puts FLG at
// offset 3 and MTIME at 4..7), the FEXTRA length is read as a single byte
// (the RFC uses a 2-byte XLEN), the header checksum is read as 4 bytes (the
// RFC uses a CRC16), and multi-byte fields are read most-significant byte
// first (the RFC is little-endian). Confirm and fix writer, reader and tests
// together.
// Example: decompressed := gzip.decompress(b)?
pub fn decompress(data []u8) ?[]u8 {
	// 10 header bytes + 8 trailer bytes is the smallest input accepted
	if data.len < 18 {
		return error('data is too short, not gzip compressed?')
	}
	if data[0] != 0x1f || data[1] != 0x8b {
		return error('wrong magic numbers, not gzip compressed?')
	}
	if data[2] != 0x08 {
		return error('gzip data is not compressed with DEFLATE')
	}

	flags := data[4]
	mut header_length := 10

	// Parse the flags. Most of them are ignored, but the optional fields they
	// announce still have to be skipped correctly, so that nothing belonging
	// to the header is accidentally handed to the decompressor.

	if flags & 0b1110_0000 > 0 { // reserved bits
		// rfc 1952 2.3.1.2 Compliance
		// A compliant decompressor must give an error indication if any
		// reserved bit is non-zero, since such a bit could indicate the
		// presence of a new field that would cause subsequent data to be
		// interpreted incorrectly.
		return error('reserved flags are set, unsupported field detected')
	}

	// bit 0 (FTEXT) needs no handling

	if flags & 0b0000_0100 > 0 { // FEXTRA, extra data
		extra_len := data[header_length]
		header_length += extra_len + 1
	}

	// FNAME (file name) first, then FCOMMENT: both are zero-terminated
	// strings, so skip ahead until a zero byte is hit and step over it.
	for mask in [u8(0b0000_1000), 0b0001_0000] {
		if flags & mask > 0 {
			for header_length < data.len && data[header_length] != 0x00 {
				header_length++
			}
			header_length++
		}
	}

	if flags & 0b0000_0010 > 0 { // FHCRC, header checksum
		if header_length + 12 > data.len {
			return error('data too short')
		}
		stored := data[header_length..header_length + 4]
		checksum_header_expected := (u32(stored[0]) << 24) | (u32(stored[1]) << 16) | (u32(stored[2]) << 8) | stored[3]
		if crc32.sum(data[..header_length]) != checksum_header_expected {
			return error('header checksum verification failed')
		}
		header_length += 4
	}

	if header_length + 8 > data.len {
		return error('data too short')
	}

	// the last 8 bytes hold the checksum (4 bytes) and the length (4 bytes)
	trailer := data[data.len - 8..]
	decompressed := compress.decompress(data[header_length..data.len - 8], 0)?

	length_expected := (u32(trailer[4]) << 24) | (u32(trailer[5]) << 16) | (u32(trailer[6]) << 8) | trailer[7]
	if decompressed.len != length_expected {
		return error('length verification failed, got $decompressed.len, expected $length_expected')
	}

	checksum_expected := (u32(trailer[0]) << 24) | (u32(trailer[1]) << 16) | (u32(trailer[2]) << 8) | trailer[3]
	if crc32.sum(decompressed) != checksum_expected {
		return error('checksum verification failed')
	}
	return decompressed
}
|
|
@ -0,0 +1,134 @@
|
||||||
|
module gzip
|
||||||
|
|
||||||
|
import hash.crc32
|
||||||
|
|
||||||
|
// test_gzip checks that compressing and then decompressing returns the input bytes.
fn test_gzip() ? {
	original := 'Hello world!'.bytes()
	packed := compress(original)?
	restored := decompress(packed)?
	assert restored == original
}
|
||||||
|
|
||||||
|
// assert_decompress_error expects `decompress(data)` to fail with exactly the
// message `reason`; it returns an error itself when decompression succeeds.
fn assert_decompress_error(data []u8, reason string) ? {
	decompress(data) or {
		message := err.msg()
		assert message == reason
		return
	}
	return error('did not error')
}
|
||||||
|
|
||||||
|
// test_gzip_invalid_too_short feeds an empty array, which is below the minimum gzip size.
fn test_gzip_invalid_too_short() ? {
	empty := []u8{}
	assert_decompress_error(empty, 'data is too short, not gzip compressed?')?
}
|
||||||
|
|
||||||
|
// test_gzip_invalid_magic_numbers feeds 100 zero bytes, so the first two bytes are not 1F 8B.
fn test_gzip_invalid_magic_numbers() ? {
	zeroed := []u8{len: 100}
	assert_decompress_error(zeroed, 'wrong magic numbers, not gzip compressed?')?
}
|
||||||
|
|
||||||
|
// test_gzip_invalid_compression uses valid magic numbers followed by zeroes,
// so the compression method byte (index 2) is not 0x08.
fn test_gzip_invalid_compression() ? {
	mut blob := [u8(0x1f), 0x8b]
	blob << []u8{len: 98} // pad with zero bytes up to 100 bytes in total
	assert_decompress_error(blob, 'gzip data is not compressed with DEFLATE')?
}
|
||||||
|
|
||||||
|
// test_gzip_with_ftext sets the FTEXT bit in the byte that `decompress` reads
// its flags from (index 4) and expects the round trip to still succeed.
fn test_gzip_with_ftext() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	packed[4] = packed[4] | 0b0000_0001 // FTEXT
	restored := decompress(packed)?
	assert restored == original
}
|
||||||
|
|
||||||
|
// test_gzip_with_fname sets the FNAME bit and inserts the zero-terminated
// name "hi" right after the 10-byte header; decompression must skip it.
fn test_gzip_with_fname() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	packed[4] = packed[4] | 0b0000_1000 // FNAME
	mut name_field := 'hi'.bytes()
	name_field << u8(0x00)
	for offset, b in name_field {
		packed.insert(10 + offset, b)
	}
	restored := decompress(packed)?
	assert restored == original
}
|
||||||
|
|
||||||
|
// test_gzip_with_fcomment sets the FCOMMENT bit and inserts the
// zero-terminated comment "hi" right after the 10-byte header.
fn test_gzip_with_fcomment() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	packed[4] = packed[4] | 0b0001_0000 // FCOMMENT
	mut comment_field := 'hi'.bytes()
	comment_field << u8(0x00)
	for offset, b in comment_field {
		packed.insert(10 + offset, b)
	}
	restored := decompress(packed)?
	assert restored == original
}
|
||||||
|
|
||||||
|
// test_gzip_with_fname_fcomment sets both FNAME and FCOMMENT and inserts two
// zero-terminated "hi" strings after the 10-byte header, one per field.
fn test_gzip_with_fname_fcomment() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	packed[4] = packed[4] | 0b0001_1000 // FNAME | FCOMMENT
	mut text_field := 'hi'.bytes()
	text_field << u8(0x00)
	// insert the same field twice at offset 10: once as name, once as comment
	for _ in 0 .. 2 {
		for offset, b in text_field {
			packed.insert(10 + offset, b)
		}
	}
	restored := decompress(packed)?
	assert restored == original
}
|
||||||
|
|
||||||
|
// test_gzip_with_fextra sets the FEXTRA bit and inserts a one-byte length (2)
// followed by two extra bytes after the 10-byte header.
fn test_gzip_with_fextra() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	packed[4] = packed[4] | 0b0000_0100 // FEXTRA
	mut extra_field := [u8(2)] // length byte as read by `decompress`
	extra_field << 'hi'.bytes()
	for offset, b in extra_field {
		packed.insert(10 + offset, b)
	}
	restored := decompress(packed)?
	assert restored == original
}
|
||||||
|
|
||||||
|
// test_gzip_with_hcrc sets the FHCRC bit and inserts the CRC-32 of the first
// 10 header bytes (most-significant byte first) directly after the header.
fn test_gzip_with_hcrc() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	packed[4] = packed[4] | 0b0000_0010 // FHCRC
	// the checksum covers the header including the flag that was just set
	header_crc := crc32.sum(packed[..10])
	crc_bytes := [u8(header_crc >> 24), u8(header_crc >> 16), u8(header_crc >> 8), u8(header_crc)]
	for offset, b in crc_bytes {
		packed.insert(10 + offset, b)
	}
	restored := decompress(packed)?
	assert restored == original
}
|
||||||
|
|
||||||
|
// test_gzip_with_invalid_hcrc stores a header checksum whose last byte is off
// by one and expects the header verification to fail.
fn test_gzip_with_invalid_hcrc() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	packed[4] = packed[4] | 0b0000_0010 // FHCRC
	header_crc := crc32.sum(packed[..10])
	// the final byte is deliberately corrupted (+ 1)
	crc_bytes := [u8(header_crc >> 24), u8(header_crc >> 16), u8(header_crc >> 8), u8(header_crc + 1)]
	for offset, b in crc_bytes {
		packed.insert(10 + offset, b)
	}
	assert_decompress_error(packed, 'header checksum verification failed')?
}
|
||||||
|
|
||||||
|
// test_gzip_with_invalid_checksum corrupts the last byte of the trailer's
// checksum field and expects the data checksum verification to fail.
fn test_gzip_with_invalid_checksum() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	crc_last := packed.len - 5 // final byte of the 4-byte checksum
	packed[crc_last] += 1
	assert_decompress_error(packed, 'checksum verification failed')?
}
|
||||||
|
|
||||||
|
// test_gzip_with_invalid_length bumps the last trailer byte, so the stored
// length becomes 13 while the payload still decompresses to 12 bytes.
fn test_gzip_with_invalid_length() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	length_last := packed.len - 1 // final byte of the 4-byte length field
	packed[length_last] += 1
	assert_decompress_error(packed, 'length verification failed, got 12, expected 13')?
}
|
||||||
|
|
||||||
|
// test_gzip_with_invalid_flags sets a reserved flag bit in the byte that
// `decompress` reads its flags from (index 4) and expects it to be rejected.
fn test_gzip_with_invalid_flags() ? {
	original := 'Hello world!'.bytes()
	mut packed := compress(original)?
	packed[4] = packed[4] | 0b1000_0000 // reserved bit
	assert_decompress_error(packed, 'reserved flags are set, unsupported field detected')?
}
|
|
@ -4,7 +4,6 @@ import compress
|
||||||
|
|
||||||
// compresses an array of bytes using zlib and returns the compressed bytes in a new array
|
// compresses an array of bytes using zlib and returns the compressed bytes in a new array
|
||||||
// Example: compressed := zlib.compress(b)?
|
// Example: compressed := zlib.compress(b)?
|
||||||
[manualfree]
|
|
||||||
pub fn compress(data []u8) ?[]u8 {
|
pub fn compress(data []u8) ?[]u8 {
|
||||||
// flags = TDEFL_WRITE_ZLIB_HEADER (0x01000)
|
// flags = TDEFL_WRITE_ZLIB_HEADER (0x01000)
|
||||||
return compress.compress(data, 0x01000)
|
return compress.compress(data, 0x01000)
|
||||||
|
@ -12,7 +11,6 @@ pub fn compress(data []u8) ?[]u8 {
|
||||||
|
|
||||||
// decompresses an array of bytes using zlib and returns the decompressed bytes in a new array
|
// decompresses an array of bytes using zlib and returns the decompressed bytes in a new array
|
||||||
// Example: decompressed := zlib.decompress(b)?
|
// Example: decompressed := zlib.decompress(b)?
|
||||||
[manualfree]
|
|
||||||
pub fn decompress(data []u8) ?[]u8 {
|
pub fn decompress(data []u8) ?[]u8 {
|
||||||
// flags = TINFL_FLAG_PARSE_ZLIB_HEADER (0x1)
|
// flags = TINFL_FLAG_PARSE_ZLIB_HEADER (0x1)
|
||||||
return compress.decompress(data, 0x1)
|
return compress.decompress(data, 0x1)
|
||||||
|
|
Loading…
Reference in New Issue