v/vlib/compress/gzip/gzip.v

// [rfc1952](https://datatracker.ietf.org/doc/html/rfc1952) compliant
// gzip compression/decompression

module gzip

import compress
import hash.crc32

// compress compresses an array of bytes and wraps the result in a gzip member
// (rfc 1952): a fixed 10-byte header, the payload produced by
// `compress.compress`, and an 8-byte trailer holding the CRC-32 and the size
// of the uncompressed input. No optional header fields are written.
// It returns the gzip data in a new array, or the error reported by
// `compress.compress`.
// Example: compressed := gzip.compress(b)?
pub fn compress(data []u8) ?[]u8 {
	compressed := compress.compress(data, 0)?
	// header
	mut result := [
		u8(0x1f), // magic numbers (1F 8B)
		0x8b,
		0x08, // compression method: deflate
		0x00, // header flags: no optional fields
		0x00, // 4-byte timestamp, 0 = no timestamp (00 00 00 00)
		0x00,
		0x00,
		0x00,
		0x00, // extra flags
		0xff, // operating system id (0xff = unknown)
	] // 10 bytes
	result << compressed
	// trailer: rfc 1952 stores all multi-byte numbers with the least
	// significant byte first (little-endian)
	checksum := crc32.sum(data)
	length := u32(data.len) // ISIZE is the input size modulo 2^32
	result << [
		u8(checksum),
		u8(checksum >> 8),
		u8(checksum >> 16),
		u8(checksum >> 24),
		u8(length),
		u8(length >> 8),
		u8(length >> 16),
		u8(length >> 24),
	] // 8 bytes
	return result
}

// decompress decompresses a single gzip member (rfc 1952) and returns the
// decompressed bytes in a new array.
// The optional header fields (FEXTRA, FNAME, FCOMMENT, FHCRC) are skipped, the
// header CRC16 is verified when present, and the trailer (CRC-32 and size of
// the uncompressed data) is verified against the decompressed output.
// An error is returned when the data is too short, the magic numbers or the
// compression method are wrong, a reserved flag is set, the payload can not
// be decompressed, or one of the verifications fails.
// Example: decompressed := gzip.decompress(b)?
pub fn decompress(data []u8) ?[]u8 {
	// 10 bytes fixed header + 8 bytes trailer is the minimum for any member
	if data.len < 18 {
		return error('data is too short, not gzip compressed?')
	} else if data[0] != 0x1f || data[1] != 0x8b {
		return error('wrong magic numbers, not gzip compressed?')
	} else if data[2] != 0x08 {
		return error('gzip data is not compressed with DEFLATE')
	}
	// fixed header layout: ID1 ID2 CM FLG MTIME(4 bytes) XFL OS
	// the flags live in byte 3; bytes 4..7 are the timestamp
	flags := data[3]
	mut header_length := 10

	// parse flags, we ignore most of them, but we still need to parse them
	// correctly, so we don't accidentally decompress something that belongs
	// to the header

	if flags & 0b1110_0000 > 0 { // reserved bits
		// rfc 1952 2.3.1.2 Compliance
		// A compliant decompressor must give an error indication if any
		// reserved bit is non-zero, since such a bit could indicate the
		// presence of a new field that would cause subsequent data to be
		// interpreted incorrectly.
		return error('reserved flags are set, unsupported field detected')
	}

	// FTEXT (0b0000_0001) is only a hint about the content, nothing to skip
	if flags & 0b0000_0100 > 0 { // FEXTRA, extra data
		// XLEN is a 2-byte little-endian count of the extra bytes following it;
		// both bytes are in bounds because data.len >= 18 was checked above
		xlen := int(data[header_length]) | (int(data[header_length + 1]) << 8)
		header_length += xlen + 2
	}
	if flags & 0b0000_1000 > 0 { // FNAME, file name
		// filename is zero-terminated, so skip until we hit a zero byte
		for header_length < data.len && data[header_length] != 0x00 {
			header_length++
		}
		header_length++ // skip the terminator itself
	}
	if flags & 0b0001_0000 > 0 { // FCOMMENT
		// comment is zero-terminated, so skip until we hit a zero byte
		for header_length < data.len && data[header_length] != 0x00 {
			header_length++
		}
		header_length++ // skip the terminator itself
	}
	if flags & 0b0000_0010 > 0 { // FHCRC, flag header crc
		// need 2 bytes for the CRC16 plus the 8 byte trailer
		if header_length + 10 > data.len {
			return error('data too short')
		}
		// the CRC16 consists of the two least significant bytes of the CRC-32
		// over all header bytes before it, stored little-endian
		checksum_header := crc32.sum(data[..header_length]) & 0xffff
		checksum_header_expected := u32(data[header_length]) | (u32(data[header_length + 1]) << 8)
		if checksum_header != checksum_header_expected {
			return error('header checksum verification failed')
		}
		header_length += 2
	}
	// this also catches optional fields that claimed to run past the end
	if header_length + 8 > data.len {
		return error('data too short')
	}

	// trailer: CRC-32 (4 bytes) followed by ISIZE (4 bytes), both little-endian
	trailer := data.len - 8
	decompressed := compress.decompress(data[header_length..trailer], 0)?
	length_expected := u32(data[trailer + 4]) | (u32(data[trailer + 5]) << 8) | (u32(data[trailer + 6]) << 16) | (u32(data[trailer + 7]) << 24)
	// ISIZE is the uncompressed size modulo 2^32, so compare as u32
	if u32(decompressed.len) != length_expected {
		return error('length verification failed, got $decompressed.len, expected $length_expected')
	}
	checksum := crc32.sum(decompressed)
	checksum_expected := u32(data[trailer]) | (u32(data[trailer + 1]) << 8) | (u32(data[trailer + 2]) << 16) | (u32(data[trailer + 3]) << 24)
	if checksum != checksum_expected {
		return error('checksum verification failed')
	}
	return decompressed
}
compress: add a new module `compress.gzip` too (#14686) 2022-06-05 17:53:45 +02:00			`// [rfc1952](https://datatracker.ietf.org/doc/html/rfc1952) compliant`
			`// gzip compression/decompression`

			`module gzip`

			`import compress`
			`import hash.crc32`

			`// compresses an array of bytes using gzip and returns the compressed bytes in a new array`
			`// Example: compressed := gzip.compress(b)?`
			`pub fn compress(data []u8) ?[]u8 {`
			`compressed := compress.compress(data, 0)?`
			`// header`
			`mut result := [`
			`u8(0x1f), // magic numbers (1F 8B)`
			`0x8b,`
			`0x08, // deflate`
			`0x00, // header flags`
			`0x00, // 4-byte timestamp, 0 = no timestamp (00 00 00 00)`
			`0x00,`
			`0x00,`
			`0x00,`
			`0x00, // extra flags`
			`0xff, // operating system id (0xff = unknown)`
			`] // 10 bytes`
			`result << compressed`
			`// trailer`
			`checksum := crc32.sum(data)`
			`length := data.len`
			`result << [`
			`u8(checksum >> 24),`
			`u8(checksum >> 16),`
			`u8(checksum >> 8),`
			`u8(checksum),`
			`u8(length >> 24),`
			`u8(length >> 16),`
			`u8(length >> 8),`
			`u8(length),`
			`] // 8 bytes`
			`return result`
			`}`

			`// decompresses an array of bytes using zlib and returns the decompressed bytes in a new array`
			`// Example: decompressed := gzip.decompress(b)?`
			`pub fn decompress(data []u8) ?[]u8 {`
			`if data.len < 18 {`
			`return error('data is too short, not gzip compressed?')`
			`} else if data[0] != 0x1f \|\| data[1] != 0x8b {`
			`return error('wrong magic numbers, not gzip compressed?')`
			`} else if data[2] != 0x08 {`
			`return error('gzip data is not compressed with DEFLATE')`
			`}`
			`mut header_length := 10`

			`// parse flags, we ignore most of them, but we still need to parse them`
			`// correctly, so we dont accidently decompress something that belongs`
			`// to the header`

			`if data[4] & 0b1110_0000 > 0 { // reserved bits`
			`// rfc 1952 2.3.1.2 Compliance`
			`// A compliant decompressor must give an error indication if any`
			`// reserved bit is non-zero, since such a bit could indicate the`
			`// presence of a new field that would cause subsequent data to be`
			`// interpreted incorrectly.`
			`return error('reserved flags are set, unsupported field detected')`
			`}`

			`// if data[4] & 0b0000_0001 {} // FTEXT`
			`if data[4] & 0b0000_0100 > 0 { // FEXTRA, extra data`
			`xlen := data[header_length]`
			`header_length += xlen + 1`
			`}`
			`if data[4] & 0b0000_1000 > 0 { // FNAME, file name`
			`// filename is zero-terminated, so skip until we hit a zero byte`
			`for header_length < data.len && data[header_length] != 0x00 {`
			`header_length++`
			`}`
			`header_length++`
			`}`
			`if data[4] & 0b0001_0000 > 0 { // FCOMMENT`
			`// comment is zero-terminated, so skip until we hit a zero byte`
			`for header_length < data.len && data[header_length] != 0x00 {`
			`header_length++`
			`}`
			`header_length++`
			`}`
			`if data[4] & 0b0000_0010 > 0 { // FHCRC, flag header crc`
			`if header_length + 12 > data.len {`
			`return error('data too short')`
			`}`
			`checksum_header := crc32.sum(data[..header_length])`
			`checksum_header_expected := (u32(data[header_length]) << 24) \| (u32(data[header_length + 1]) << 16) \| (u32(data[`
			`header_length + 2]) << 8) \| data[header_length + 3]`
			`if checksum_header != checksum_header_expected {`
			`return error('header checksum verification failed')`
			`}`
			`header_length += 4`
			`}`
			`if header_length + 8 > data.len {`
			`return error('data too short')`
			`}`

			`decompressed := compress.decompress(data[header_length..data.len - 8], 0)?`
			`length_expected := (u32(data[data.len - 4]) << 24) \| (u32(data[data.len - 3]) << 16) \| (u32(data[data.len - 2]) << 8) \| data[data.len - 1]`
			`if decompressed.len != length_expected {`
			`return error('length verification failed, got $decompressed.len, expected $length_expected')`
			`}`
			`checksum := crc32.sum(decompressed)`
			`checksum_expected := (u32(data[data.len - 8]) << 24) \| (u32(data[data.len - 7]) << 16) \| (u32(data[data.len - 6]) << 8) \| data[data.len - 5]`
			`if checksum != checksum_expected {`
			`return error('checksum verification failed')`
			`}`
			`return decompressed`
			`}`