v/vlib/compress/gzip/gzip.v

// [rfc1952](https://datatracker.ietf.org/doc/html/rfc1952) compliant
// gzip compression/decompression

module gzip

import compress
import hash.crc32

// read_u32_le decodes the 4 bytes starting at `offset` as a little-endian u32.
// rfc 1952 2.1: all multi-byte numbers are stored least-significant byte first.
// The caller must guarantee that `offset + 3` is a valid index into `data`.
fn read_u32_le(data []u8, offset int) u32 {
	return u32(data[offset]) | (u32(data[offset + 1]) << 8) | (u32(data[offset + 2]) << 16) | (u32(data[
		offset + 3]) << 24)
}

// compresses an array of bytes using gzip and returns the compressed bytes in a new array
// The result is a single gzip member: a 10 byte header without optional
// fields, the raw DEFLATE stream, and an 8 byte trailer (CRC32 and ISIZE,
// both little-endian as required by rfc 1952).
// Returns an error if the underlying DEFLATE compression fails.
// Example: compressed := gzip.compress(b)?
pub fn compress(data []u8) ?[]u8 {
	compressed := compress.compress(data, 0)?
	// header
	mut result := [
		u8(0x1f), // magic numbers (1F 8B)
		0x8b,
		0x08, // deflate
		0x00, // header flags
		0x00, // 4-byte timestamp, 0 = no timestamp (00 00 00 00)
		0x00,
		0x00,
		0x00,
		0x00, // extra flags
		0xff, // operating system id (0xff = unknown)
	] // 10 bytes
	result << compressed
	// trailer: CRC32 of the uncompressed data, then ISIZE (uncompressed
	// length modulo 2^32), each stored least-significant byte first
	checksum := crc32.sum(data)
	length := u32(data.len)
	result << [
		u8(checksum),
		u8(checksum >> 8),
		u8(checksum >> 16),
		u8(checksum >> 24),
		u8(length),
		u8(length >> 8),
		u8(length >> 16),
		u8(length >> 24),
	] // 8 bytes
	return result
}

// decompresses an array of bytes using gzip and returns the decompressed bytes in a new array
// The header is validated (magic numbers, compression method, reserved flag
// bits, optional header CRC16), the optional FEXTRA/FNAME/FCOMMENT fields are
// skipped, and the trailer CRC32 and ISIZE are verified against the output.
// Returns an error if the data is truncated, malformed, or fails verification.
// Example: decompressed := gzip.decompress(b)?
pub fn decompress(data []u8) ?[]u8 {
	// 10 byte header + 8 byte trailer is the smallest possible member
	if data.len < 18 {
		return error('data is too short, not gzip compressed?')
	} else if data[0] != 0x1f || data[1] != 0x8b {
		return error('wrong magic numbers, not gzip compressed?')
	} else if data[2] != 0x08 {
		return error('gzip data is not compressed with DEFLATE')
	}
	mut header_length := 10

	// parse flags, we ignore most of them, but we still need to parse them
	// correctly, so we don't accidentally decompress something that belongs
	// to the header

	// header layout: ID1 ID2 CM FLG MTIME(4) XFL OS -> FLG is byte 3
	flags := data[3]

	if flags & 0b1110_0000 > 0 { // reserved bits
		// rfc 1952 2.3.1.2 Compliance
		// A compliant decompressor must give an error indication if any
		// reserved bit is non-zero, since such a bit could indicate the
		// presence of a new field that would cause subsequent data to be
		// interpreted incorrectly.
		return error('reserved flags are set, unsupported field detected')
	}

	// if flags & 0b0000_0001 {} // FTEXT
	if flags & 0b0000_0100 > 0 { // FEXTRA, extra data
		// XLEN is a 2-byte little-endian length followed by XLEN bytes;
		// data.len >= 18 guarantees that bytes 10 and 11 exist
		xlen := int(data[header_length]) | (int(data[header_length + 1]) << 8)
		header_length += xlen + 2
	}
	if flags & 0b0000_1000 > 0 { // FNAME, file name
		// filename is zero-terminated, so skip until we hit a zero byte
		for header_length < data.len && data[header_length] != 0x00 {
			header_length++
		}
		header_length++
	}
	if flags & 0b0001_0000 > 0 { // FCOMMENT
		// comment is zero-terminated, so skip until we hit a zero byte
		for header_length < data.len && data[header_length] != 0x00 {
			header_length++
		}
		header_length++
	}
	if flags & 0b0000_0010 > 0 { // FHCRC, flag header crc
		// 2 bytes CRC16 + 8 bytes trailer must still fit
		if header_length + 10 > data.len {
			return error('data too short')
		}
		// the CRC16 is the two least significant bytes of the CRC32 over
		// all header bytes preceding it, stored little-endian
		checksum_header := crc32.sum(data[..header_length]) & 0xffff
		checksum_header_expected := u32(data[header_length]) | (u32(data[header_length + 1]) << 8)
		if checksum_header != checksum_header_expected {
			return error('header checksum verification failed')
		}
		header_length += 2
	}
	// also catches optional fields that ran past the end of the data
	if header_length + 8 > data.len {
		return error('data too short')
	}

	decompressed := compress.decompress(data[header_length..data.len - 8], 0)?
	// ISIZE holds the uncompressed length modulo 2^32
	length_expected := read_u32_le(data, data.len - 4)
	if u32(decompressed.len) != length_expected {
		return error('length verification failed, got $decompressed.len, expected $length_expected')
	}
	checksum := crc32.sum(decompressed)
	checksum_expected := read_u32_le(data, data.len - 8)
	if checksum != checksum_expected {
		return error('checksum verification failed')
	}
	return decompressed
}