#include "GZipFormat.h"
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>

extern "C" {
#include <miniz/miniz.h>
}
9 
10 namespace {
11 	enum GZipFlags {
12 		FLAG_TEXT = 1,
13 		FLAG_HCRC = 2, // Header CRC included
14 		FLAG_EXTRA = 4,
15 		FLAG_NAME = 8,
16 		FLAG_COMMENT = 16,
17 
18 		FLAGS_ALL = (FLAG_TEXT | FLAG_HCRC | FLAG_EXTRA | FLAG_NAME | FLAG_COMMENT),
19 	};
20 
21 	enum GZipCompressionModes {
22 		CM_DEFLATE = 8, // The only one defined in the RFC (and the one we need).
23 	};
24 
25 	enum GZipHeaderSizes {
26 		BASE_HEADER_SIZE = 10,
27 		BASE_FOOTER_SIZE = 8,
28 	};
29 
30 	// Streaming output function for tdefl_compress_mem_to_output.
PutBytesToString(const void * buf,int len,void * user)31 	static mz_bool PutBytesToString(const void *buf, int len, void *user)
32 	{
33 		std::string *out = static_cast<std::string *>(user);
34 		out->append(static_cast<const char *>(buf), len);
35 		return MZ_TRUE;
36 	}
37 
ReadLE32(const unsigned char * data)38 	static uint32_t ReadLE32(const unsigned char *data)
39 	{
40 		return (uint32_t(data[0]) << 0) |
41 			(uint32_t(data[1]) << 8) |
42 			(uint32_t(data[2]) << 16) |
43 			(uint32_t(data[3]) << 24);
44 	}
45 
WriteLE32(unsigned char * out,uint32_t value)46 	static void WriteLE32(unsigned char *out, uint32_t value)
47 	{
48 		out[0] = (value >> 0) & 0xffu;
49 		out[1] = (value >> 8) & 0xffu;
50 		out[2] = (value >> 16) & 0xffu;
51 		out[3] = (value >> 24) & 0xffu;
52 	}
53 } // namespace
54 
IsGZipFormat(const unsigned char * data,size_t length)55 bool gzip::IsGZipFormat(const unsigned char *data, size_t length)
56 {
57 	// (This assumes it's possible to provide 0 bytes of compressed data, which is probably not true).
58 	if (length < BASE_HEADER_SIZE + BASE_FOOTER_SIZE) {
59 		return false;
60 	}
61 	if (data[0] != 0x1fu || data[1] != 0x8bu) {
62 		return false;
63 	}
64 	return true;
65 }
66 
DecompressDeflateOrGZip(const unsigned char * data,size_t length)67 std::string gzip::DecompressDeflateOrGZip(const unsigned char *data, size_t length)
68 {
69 	assert(data != nullptr);
70 	if (gzip::IsGZipFormat(data, length)) {
71 		return gzip::DecompressGZip(data, length);
72 	} else {
73 		// No GZip header. Assume raw DEFLATE data (which is what Pioneer used to save).
74 		return gzip::DecompressRawDeflate(data, length);
75 	}
76 }
77 
DecompressGZip(const unsigned char * data,size_t length)78 std::string gzip::DecompressGZip(const unsigned char *data, size_t length)
79 {
80 	assert(data != nullptr);
81 	assert(length >= BASE_HEADER_SIZE + BASE_FOOTER_SIZE);
82 
83 	const unsigned char *at_header = data;
84 	const unsigned char *at_footer = data + (length - BASE_FOOTER_SIZE);
85 
86 	// We only know about DEFLATE.
87 	if (at_header[2] != CM_DEFLATE) {
88 		throw gzip::DecompressionFailedException();
89 	}
90 
91 	int gzip_flags = at_header[3];
92 	// There are not supposed to be any unknown flags!
93 	if (gzip_flags & ~FLAGS_ALL) {
94 		throw gzip::DecompressionFailedException();
95 	}
96 
97 	const unsigned char *at_data = at_header + BASE_HEADER_SIZE;
98 	assert(at_data <= at_footer);
99 	size_t data_length = length - BASE_HEADER_SIZE + BASE_FOOTER_SIZE;
100 
101 	if (gzip_flags & FLAG_EXTRA) {
102 		if (data_length < 2) {
103 			throw gzip::DecompressionFailedException();
104 		}
105 		size_t xlen = uint8_t(at_data[0]) | (uint8_t(at_data[1]) << 8);
106 		xlen += 2; // Add the two bytes for the length itself.
107 		if (data_length < xlen) {
108 			throw gzip::DecompressionFailedException();
109 		}
110 		at_data += xlen;
111 		assert(at_data <= at_footer);
112 		data_length = at_footer - at_data;
113 	}
114 
115 	if (gzip_flags & FLAG_NAME) {
116 		const unsigned char *name_end = static_cast<const unsigned char *>(std::memchr(at_data, 0, data_length));
117 		if (!name_end) {
118 			throw gzip::DecompressionFailedException();
119 		}
120 		at_data = name_end + 1; // +1 to skip the null terminator.
121 		assert(at_data <= at_footer);
122 		data_length = at_footer - at_data;
123 	}
124 
125 	if (gzip_flags & FLAG_COMMENT) {
126 		const unsigned char *comment_end = static_cast<const unsigned char *>(std::memchr(at_data, 0, data_length));
127 		if (!comment_end) {
128 			throw gzip::DecompressionFailedException();
129 		}
130 		at_data = comment_end + 1; // +1 to skip the null terminator.
131 		assert(at_data <= at_footer);
132 		data_length = at_footer - at_data;
133 	}
134 
135 	if (gzip_flags & FLAG_HCRC) {
136 		if (data_length < 2) {
137 			throw gzip::DecompressionFailedException();
138 		}
139 		uint32_t true_crc = mz_crc32(MZ_CRC32_INIT, at_header, (at_data - at_header));
140 		true_crc &= 0xffffu; // Only care about the bottom 16 bits.
141 		uint32_t file_crc = uint8_t(at_data[0]) | (uint8_t(at_data[1]) << 8);
142 		if (true_crc != file_crc) {
143 			throw gzip::DecompressionFailedException();
144 		}
145 		at_data += 2;
146 		data_length -= 2;
147 		assert(at_data <= at_footer);
148 	}
149 
150 	std::string out;
151 
152 	assert(at_data + data_length == at_footer);
153 	bool inflate_success = tinfl_decompress_mem_to_callback(static_cast<const void *>(at_data), &data_length, &PutBytesToString, static_cast<void *>(&out), 0);
154 	if (!inflate_success) {
155 		throw gzip::DecompressionFailedException();
156 	}
157 
158 	uint32_t true_crc = mz_crc32(MZ_CRC32_INIT, reinterpret_cast<const mz_uint8 *>(out.data()), out.size());
159 	uint32_t crc_from_file = ReadLE32(at_footer + 0);
160 	uint32_t size_from_header = ReadLE32(at_footer + 4);
161 	if (true_crc != crc_from_file) {
162 		throw gzip::DecompressionFailedException();
163 	}
164 	if (size_from_header != static_cast<uint32_t>(out.size())) {
165 		throw gzip::DecompressionFailedException();
166 	}
167 
168 	return out;
169 }
170 
DecompressRawDeflate(const unsigned char * data,size_t length)171 std::string gzip::DecompressRawDeflate(const unsigned char *data, size_t length)
172 {
173 	assert(data != nullptr);
174 	std::string out;
175 	size_t in_size = length;
176 	bool success = tinfl_decompress_mem_to_callback(static_cast<const void *>(data), &in_size, &PutBytesToString, static_cast<void *>(&out), 0);
177 	if (!success) {
178 		throw gzip::DecompressionFailedException();
179 	}
180 	return out;
181 }
182 
CompressGZip(const std::string & data,const std::string & inner_file_name)183 std::string gzip::CompressGZip(const std::string &data, const std::string &inner_file_name)
184 {
185 	std::string out;
186 
187 	// The base GZip header.
188 	const unsigned char header_bytes[10] = { 31, 139, 8, FLAG_HCRC | FLAG_NAME, 0, 0, 0, 0, 0, 255 };
189 	out.append(reinterpret_cast<const char *>(header_bytes), sizeof(header_bytes));
190 
191 	// Add inner file name, *including* null terminator (c_str() ensures that the data is null terminated).
192 	out.append(inner_file_name.c_str(), inner_file_name.size() + 1);
193 
194 	// Add 16-bit header-CRC.
195 	uint32_t header_crc = mz_crc32(MZ_CRC32_INIT, reinterpret_cast<const mz_uint8 *>(out.data()), out.size());
196 	const unsigned char crc_buf[2] = {
197 		static_cast<unsigned char>((header_crc >> 0) & 0xffu),
198 		static_cast<unsigned char>((header_crc >> 8) & 0xffu),
199 	};
200 	out.append(reinterpret_cast<const char *>(crc_buf), sizeof(crc_buf));
201 
202 	bool success = tdefl_compress_mem_to_output(data.data(), data.size(), &PutBytesToString, static_cast<void *>(&out), TDEFL_DEFAULT_MAX_PROBES);
203 	if (!success) {
204 		throw gzip::CompressionFailedException();
205 	}
206 
207 	unsigned char footer_bytes[8];
208 	uint32_t data_crc = mz_crc32(MZ_CRC32_INIT, reinterpret_cast<const mz_uint8 *>(data.data()), data.size());
209 	WriteLE32(footer_bytes + 0, data_crc);
210 	// GZip specifies that size is written little-endian, modulo 2^32
211 	// (ie, if size is really > 2^32 we just chop off the high bits).
212 	WriteLE32(footer_bytes + 4, data.size());
213 	out.append(reinterpret_cast<const char *>(footer_bytes), sizeof(footer_bytes));
214 
215 	return out;
216 }
217