1 #include "GZipFormat.h"
2 #include <cstdint>
3 #include <cstring>
4 #include <string>
5
6 extern "C" {
7 #include <miniz/miniz.h>
8 }
9
10 namespace {
// Bit flags carried in byte 3 of the GZip header. Each one announces an
// optional header field that follows the fixed-size part of the header.
enum GZipFlags {
	FLAG_TEXT = 1 << 0,
	FLAG_HCRC = 1 << 1, // A CRC of the header is stored after the optional fields.
	FLAG_EXTRA = 1 << 2,
	FLAG_NAME = 1 << 3,
	FLAG_COMMENT = 1 << 4,

	// Every flag bit known to this file; any bit outside this mask is rejected.
	FLAGS_ALL = (FLAG_TEXT | FLAG_HCRC | FLAG_EXTRA | FLAG_NAME | FLAG_COMMENT),
};
20
// Values of the compression-method byte (byte 2 of the GZip header).
enum GZipCompressionModes {
	// DEFLATE: the only method the RFC defines, and the only one needed here.
	CM_DEFLATE = 0x08,
};
24
// Sizes, in bytes, of the fixed-length parts that surround the compressed payload.
enum GZipHeaderSizes {
	BASE_FOOTER_SIZE = 8, // 32-bit CRC followed by 32-bit uncompressed size.
	BASE_HEADER_SIZE = 10, // Fixed header, before any optional fields.
};
29
30 // Streaming output function for tdefl_compress_mem_to_output.
PutBytesToString(const void * buf,int len,void * user)31 static mz_bool PutBytesToString(const void *buf, int len, void *user)
32 {
33 std::string *out = static_cast<std::string *>(user);
34 out->append(static_cast<const char *>(buf), len);
35 return MZ_TRUE;
36 }
37
// Decodes a 32-bit unsigned integer stored little-endian in data[0..3].
static uint32_t ReadLE32(const unsigned char *data)
{
	uint32_t value = 0;
	// Fold in the bytes from most significant (data[3]) down to least (data[0]).
	for (int i = 3; i >= 0; --i) {
		value = (value << 8) | data[i];
	}
	return value;
}
45
// Encodes `value` as four little-endian bytes into out[0..3].
static void WriteLE32(unsigned char *out, uint32_t value)
{
	for (int i = 0; i < 4; ++i) {
		// Emit the current low byte, then shift the next one into place.
		out[i] = static_cast<unsigned char>(value & 0xffu);
		value >>= 8;
	}
}
53 } // namespace
54
IsGZipFormat(const unsigned char * data,size_t length)55 bool gzip::IsGZipFormat(const unsigned char *data, size_t length)
56 {
57 // (This assumes it's possible to provide 0 bytes of compressed data, which is probably not true).
58 if (length < BASE_HEADER_SIZE + BASE_FOOTER_SIZE) {
59 return false;
60 }
61 if (data[0] != 0x1fu || data[1] != 0x8bu) {
62 return false;
63 }
64 return true;
65 }
66
DecompressDeflateOrGZip(const unsigned char * data,size_t length)67 std::string gzip::DecompressDeflateOrGZip(const unsigned char *data, size_t length)
68 {
69 assert(data != nullptr);
70 if (gzip::IsGZipFormat(data, length)) {
71 return gzip::DecompressGZip(data, length);
72 } else {
73 // No GZip header. Assume raw DEFLATE data (which is what Pioneer used to save).
74 return gzip::DecompressRawDeflate(data, length);
75 }
76 }
77
DecompressGZip(const unsigned char * data,size_t length)78 std::string gzip::DecompressGZip(const unsigned char *data, size_t length)
79 {
80 assert(data != nullptr);
81 assert(length >= BASE_HEADER_SIZE + BASE_FOOTER_SIZE);
82
83 const unsigned char *at_header = data;
84 const unsigned char *at_footer = data + (length - BASE_FOOTER_SIZE);
85
86 // We only know about DEFLATE.
87 if (at_header[2] != CM_DEFLATE) {
88 throw gzip::DecompressionFailedException();
89 }
90
91 int gzip_flags = at_header[3];
92 // There are not supposed to be any unknown flags!
93 if (gzip_flags & ~FLAGS_ALL) {
94 throw gzip::DecompressionFailedException();
95 }
96
97 const unsigned char *at_data = at_header + BASE_HEADER_SIZE;
98 assert(at_data <= at_footer);
99 size_t data_length = length - BASE_HEADER_SIZE + BASE_FOOTER_SIZE;
100
101 if (gzip_flags & FLAG_EXTRA) {
102 if (data_length < 2) {
103 throw gzip::DecompressionFailedException();
104 }
105 size_t xlen = uint8_t(at_data[0]) | (uint8_t(at_data[1]) << 8);
106 xlen += 2; // Add the two bytes for the length itself.
107 if (data_length < xlen) {
108 throw gzip::DecompressionFailedException();
109 }
110 at_data += xlen;
111 assert(at_data <= at_footer);
112 data_length = at_footer - at_data;
113 }
114
115 if (gzip_flags & FLAG_NAME) {
116 const unsigned char *name_end = static_cast<const unsigned char *>(std::memchr(at_data, 0, data_length));
117 if (!name_end) {
118 throw gzip::DecompressionFailedException();
119 }
120 at_data = name_end + 1; // +1 to skip the null terminator.
121 assert(at_data <= at_footer);
122 data_length = at_footer - at_data;
123 }
124
125 if (gzip_flags & FLAG_COMMENT) {
126 const unsigned char *comment_end = static_cast<const unsigned char *>(std::memchr(at_data, 0, data_length));
127 if (!comment_end) {
128 throw gzip::DecompressionFailedException();
129 }
130 at_data = comment_end + 1; // +1 to skip the null terminator.
131 assert(at_data <= at_footer);
132 data_length = at_footer - at_data;
133 }
134
135 if (gzip_flags & FLAG_HCRC) {
136 if (data_length < 2) {
137 throw gzip::DecompressionFailedException();
138 }
139 uint32_t true_crc = mz_crc32(MZ_CRC32_INIT, at_header, (at_data - at_header));
140 true_crc &= 0xffffu; // Only care about the bottom 16 bits.
141 uint32_t file_crc = uint8_t(at_data[0]) | (uint8_t(at_data[1]) << 8);
142 if (true_crc != file_crc) {
143 throw gzip::DecompressionFailedException();
144 }
145 at_data += 2;
146 data_length -= 2;
147 assert(at_data <= at_footer);
148 }
149
150 std::string out;
151
152 assert(at_data + data_length == at_footer);
153 bool inflate_success = tinfl_decompress_mem_to_callback(static_cast<const void *>(at_data), &data_length, &PutBytesToString, static_cast<void *>(&out), 0);
154 if (!inflate_success) {
155 throw gzip::DecompressionFailedException();
156 }
157
158 uint32_t true_crc = mz_crc32(MZ_CRC32_INIT, reinterpret_cast<const mz_uint8 *>(out.data()), out.size());
159 uint32_t crc_from_file = ReadLE32(at_footer + 0);
160 uint32_t size_from_header = ReadLE32(at_footer + 4);
161 if (true_crc != crc_from_file) {
162 throw gzip::DecompressionFailedException();
163 }
164 if (size_from_header != static_cast<uint32_t>(out.size())) {
165 throw gzip::DecompressionFailedException();
166 }
167
168 return out;
169 }
170
DecompressRawDeflate(const unsigned char * data,size_t length)171 std::string gzip::DecompressRawDeflate(const unsigned char *data, size_t length)
172 {
173 assert(data != nullptr);
174 std::string out;
175 size_t in_size = length;
176 bool success = tinfl_decompress_mem_to_callback(static_cast<const void *>(data), &in_size, &PutBytesToString, static_cast<void *>(&out), 0);
177 if (!success) {
178 throw gzip::DecompressionFailedException();
179 }
180 return out;
181 }
182
CompressGZip(const std::string & data,const std::string & inner_file_name)183 std::string gzip::CompressGZip(const std::string &data, const std::string &inner_file_name)
184 {
185 std::string out;
186
187 // The base GZip header.
188 const unsigned char header_bytes[10] = { 31, 139, 8, FLAG_HCRC | FLAG_NAME, 0, 0, 0, 0, 0, 255 };
189 out.append(reinterpret_cast<const char *>(header_bytes), sizeof(header_bytes));
190
191 // Add inner file name, *including* null terminator (c_str() ensures that the data is null terminated).
192 out.append(inner_file_name.c_str(), inner_file_name.size() + 1);
193
194 // Add 16-bit header-CRC.
195 uint32_t header_crc = mz_crc32(MZ_CRC32_INIT, reinterpret_cast<const mz_uint8 *>(out.data()), out.size());
196 const unsigned char crc_buf[2] = {
197 static_cast<unsigned char>((header_crc >> 0) & 0xffu),
198 static_cast<unsigned char>((header_crc >> 8) & 0xffu),
199 };
200 out.append(reinterpret_cast<const char *>(crc_buf), sizeof(crc_buf));
201
202 bool success = tdefl_compress_mem_to_output(data.data(), data.size(), &PutBytesToString, static_cast<void *>(&out), TDEFL_DEFAULT_MAX_PROBES);
203 if (!success) {
204 throw gzip::CompressionFailedException();
205 }
206
207 unsigned char footer_bytes[8];
208 uint32_t data_crc = mz_crc32(MZ_CRC32_INIT, reinterpret_cast<const mz_uint8 *>(data.data()), data.size());
209 WriteLE32(footer_bytes + 0, data_crc);
210 // GZip specifies that size is written little-endian, modulo 2^32
211 // (ie, if size is really > 2^32 we just chop off the high bits).
212 WriteLE32(footer_bytes + 4, data.size());
213 out.append(reinterpret_cast<const char *>(footer_bytes), sizeof(footer_bytes));
214
215 return out;
216 }
217