1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3 /* Fluent Bit
4 * ==========
5 * Copyright (C) 2019-2021 The Fluent Bit Authors
6 * Copyright (C) 2015-2018 Treasure Data Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 #include <fluent-bit/flb_info.h>
22 #include <fluent-bit/flb_mem.h>
23 #include <fluent-bit/flb_log.h>
24 #include <fluent-bit/flb_gzip.h>
25 #include <miniz/miniz.h>
26
27 #define FLB_GZIP_HEADER_OFFSET 10
28
/*
 * Bits of the gzip header FLG byte (RFC 1952). Each constant is a
 * single-bit mask to be tested against the flag byte.
 */
typedef enum {
    FTEXT    = 1 << 0,  /* content is probably text (hint only) */
    FHCRC    = 1 << 1,  /* a 16-bit header CRC follows the header */
    FEXTRA   = 1 << 2,  /* a length-prefixed extra field is present */
    FNAME    = 1 << 3,  /* a NUL-terminated file name is present */
    FCOMMENT = 1 << 4   /* a NUL-terminated comment is present */
} flb_tinf_gzip_flag;
36
/*
 * Decode an unsigned 16-bit little-endian integer.
 *
 * p: points to at least two readable bytes; p[0] is the low byte.
 *
 * Returns the decoded value (0..65535).
 */
static unsigned int read_le16(const unsigned char *p)
{
    unsigned int low = p[0];
    unsigned int high = p[1];

    return (high << 8) | low;
}
41
/*
 * Decode an unsigned 32-bit little-endian integer.
 *
 * p: points to at least four readable bytes; p[0] is the least
 *    significant byte and p[3] the most significant one.
 *
 * Returns the decoded value.
 */
static unsigned int read_le32(const unsigned char *p)
{
    unsigned int value = 0;
    int idx;

    /* Fold bytes in from the most significant one down to p[0] */
    for (idx = 3; idx >= 0; idx--) {
        value = (value << 8) | (unsigned int) p[idx];
    }

    return value;
}
49
/*
 * Write the fixed 10-byte gzip member header at the start of 'buf'.
 *
 * buf: destination with room for at least 10 bytes.
 *
 * Field names in the table below follow RFC 1952.
 */
static inline void gzip_header(void *buf)
{
    static const uint8_t header[] = {
        0x1F, 0x8B,     /* ID1, ID2: GZip magic bytes */
        8,              /* CM: deflate */
        0,              /* FLG: no optional fields */
        0, 0, 0, 0,     /* MTIME: not set */
        0,              /* XFL: no extra flags */
        0xFF            /* OS: unknown */
    };
    uint8_t *dst = buf;
    unsigned int i;

    for (i = 0; i < sizeof(header); i++) {
        dst[i] = header[i];
    }
}
67
flb_gzip_compress(void * in_data,size_t in_len,void ** out_data,size_t * out_len)68 int flb_gzip_compress(void *in_data, size_t in_len,
69 void **out_data, size_t *out_len)
70 {
71 int flush;
72 int status;
73 int footer_start;
74 uint8_t *pb;
75 size_t out_size;
76 void *out_buf;
77 z_stream strm;
78 mz_ulong crc;
79
80 /*
81 * Calculating the upper bound for a gzip compression is
82 * non-trivial, so we rely on miniz's own calculation
83 * to guarantee memory safety.
84 */
85 out_size = compressBound(in_len);
86 out_buf = flb_malloc(out_size);
87
88 if (!out_buf) {
89 flb_errno();
90 flb_error("[gzip] could not allocate outgoing buffer");
91 return -1;
92 }
93
94 /* Initialize streaming buffer context */
95 memset(&strm, '\0', sizeof(strm));
96 strm.zalloc = Z_NULL;
97 strm.zfree = Z_NULL;
98 strm.opaque = Z_NULL;
99 strm.next_in = in_data;
100 strm.avail_in = in_len;
101 strm.total_out = 0;
102
103 /* Deflate mode */
104 deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
105 Z_DEFLATED, -Z_DEFAULT_WINDOW_BITS, 9, Z_DEFAULT_STRATEGY);
106
107 /*
108 * Miniz don't support GZip format directly, instead we will:
109 *
110 * - append manual GZip magic bytes
111 * - deflate raw content
112 * - append manual CRC32 data
113 */
114 gzip_header(out_buf);
115
116 /* Header offset */
117 pb = (uint8_t *) out_buf + FLB_GZIP_HEADER_OFFSET;
118
119 flush = Z_NO_FLUSH;
120 while (1) {
121 strm.next_out = pb + strm.total_out;
122 strm.avail_out = out_size - (pb - (uint8_t *) out_buf);
123
124 if (strm.avail_in == 0) {
125 flush = Z_FINISH;
126 }
127
128 status = deflate(&strm, flush);
129 if (status == Z_STREAM_END) {
130 break;
131 }
132 else if (status != Z_OK) {
133 deflateEnd(&strm);
134 return -1;
135 }
136 }
137
138 if (deflateEnd(&strm) != Z_OK) {
139 flb_free(out_buf);
140 return -1;
141 }
142 *out_len = strm.total_out;
143
144 /* Construct the gzip checksum (CRC32 footer) */
145 footer_start = FLB_GZIP_HEADER_OFFSET + *out_len;
146 pb = (uint8_t *) out_buf + footer_start;
147
148 crc = mz_crc32(MZ_CRC32_INIT, in_data, in_len);
149 *pb++ = crc & 0xFF;
150 *pb++ = (crc >> 8) & 0xFF;
151 *pb++ = (crc >> 16) & 0xFF;
152 *pb++ = (crc >> 24) & 0xFF;
153 *pb++ = in_len & 0xFF;
154 *pb++ = (in_len >> 8) & 0xFF;
155 *pb++ = (in_len >> 16) & 0xFF;
156 *pb++ = (in_len >> 24) & 0xFF;
157
158 /* Set the real buffer size for the caller */
159 *out_len += FLB_GZIP_HEADER_OFFSET + 8;
160 *out_data = out_buf;
161
162 return 0;
163 }
164
165 /* Uncompress (inflate) GZip data */
flb_gzip_uncompress(void * in_data,size_t in_len,void ** out_data,size_t * out_len)166 int flb_gzip_uncompress(void *in_data, size_t in_len,
167 void **out_data, size_t *out_len)
168 {
169 int status;
170 uint8_t *p;
171 void *out_buf;
172 size_t out_size = 0;
173 void *zip_data;
174 size_t zip_len;
175 unsigned char flg;
176 unsigned int xlen, hcrc;
177 unsigned int dlen, crc;
178 mz_ulong crc_out;
179 mz_stream stream;
180 const unsigned char *start;
181
182 /* Minimal length: header + crc32 */
183 if (in_len < 18) {
184 flb_error("[gzip] unexpected content length");
185 return -1;
186 }
187
188 /* Magic bytes */
189 p = in_data;
190 if (p[0] != 0x1F || p[1] != 0x8B) {
191 flb_error("[gzip] invalid magic bytes");
192 return -1;
193 }
194
195 if (p[2] != 8) {
196 flb_error("[gzip] invalid method");
197 return -1;
198 }
199
200 /* Flag byte */
201 flg = p[3];
202
203 /* Reserved bits */
204 if (flg & 0xE0) {
205 flb_error("[gzip] invalid flag");
206 return -1;
207 }
208
209 /* Skip base header of 10 bytes */
210 start = p + FLB_GZIP_HEADER_OFFSET;
211
212 /* Skip extra data if present */
213 if (flg & FEXTRA) {
214 xlen = read_le16(start);
215 if (xlen > in_len - 12) {
216 flb_error("[gzip] invalid gzip data");
217 return -1;
218 }
219 start += xlen + 2;
220 }
221
222 /* Skip file name if present */
223 if (flg & FNAME) {
224 do {
225 if (start - p >= in_len) {
226 flb_error("[gzip] invalid gzip data (FNAME)");
227 return -1;
228 }
229 } while (*start++);
230 }
231
232 /* Skip file comment if present */
233 if (flg & FCOMMENT) {
234 do {
235 if (start - p >= in_len) {
236 flb_error("[gzip] invalid gzip data (FCOMMENT)");
237 return -1;
238 }
239 } while (*start++);
240 }
241
242 /* Check header crc if present */
243 if (flg & FHCRC) {
244 if (start - p > in_len - 2) {
245 flb_error("[gzip] invalid gzip data (FHRC)");
246 return -1;
247 }
248
249 hcrc = read_le16(start);
250 crc = mz_crc32(MZ_CRC32_INIT, p, start - p) & 0x0000FFFF;
251 if (hcrc != crc) {
252 flb_error("[gzip] invalid gzip header CRC");
253 return -1;
254 }
255 start += 2;
256 }
257
258 /* Get decompressed length */
259 dlen = read_le32(&p[in_len - 4]);
260
261 /* Limit decompressed length to 100MB */
262 if (dlen > 100000000) {
263 flb_error("[gzip] maximum decompression size is 100MB");
264 return -1;
265 }
266
267 /* Get CRC32 checksum of original data */
268 crc = read_le32(&p[in_len - 8]);
269
270 /* Decompress data */
271 if ((p + in_len) - p < 8) {
272 flb_error("[gzip] invalid gzip CRC32 checksum");
273 return -1;
274 }
275
276 /* Allocate outgoing buffer */
277 out_buf = flb_malloc(dlen);
278 if (!out_buf) {
279 flb_errno();
280 return -1;
281 }
282 out_size = dlen;
283
284 /* Ensure size is above 0 */
285 if (((p + in_len) - start - 8) <= 0) {
286 flb_free(out_buf);
287 return -1;
288 }
289
290 /* Map zip content */
291 zip_data = (uint8_t *) start;
292 zip_len = (p + in_len) - start - 8;
293
294 memset(&stream, 0, sizeof(stream));
295 stream.next_in = zip_data;
296 stream.avail_in = zip_len;
297 stream.next_out = out_buf;
298 stream.avail_out = out_size;
299
300 status = mz_inflateInit2(&stream, -Z_DEFAULT_WINDOW_BITS);
301 if (status != MZ_OK) {
302 flb_free(out_buf);
303 return -1;
304 }
305
306 status = mz_inflate(&stream, MZ_FINISH);
307 if (status != MZ_STREAM_END) {
308 mz_inflateEnd(&stream);
309 flb_free(out_buf);
310 return -1;
311 }
312
313 if (stream.total_out != dlen) {
314 mz_inflateEnd(&stream);
315 flb_free(out_buf);
316 flb_error("[gzip] invalid gzip data size");
317 return -1;
318 }
319
320 /* terminate the stream, it's not longer required */
321 mz_inflateEnd(&stream);
322
323 /* Validate message CRC vs inflated data CRC */
324 crc_out = mz_crc32(MZ_CRC32_INIT, out_buf, dlen);
325 if (crc_out != crc) {
326 flb_free(out_buf);
327 flb_error("[gzip] invalid GZip checksum (CRC32)");
328 return -1;
329 }
330
331 /* set the uncompressed data */
332 *out_len = dlen;
333 *out_data = out_buf;
334
335 return 0;
336 }
337