1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 
3 /*  Fluent Bit
4  *  ==========
5  *  Copyright (C) 2019-2021 The Fluent Bit Authors
6  *  Copyright (C) 2015-2018 Treasure Data Inc.
7  *
8  *  Licensed under the Apache License, Version 2.0 (the "License");
9  *  you may not use this file except in compliance with the License.
10  *  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *  Unless required by applicable law or agreed to in writing, software
15  *  distributed under the License is distributed on an "AS IS" BASIS,
16  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  *  See the License for the specific language governing permissions and
18  *  limitations under the License.
19  */
20 
21 #include <fluent-bit/flb_info.h>
22 #include <fluent-bit/flb_mem.h>
23 #include <fluent-bit/flb_log.h>
24 #include <fluent-bit/flb_gzip.h>
25 #include <miniz/miniz.h>
26 
/*
 * Length in bytes of the fixed GZip header emitted by gzip_header(). The
 * compressor writes the deflated payload starting at this offset, and the
 * decompressor skips the same amount before looking at optional fields.
 */
#define FLB_GZIP_HEADER_OFFSET 10
28 
/*
 * Bit masks for the GZip header flag byte (FLG), using the field names
 * from RFC 1952. Each constant occupies its own bit so they can be tested
 * with a bitwise AND against the flag byte.
 */
typedef enum {
    FTEXT    = 1 << 0,
    FHCRC    = 1 << 1,   /* a 16-bit header CRC follows the header */
    FEXTRA   = 1 << 2,   /* a length-prefixed extra field is present */
    FNAME    = 1 << 3,   /* a NUL-terminated file name is present */
    FCOMMENT = 1 << 4    /* a NUL-terminated comment is present */
} flb_tinf_gzip_flag;
36 
/* Decode an unsigned 16-bit little-endian value from the two bytes at p */
static unsigned int read_le16(const unsigned char *p)
{
    unsigned int low = p[0];
    unsigned int high = p[1];

    return (high << 8) | low;
}
41 
/* Decode an unsigned 32-bit little-endian value from the four bytes at p */
static unsigned int read_le32(const unsigned char *p)
{
    unsigned int value = 0;
    int idx;

    /* Fold in bytes from the most significant (p[3]) down to p[0] */
    for (idx = 3; idx >= 0; idx--) {
        value = (value << 8) | (unsigned int) p[idx];
    }

    return value;
}
49 
/*
 * Write the fixed 10-byte GZip header into buf. The caller must provide
 * at least 10 writable bytes. Field names in the comments below follow
 * RFC 1952.
 */
static inline void gzip_header(void *buf)
{
    static const uint8_t fixed_header[] = {
        0x1F, 0x8B,     /* ID1, ID2: GZip magic bytes */
        8,              /* CM: deflate */
        0,              /* FLG: no optional fields */
        0, 0, 0, 0,     /* MTIME: not set */
        0,              /* XFL */
        0xFF            /* OS: unknown */
    };
    uint8_t *dst = buf;
    size_t i;

    for (i = 0; i < sizeof(fixed_header); i++) {
        dst[i] = fixed_header[i];
    }
}
67 
flb_gzip_compress(void * in_data,size_t in_len,void ** out_data,size_t * out_len)68 int flb_gzip_compress(void *in_data, size_t in_len,
69                       void **out_data, size_t *out_len)
70 {
71     int flush;
72     int status;
73     int footer_start;
74     uint8_t *pb;
75     size_t out_size;
76     void *out_buf;
77     z_stream strm;
78     mz_ulong crc;
79 
80     /*
81      * Calculating the upper bound for a gzip compression is
82      * non-trivial, so we rely on miniz's own calculation
83      * to guarantee memory safety.
84      */
85     out_size = compressBound(in_len);
86     out_buf = flb_malloc(out_size);
87 
88     if (!out_buf) {
89         flb_errno();
90         flb_error("[gzip] could not allocate outgoing buffer");
91         return -1;
92     }
93 
94     /* Initialize streaming buffer context */
95     memset(&strm, '\0', sizeof(strm));
96     strm.zalloc    = Z_NULL;
97     strm.zfree     = Z_NULL;
98     strm.opaque    = Z_NULL;
99     strm.next_in   = in_data;
100     strm.avail_in  = in_len;
101     strm.total_out = 0;
102 
103     /* Deflate mode */
104     deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
105                  Z_DEFLATED, -Z_DEFAULT_WINDOW_BITS, 9, Z_DEFAULT_STRATEGY);
106 
107     /*
108      * Miniz don't support GZip format directly, instead we will:
109      *
110      * - append manual GZip magic bytes
111      * - deflate raw content
112      * - append manual CRC32 data
113      */
114     gzip_header(out_buf);
115 
116     /* Header offset */
117     pb = (uint8_t *) out_buf + FLB_GZIP_HEADER_OFFSET;
118 
119     flush = Z_NO_FLUSH;
120     while (1) {
121         strm.next_out  = pb + strm.total_out;
122         strm.avail_out = out_size - (pb - (uint8_t *) out_buf);
123 
124         if (strm.avail_in == 0) {
125             flush = Z_FINISH;
126         }
127 
128         status = deflate(&strm, flush);
129         if (status == Z_STREAM_END) {
130             break;
131         }
132         else if (status != Z_OK) {
133             deflateEnd(&strm);
134             return -1;
135         }
136     }
137 
138     if (deflateEnd(&strm) != Z_OK) {
139         flb_free(out_buf);
140         return -1;
141     }
142     *out_len = strm.total_out;
143 
144     /* Construct the gzip checksum (CRC32 footer) */
145     footer_start = FLB_GZIP_HEADER_OFFSET + *out_len;
146     pb = (uint8_t *) out_buf + footer_start;
147 
148     crc = mz_crc32(MZ_CRC32_INIT, in_data, in_len);
149     *pb++ = crc & 0xFF;
150     *pb++ = (crc >> 8) & 0xFF;
151     *pb++ = (crc >> 16) & 0xFF;
152     *pb++ = (crc >> 24) & 0xFF;
153     *pb++ = in_len & 0xFF;
154     *pb++ = (in_len >> 8) & 0xFF;
155     *pb++ = (in_len >> 16) & 0xFF;
156     *pb++ = (in_len >> 24) & 0xFF;
157 
158     /* Set the real buffer size for the caller */
159     *out_len += FLB_GZIP_HEADER_OFFSET + 8;
160     *out_data = out_buf;
161 
162     return 0;
163 }
164 
165 /* Uncompress (inflate) GZip data */
flb_gzip_uncompress(void * in_data,size_t in_len,void ** out_data,size_t * out_len)166 int flb_gzip_uncompress(void *in_data, size_t in_len,
167                         void **out_data, size_t *out_len)
168 {
169     int status;
170     uint8_t *p;
171     void *out_buf;
172     size_t out_size = 0;
173     void *zip_data;
174     size_t zip_len;
175     unsigned char flg;
176     unsigned int xlen, hcrc;
177     unsigned int dlen, crc;
178     mz_ulong crc_out;
179     mz_stream stream;
180     const unsigned char *start;
181 
182     /* Minimal length: header + crc32 */
183     if (in_len < 18) {
184         flb_error("[gzip] unexpected content length");
185         return -1;
186     }
187 
188     /* Magic bytes */
189     p = in_data;
190     if (p[0] != 0x1F || p[1] != 0x8B) {
191         flb_error("[gzip] invalid magic bytes");
192         return -1;
193     }
194 
195     if (p[2] != 8) {
196         flb_error("[gzip] invalid method");
197         return -1;
198     }
199 
200     /* Flag byte */
201     flg = p[3];
202 
203     /* Reserved bits */
204     if (flg & 0xE0) {
205         flb_error("[gzip] invalid flag");
206         return -1;
207     }
208 
209     /* Skip base header of 10 bytes */
210     start = p + FLB_GZIP_HEADER_OFFSET;
211 
212     /* Skip extra data if present */
213     if (flg & FEXTRA) {
214         xlen = read_le16(start);
215         if (xlen > in_len - 12) {
216             flb_error("[gzip] invalid gzip data");
217             return -1;
218         }
219         start += xlen + 2;
220     }
221 
222     /* Skip file name if present */
223     if (flg & FNAME) {
224         do {
225             if (start - p >= in_len) {
226                 flb_error("[gzip] invalid gzip data (FNAME)");
227                 return -1;
228             }
229         } while (*start++);
230     }
231 
232     /* Skip file comment if present */
233     if (flg & FCOMMENT) {
234         do {
235             if (start - p >= in_len) {
236                 flb_error("[gzip] invalid gzip data (FCOMMENT)");
237                 return -1;
238             }
239         } while (*start++);
240     }
241 
242     /* Check header crc if present */
243     if (flg & FHCRC) {
244         if (start - p > in_len - 2) {
245             flb_error("[gzip] invalid gzip data (FHRC)");
246             return -1;
247         }
248 
249         hcrc = read_le16(start);
250         crc = mz_crc32(MZ_CRC32_INIT, p, start - p) & 0x0000FFFF;
251         if (hcrc != crc) {
252             flb_error("[gzip] invalid gzip header CRC");
253             return -1;
254         }
255         start += 2;
256     }
257 
258     /* Get decompressed length */
259     dlen = read_le32(&p[in_len - 4]);
260 
261     /* Limit decompressed length to 100MB */
262     if (dlen > 100000000) {
263         flb_error("[gzip] maximum decompression size is 100MB");
264         return -1;
265     }
266 
267     /* Get CRC32 checksum of original data */
268     crc = read_le32(&p[in_len - 8]);
269 
270     /* Decompress data */
271     if ((p + in_len) - p < 8) {
272         flb_error("[gzip] invalid gzip CRC32 checksum");
273         return -1;
274     }
275 
276     /* Allocate outgoing buffer */
277     out_buf = flb_malloc(dlen);
278     if (!out_buf) {
279         flb_errno();
280         return -1;
281     }
282     out_size = dlen;
283 
284     /* Ensure size is above 0 */
285     if (((p + in_len) - start - 8) <= 0) {
286         flb_free(out_buf);
287         return -1;
288     }
289 
290     /* Map zip content */
291     zip_data = (uint8_t *) start;
292     zip_len = (p + in_len) - start - 8;
293 
294     memset(&stream, 0, sizeof(stream));
295     stream.next_in = zip_data;
296     stream.avail_in = zip_len;
297     stream.next_out = out_buf;
298     stream.avail_out = out_size;
299 
300     status = mz_inflateInit2(&stream, -Z_DEFAULT_WINDOW_BITS);
301     if (status != MZ_OK) {
302         flb_free(out_buf);
303         return -1;
304     }
305 
306     status = mz_inflate(&stream, MZ_FINISH);
307     if (status != MZ_STREAM_END) {
308         mz_inflateEnd(&stream);
309         flb_free(out_buf);
310         return -1;
311     }
312 
313     if (stream.total_out != dlen) {
314         mz_inflateEnd(&stream);
315         flb_free(out_buf);
316         flb_error("[gzip] invalid gzip data size");
317         return -1;
318     }
319 
320     /* terminate the stream, it's not longer required */
321     mz_inflateEnd(&stream);
322 
323     /* Validate message CRC vs inflated data CRC */
324     crc_out = mz_crc32(MZ_CRC32_INIT, out_buf, dlen);
325     if (crc_out != crc) {
326         flb_free(out_buf);
327         flb_error("[gzip] invalid GZip checksum (CRC32)");
328         return -1;
329     }
330 
331     /* set the uncompressed data */
332     *out_len = dlen;
333     *out_data = out_buf;
334 
335     return 0;
336 }
337