1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to you under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  * https://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14  * implied.  See the License for the specific language governing
15  * permissions and limitations under the License.
16  */
17 
18 #include <string.h>
19 #ifdef SNAPPY_CODEC
20 #include <snappy-c.h>
21 #  if defined(__APPLE__)
22 #    include <libkern/OSByteOrder.h>
23 #    define __bswap_32 OSSwapInt32
24 #  elif defined(__FreeBSD__)
25 #    include <sys/endian.h>
26 #    define __bswap_32 bswap32
27 #  elif defined(_WIN32)
28 #    include <stdlib.h>
29 #    define __bswap_32 _byteswap_ulong
30 #  else
31 #    include <byteswap.h>
32 #  endif
33 #endif
34 #ifdef DEFLATE_CODEC
35 #include <zlib.h>
36 #endif
37 #ifdef LZMA_CODEC
38 #include <lzma.h>
39 #endif
40 #include "avro/errors.h"
41 #include "avro/allocation.h"
42 #include "codec.h"
43 
44 #define DEFAULT_BLOCK_SIZE	(16 * 1024)
45 
46 /* NULL codec */
47 
48 static int
codec_null(avro_codec_t codec)49 codec_null(avro_codec_t codec)
50 {
51 	codec->name = "null";
52 	codec->type = AVRO_CODEC_NULL;
53 	codec->block_size = 0;
54 	codec->used_size = 0;
55 	codec->block_data = NULL;
56 	codec->codec_data = NULL;
57 
58 	return 0;
59 }
60 
/*
 * "Encode" a block with the null codec.
 *
 * No bytes are copied: the codec simply points at the caller's buffer
 * and reports `len` as both its capacity and its used length.
 * Always returns 0.
 */
static int encode_null(avro_codec_t c, void * data, int64_t len)
{
	c->used_size = len;
	c->block_size = len;
	c->block_data = data;	/* borrowed pointer, not a copy */

	return 0;
}
69 
/*
 * "Decode" a block with the null codec.
 *
 * The input is already plain data, so the codec just records the
 * caller's pointer and length as its current block.  Always returns 0.
 */
static int decode_null(avro_codec_t c, void * data, int64_t len)
{
	c->used_size = len;
	c->block_size = len;
	c->block_data = data;	/* borrowed pointer, not a copy */

	return 0;
}
78 
/*
 * Return a null codec to its empty state.
 *
 * The block pointer is only forgotten, never freed here: the encode and
 * decode routines of this codec store the caller's pointer directly.
 * Always returns 0.
 */
static int reset_null(avro_codec_t c)
{
	c->codec_data = NULL;
	c->block_data = NULL;
	c->used_size = 0;
	c->block_size = 0;

	return 0;
}
88 
89 /* Snappy codec */
90 
91 #ifdef SNAPPY_CODEC
92 
93 static int
codec_snappy(avro_codec_t codec)94 codec_snappy(avro_codec_t codec)
95 {
96 	codec->name = "snappy";
97 	codec->type = AVRO_CODEC_SNAPPY;
98 	codec->block_size = 0;
99 	codec->used_size = 0;
100 	codec->block_data = NULL;
101 	codec->codec_data = NULL;
102 
103 	return 0;
104 }
105 
/*
 * Compress `len` bytes at `data` with Snappy into the codec-owned buffer
 * and append a 4-byte CRC32 of the *uncompressed* input.
 *
 * On success c->block_data holds the compressed bytes followed by the
 * checksum and c->used_size is the total (compressed length + 4).
 * c->block_size always reflects the real capacity of c->block_data.
 *
 * Returns 0 on success, 1 on failure (an Avro error message is set).
 */
static int encode_snappy(avro_codec_t c, void * data, int64_t len)
{
	uint32_t crc;
	size_t outlen;
	size_t needed;
	unsigned char *trailer;

	/* A negative length would wrap to a huge size_t below. */
	if (len < 0) {
		avro_set_error("Invalid block length for snappy");
		return 1;
	}

	/* Worst-case compressed size plus room for the CRC trailer. */
	outlen = snappy_max_compressed_length((size_t) len);
	needed = outlen + 4;

	if (!c->block_data) {
		c->block_data = avro_malloc(needed);
		if (!c->block_data) {
			avro_set_error("Cannot allocate memory for snappy");
			return 1;
		}
		c->block_size = needed;
	} else if (c->block_size < (int64_t) needed) {
		/*
		 * Grow through a temporary so that a failed reallocation
		 * does not overwrite (and leak) the existing buffer or
		 * leave block_size out of step with it.
		 * NOTE(review): assumes avro_realloc keeps the old block
		 * valid on failure, like realloc() -- confirm.
		 */
		void *grown = avro_realloc(c->block_data, c->block_size, needed);
		if (!grown) {
			avro_set_error("Cannot allocate memory for snappy");
			return 1;
		}
		c->block_data = grown;
		c->block_size = needed;
	}

	if (snappy_compress((const char *) data, (size_t) len,
			    (char *) c->block_data, &outlen) != SNAPPY_OK) {
		avro_set_error("Error compressing block with Snappy");
		return 1;
	}

	/*
	 * Store the checksum most-significant byte first.  Writing the
	 * bytes explicitly yields the same layout the old unconditional
	 * byte swap produced on little-endian hosts, and is also correct
	 * on big-endian hosts (where the swap reversed the order).
	 */
	crc = (uint32_t) crc32(0, (const Bytef *) data, len);
	trailer = (unsigned char *) c->block_data + outlen;
	trailer[0] = (unsigned char) (crc >> 24);
	trailer[1] = (unsigned char) (crc >> 16);
	trailer[2] = (unsigned char) (crc >> 8);
	trailer[3] = (unsigned char) crc;

	c->used_size = outlen + 4;

	return 0;
}
136 
/*
 * Decompress a Snappy block of `len` bytes at `data`.
 *
 * The final 4 bytes of the input are a CRC32 of the uncompressed data;
 * everything before them is the Snappy payload.  The output is written
 * to the codec-owned buffer (grown on demand) and c->used_size is set to
 * the uncompressed length.
 *
 * Returns 0 on success, 1 on failure (an Avro error message is set).
 */
static int decode_snappy(avro_codec_t c, void * data, int64_t len)
{
	uint32_t crc;
	size_t outlen;
	size_t payload_len;
	unsigned char expected[4];

	/*
	 * The block must at least contain the CRC trailer; otherwise
	 * `len - 4` would wrap to a huge size_t and the calls below would
	 * read far outside the input.
	 */
	if (len < 4) {
		avro_set_error("Snappy block is too short to hold a CRC32 trailer");
		return 1;
	}
	payload_len = (size_t) (len - 4);

	if (snappy_uncompressed_length((const char *) data, payload_len, &outlen) != SNAPPY_OK) {
		avro_set_error("Uncompressed length error in snappy");
		return 1;
	}

	if (!c->block_data) {
		c->block_data = avro_malloc(outlen);
		if (!c->block_data) {
			avro_set_error("Cannot allocate memory for snappy");
			return 1;
		}
		c->block_size = outlen;
	} else if ((size_t) c->block_size < outlen) {
		/*
		 * Grow through a temporary so a failed reallocation does
		 * not overwrite (and leak) the existing buffer.
		 * NOTE(review): assumes avro_realloc keeps the old block
		 * valid on failure, like realloc() -- confirm.
		 */
		void *grown = avro_realloc(c->block_data, c->block_size, outlen);
		if (!grown) {
			avro_set_error("Cannot allocate memory for snappy");
			return 1;
		}
		c->block_data = grown;
		c->block_size = outlen;
	}

	if (snappy_uncompress((const char *) data, payload_len,
			      (char *) c->block_data, &outlen) != SNAPPY_OK) {
		avro_set_error("Error uncompressing block with Snappy");
		return 1;
	}

	/*
	 * Build the expected trailer most-significant byte first.  This is
	 * the layout the old unconditional byte swap produced on
	 * little-endian hosts, and it stays correct on big-endian hosts.
	 */
	crc = (uint32_t) crc32(0, (const Bytef *) c->block_data, outlen);
	expected[0] = (unsigned char) (crc >> 24);
	expected[1] = (unsigned char) (crc >> 16);
	expected[2] = (unsigned char) (crc >> 8);
	expected[3] = (unsigned char) crc;

	if (memcmp(expected, (const char *) data + payload_len, 4) != 0) {
		avro_set_error("CRC32 check failure uncompressing block with Snappy");
		return 1;
	}

	c->used_size = outlen;

	return 0;
}
178 
/*
 * Release the Snappy codec's block buffer (if one was allocated) and
 * return the codec to its empty state.  Always returns 0.
 */
static int reset_snappy(avro_codec_t c)
{
	if (c->block_data != NULL) {
		/* The allocator is told the recorded buffer size. */
		avro_free(c->block_data, c->block_size);
	}

	c->codec_data = NULL;
	c->block_data = NULL;
	c->used_size = 0;
	c->block_size = 0;

	return 0;
}
192 
193 #endif // SNAPPY_CODEC
194 
195 /* Deflate codec */
196 
197 #ifdef DEFLATE_CODEC
198 
199 struct codec_data_deflate {
200 	z_stream deflate;
201 	z_stream inflate;
202 };
203 #define codec_data_deflate_stream(cd)	&((struct codec_data_deflate *)cd)->deflate
204 #define codec_data_inflate_stream(cd)	&((struct codec_data_deflate *)cd)->inflate
205 
206 
207 static int
codec_deflate(avro_codec_t codec)208 codec_deflate(avro_codec_t codec)
209 {
210 	codec->name = "deflate";
211 	codec->type = AVRO_CODEC_DEFLATE;
212 	codec->block_size = 0;
213 	codec->used_size = 0;
214 	codec->block_data = NULL;
215 	codec->codec_data = avro_new(struct codec_data_deflate);
216 
217 	if (!codec->codec_data) {
218 		avro_set_error("Cannot allocate memory for zlib");
219 		return 1;
220 	}
221 
222 	z_stream *ds = codec_data_deflate_stream(codec->codec_data);
223 	z_stream *is = codec_data_inflate_stream(codec->codec_data);
224 
225 	memset(ds, 0, sizeof(z_stream));
226 	memset(is, 0, sizeof(z_stream));
227 
228 	ds->zalloc = is->zalloc = Z_NULL;
229 	ds->zfree  = is->zfree  = Z_NULL;
230 	ds->opaque = is->opaque = Z_NULL;
231 
232 	if (deflateInit2(ds, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
233 		avro_freet(struct codec_data_deflate, codec->codec_data);
234 		codec->codec_data = NULL;
235 		avro_set_error("Cannot initialize zlib deflate");
236 		return 1;
237 	}
238 
239 	if (inflateInit2(is, -15) != Z_OK) {
240 		avro_freet(struct codec_data_deflate, codec->codec_data);
241 		codec->codec_data = NULL;
242 		avro_set_error("Cannot initialize zlib inflate");
243 		return 1;
244 	}
245 
246 	return 0;
247 }
248 
/*
 * Compress `len` bytes at `data` with zlib into the codec-owned buffer.
 *
 * On success c->used_size is the compressed length.  c->block_size keeps
 * describing the real capacity of c->block_data, because that value is
 * what gets handed back to avro_realloc/avro_free later on.
 *
 * On failure the deflate stream is reset rather than ended, so it is
 * still initialized for a later call or for reset_deflate().
 *
 * Returns 0 on success, 1 on failure (an Avro error message is set).
 */
static int encode_deflate(avro_codec_t c, void * data, int64_t len)
{
	const uInt zlib_max = (uInt) -1;	/* largest count a z_stream can hold */
	int err;
	int64_t defl_len;
	z_stream *s;

	/* zlib counts pending input in a uInt; refuse what it cannot hold. */
	if (len < 0 || len > (int64_t) zlib_max) {
		avro_set_error("Invalid block length for deflate");
		return 1;
	}

	/*
	 * Output capacity: zlib's bound for the input padded by 20%, the
	 * same margin as before but computed in integer arithmetic.
	 */
	defl_len = compressBound((uLong) len + (uLong) len / 5);

	if (!c->block_data) {
		c->block_data = avro_malloc(defl_len);
		if (!c->block_data) {
			avro_set_error("Cannot allocate memory for deflate");
			return 1;
		}
		c->block_size = defl_len;
	} else if (c->block_size < defl_len) {
		/*
		 * Grow through a temporary so a failed reallocation does
		 * not overwrite (and leak) the existing buffer.
		 * NOTE(review): assumes avro_realloc keeps the old block
		 * valid on failure, like realloc() -- confirm.
		 */
		void *grown = avro_realloc(c->block_data, c->block_size, defl_len);
		if (!grown) {
			avro_set_error("Cannot allocate memory for deflate");
			return 1;
		}
		c->block_data = grown;
		c->block_size = defl_len;
	}

	c->used_size = 0;

	s = codec_data_deflate_stream(c->codec_data);

	s->next_in = (Bytef *) data;
	s->avail_in = (uInt) len;

	s->next_out = (Bytef *) c->block_data;
	/* Clamp instead of silently truncating a capacity above uInt range. */
	s->avail_out = (c->block_size > (int64_t) zlib_max)
		? zlib_max : (uInt) c->block_size;

	s->total_out = 0;

	err = deflate(s, Z_FINISH);
	if (err != Z_STREAM_END) {
		/*
		 * Anything other than Z_STREAM_END means the block was not
		 * completely compressed (Z_OK included: the output space
		 * ran out).  Reporting success here would emit an empty
		 * block, so treat every such result as an error.
		 */
		deflateReset(s);
		avro_set_error("Error compressing block with deflate (%i)", err);
		return 1;
	}

	c->used_size = s->total_out;

	if (deflateReset(s) != Z_OK) {
		avro_set_error("Error resetting deflate compression");
		return 1;
	}

	return 0;
}
300 
/*
 * Decompress `len` bytes of raw deflate data at `data` into the
 * codec-owned buffer, doubling the buffer until the output fits.
 *
 * On success c->used_size is the uncompressed length.  On failure the
 * inflate stream is reset rather than ended, so it is still initialized
 * for a later call or for reset_deflate().
 *
 * Returns 0 on success, 1 on failure (an Avro error message is set).
 */
static int decode_deflate(avro_codec_t c, void * data, int64_t len)
{
	int err;
	z_stream *s = codec_data_inflate_stream(c->codec_data);

	if (!c->block_data) {
		c->block_data = avro_malloc(DEFAULT_BLOCK_SIZE);
		if (!c->block_data) {
			avro_set_error("Cannot allocate memory for deflate");
			return 1;
		}
		c->block_size = DEFAULT_BLOCK_SIZE;
	}

	c->used_size = 0;

	s->next_in = data;
	s->avail_in = len;

	s->next_out = c->block_data;
	s->avail_out = c->block_size;

	s->total_out = 0;

	for (;;) {
		void *grown;

		err = inflate(s, Z_FINISH);
		if (err != Z_BUF_ERROR) {
			break;
		}

		// Apparently if there is yet available space in the output then something
		// has gone wrong in decompressing the data (according to cpython zlibmodule.c)
		if (s->avail_out > 0) {
			inflateReset(s);
			avro_set_error("Error decompressing block with deflate, possible data error");
			return 1;
		}

		/*
		 * The output buffer is full: double it.  The result goes
		 * through a temporary so a failed reallocation neither
		 * leaks the old buffer nor hands NULL to zlib.
		 * NOTE(review): assumes avro_realloc keeps the old block
		 * valid on failure, like realloc() -- confirm.
		 */
		grown = avro_realloc(c->block_data, c->block_size, c->block_size * 2);
		if (!grown) {
			inflateReset(s);
			avro_set_error("Cannot allocate memory for deflate");
			return 1;
		}
		c->block_data = grown;

		/* Resume right after the bytes already produced. */
		s->next_out = (unsigned char *) c->block_data + s->total_out;
		s->avail_out += c->block_size;
		c->block_size = c->block_size * 2;
	}

	if (err != Z_STREAM_END) {
		/*
		 * The stream did not run to completion, so no usable block
		 * was produced; never report that as success.
		 */
		inflateReset(s);
		avro_set_error("Error decompressing block with deflate (%i)", err);
		return 1;
	}

	c->used_size = s->total_out;

	if (inflateReset(s) != Z_OK) {
		avro_set_error("Error resetting deflate decompression");
		return 1;
	}

	return 0;
}
367 
/*
 * Tear down a deflate codec: free the block buffer, end both zlib
 * streams, free the private state, and clear every field.
 * Always returns 0.
 */
static int reset_deflate(avro_codec_t c)
{
	if (c->block_data != NULL) {
		avro_free(c->block_data, c->block_size);
	}

	if (c->codec_data != NULL) {
		z_stream *ds = codec_data_deflate_stream(c->codec_data);
		z_stream *is = codec_data_inflate_stream(c->codec_data);

		/* Streams must be ended before the struct holding them goes away. */
		deflateEnd(ds);
		inflateEnd(is);
		avro_freet(struct codec_data_deflate, c->codec_data);
	}

	c->codec_data = NULL;
	c->block_data = NULL;
	c->used_size = 0;
	c->block_size = 0;

	return 0;
}
386 
387 #endif // DEFLATE_CODEC
388 
389 /* LZMA codec */
390 
391 #ifdef LZMA_CODEC
392 
393 struct codec_data_lzma {
394 	lzma_filter filters[2];
395 	lzma_options_lzma options;
396 };
397 #define codec_data_lzma_filters(cd)	((struct codec_data_lzma *)cd)->filters
398 #define codec_data_lzma_options(cd)	&((struct codec_data_lzma *)cd)->options
399 
400 static int
codec_lzma(avro_codec_t codec)401 codec_lzma(avro_codec_t codec)
402 {
403 	codec->name = "lzma";
404 	codec->type = AVRO_CODEC_LZMA;
405 	codec->block_size = 0;
406 	codec->used_size = 0;
407 	codec->block_data = NULL;
408 	codec->codec_data = avro_new(struct codec_data_lzma);
409 
410 	if (!codec->codec_data) {
411 		avro_set_error("Cannot allocate memory for lzma");
412 		return 1;
413 	}
414 
415 	lzma_options_lzma* opt = codec_data_lzma_options(codec->codec_data);
416 	lzma_lzma_preset(opt, LZMA_PRESET_DEFAULT);
417 
418 	lzma_filter* filters = codec_data_lzma_filters(codec->codec_data);
419 	filters[0].id = LZMA_FILTER_LZMA2;
420 	filters[0].options = opt;
421 	filters[1].id = LZMA_VLI_UNKNOWN;
422 	filters[1].options = NULL;
423 
424 	return 0;
425 }
426 
/*
 * Compress `len` bytes at `data` with liblzma's raw encoder into the
 * codec-owned buffer.
 *
 * The buffer is sized as `len` plus the value reported by
 * lzma_raw_encoder_memusage(), and is grown when a later call needs
 * more than the current capacity.  codec->used_size is set to the number
 * of bytes the encoder wrote.
 *
 * Returns 0 on success, 1 on failure (an Avro error message is set).
 */
static int encode_lzma(avro_codec_t codec, void * data, int64_t len)
{
	lzma_ret ret;
	size_t written = 0;
	lzma_filter* filters = codec_data_lzma_filters(codec->codec_data);
	uint64_t memusage;
	int64_t buff_len;

	if (len < 0) {
		avro_set_error("Invalid block length for lzma");
		return 1;
	}

	/* UINT64_MAX is liblzma's error value for an unusable filter chain. */
	memusage = lzma_raw_encoder_memusage(filters);
	if (memusage == UINT64_MAX) {
		avro_set_error("Error in lzma encoder");
		return 1;
	}

	buff_len = len + memusage;

	if (!codec->block_data) {
		codec->block_data = avro_malloc(buff_len);
		if (!codec->block_data) {
			avro_set_error("Cannot allocate memory for lzma encoder");
			return 1;
		}
		codec->block_size = buff_len;
	} else if (codec->block_size < buff_len) {
		/*
		 * A buffer left over from an earlier, smaller call is not
		 * necessarily big enough; grow it.  The result goes through
		 * a temporary so a failed reallocation does not overwrite
		 * (and leak) the existing buffer.
		 * NOTE(review): assumes avro_realloc keeps the old block
		 * valid on failure, like realloc() -- confirm.
		 */
		void *grown = avro_realloc(codec->block_data, codec->block_size, buff_len);
		if (!grown) {
			avro_set_error("Cannot allocate memory for lzma encoder");
			return 1;
		}
		codec->block_data = grown;
		codec->block_size = buff_len;
	}

	ret = lzma_raw_buffer_encode(filters, NULL, data, len, codec->block_data, &written, codec->block_size);

	codec->used_size = written;

	if (ret != LZMA_OK) {
		avro_set_error("Error in lzma encoder");
		return 1;
	}

	return 0;
}
457 
/*
 * Decompress `len` bytes of raw LZMA2 data at `data` into the
 * codec-owned buffer, doubling the buffer each time liblzma reports
 * LZMA_BUF_ERROR.
 *
 * codec->used_size is set to the decoder's output position after each
 * attempt.
 *
 * Returns 0 on success, 1 on failure (an Avro error message is set).
 */
static int decode_lzma(avro_codec_t codec, void * data, int64_t len)
{
	size_t read_pos = 0;
	size_t write_pos = 0;
	lzma_ret ret;
	lzma_filter* filters = codec_data_lzma_filters(codec->codec_data);

	if (!codec->block_data) {
		codec->block_data = avro_malloc(DEFAULT_BLOCK_SIZE);
		if (!codec->block_data) {
			avro_set_error("Cannot allocate memory for lzma decoder");
			return 1;
		}
		codec->block_size = DEFAULT_BLOCK_SIZE;
	}

	for (;;) {
		void *grown;

		ret = lzma_raw_buffer_decode(filters, NULL, data,
			&read_pos, len, codec->block_data, &write_pos,
			codec->block_size);

		codec->used_size = write_pos;

		if (ret != LZMA_BUF_ERROR) {
			break;
		}

		/*
		 * Not enough output space: double the buffer and try again.
		 * NOTE(review): whether the retry resumes from read_pos /
		 * write_pos or starts over depends on how liblzma updates
		 * them on failure -- confirm against the liblzma docs.
		 *
		 * The new pointer goes through a temporary so a failed
		 * reallocation neither leaks the old buffer nor passes
		 * NULL to the decoder.
		 * NOTE(review): assumes avro_realloc keeps the old block
		 * valid on failure, like realloc() -- confirm.
		 */
		grown = avro_realloc(codec->block_data, codec->block_size, codec->block_size * 2);
		if (!grown) {
			avro_set_error("Cannot allocate memory for lzma decoder");
			return 1;
		}
		codec->block_data = grown;
		codec->block_size = codec->block_size * 2;
	}

	if (ret != LZMA_OK) {
		avro_set_error("Error in lzma decoder");
		return 1;
	}

	return 0;
}
499 
/*
 * Tear down an lzma codec: free the block buffer and the private
 * filter/options state if present, then clear every field.
 * Always returns 0.
 */
static int reset_lzma(avro_codec_t c)
{
	if (c->block_data != NULL) {
		avro_free(c->block_data, c->block_size);
	}

	if (c->codec_data != NULL) {
		avro_freet(struct codec_data_lzma, c->codec_data);
	}

	c->codec_data = NULL;
	c->block_data = NULL;
	c->used_size = 0;
	c->block_size = 0;

	return 0;
}
516 
517 #endif // LZMA_CODEC
518 
519 /* Common interface */
520 
/*
 * Initialize `codec` for the compression scheme named by `type`.
 *
 * A NULL `type` and the string "null" both select the pass-through
 * codec.  "snappy", "deflate" and "lzma" are recognized only when the
 * matching *_CODEC macro was defined at build time.
 *
 * Returns the selected initializer's result, or 1 with an Avro error
 * set when the name is not recognized.
 */
int avro_codec(avro_codec_t codec, const char *type)
{
	/* No name at all means "no compression". */
	if (!type || !strcmp(type, "null")) {
		return codec_null(codec);
	}

#ifdef SNAPPY_CODEC
	if (!strcmp(type, "snappy")) {
		return codec_snappy(codec);
	}
#endif

#ifdef DEFLATE_CODEC
	if (!strcmp(type, "deflate")) {
		return codec_deflate(codec);
	}
#endif

#ifdef LZMA_CODEC
	if (!strcmp(type, "lzma")) {
		return codec_lzma(codec);
	}
#endif

	avro_set_error("Unknown codec %s", type);
	return 1;
}
552 
/*
 * Encode `len` bytes at `data` with whichever codec `c` was set up for.
 *
 * Dispatches on c->type to the matching encode routine and returns its
 * result; returns 1 when the type is not one this build handles.
 */
int avro_codec_encode(avro_codec_t c, void * data, int64_t len)
{
	int rc = 1;	/* result for an unhandled codec type */

	switch (c->type) {
	case AVRO_CODEC_NULL:
		rc = encode_null(c, data, len);
		break;
#ifdef SNAPPY_CODEC
	case AVRO_CODEC_SNAPPY:
		rc = encode_snappy(c, data, len);
		break;
#endif
#ifdef DEFLATE_CODEC
	case AVRO_CODEC_DEFLATE:
		rc = encode_deflate(c, data, len);
		break;
#endif
#ifdef LZMA_CODEC
	case AVRO_CODEC_LZMA:
		rc = encode_lzma(c, data, len);
		break;
#endif
	default:
		break;
	}

	return rc;
}
575 
/*
 * Decode `len` bytes at `data` with whichever codec `c` was set up for.
 *
 * Dispatches on c->type to the matching decode routine and returns its
 * result; returns 1 when the type is not one this build handles.
 */
int avro_codec_decode(avro_codec_t c, void * data, int64_t len)
{
	int rc = 1;	/* result for an unhandled codec type */

	switch (c->type) {
	case AVRO_CODEC_NULL:
		rc = decode_null(c, data, len);
		break;
#ifdef SNAPPY_CODEC
	case AVRO_CODEC_SNAPPY:
		rc = decode_snappy(c, data, len);
		break;
#endif
#ifdef DEFLATE_CODEC
	case AVRO_CODEC_DEFLATE:
		rc = decode_deflate(c, data, len);
		break;
#endif
#ifdef LZMA_CODEC
	case AVRO_CODEC_LZMA:
		rc = decode_lzma(c, data, len);
		break;
#endif
	default:
		break;
	}

	return rc;
}
598 
/*
 * Release whatever the codec `c` currently holds and return it to its
 * empty state.
 *
 * Dispatches on c->type to the matching reset routine and returns its
 * result; returns 1 when the type is not one this build handles.
 */
int avro_codec_reset(avro_codec_t c)
{
	int rc = 1;	/* result for an unhandled codec type */

	switch (c->type) {
	case AVRO_CODEC_NULL:
		rc = reset_null(c);
		break;
#ifdef SNAPPY_CODEC
	case AVRO_CODEC_SNAPPY:
		rc = reset_snappy(c);
		break;
#endif
#ifdef DEFLATE_CODEC
	case AVRO_CODEC_DEFLATE:
		rc = reset_deflate(c);
		break;
#endif
#ifdef LZMA_CODEC
	case AVRO_CODEC_LZMA:
		rc = reset_lzma(c);
		break;
#endif
	default:
		break;
	}

	return rc;
}
621