1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to you under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14 * implied. See the License for the specific language governing
15 * permissions and limitations under the License.
16 */
17
18 #include <string.h>
19 #ifdef SNAPPY_CODEC
20 #include <snappy-c.h>
21 # if defined(__APPLE__)
22 # include <libkern/OSByteOrder.h>
23 # define __bswap_32 OSSwapInt32
24 # elif defined(__FreeBSD__)
25 # include <sys/endian.h>
26 # define __bswap_32 bswap32
27 # elif defined(_WIN32)
28 # include <stdlib.h>
29 # define __bswap_32 _byteswap_ulong
30 # else
31 # include <byteswap.h>
32 # endif
33 #endif
34 #ifdef DEFLATE_CODEC
35 #include <zlib.h>
36 #endif
37 #ifdef LZMA_CODEC
38 #include <lzma.h>
39 #endif
40 #include "avro/errors.h"
41 #include "avro/allocation.h"
42 #include "codec.h"
43
44 #define DEFAULT_BLOCK_SIZE (16 * 1024)
45
46 /* NULL codec */
47
48 static int
codec_null(avro_codec_t codec)49 codec_null(avro_codec_t codec)
50 {
51 codec->name = "null";
52 codec->type = AVRO_CODEC_NULL;
53 codec->block_size = 0;
54 codec->used_size = 0;
55 codec->block_data = NULL;
56 codec->codec_data = NULL;
57
58 return 0;
59 }
60
/*
 * Encode with the null codec.  No bytes are copied or transformed: the
 * codec's block_data is pointed at the caller's `data`, and `len` is
 * recorded as both block_size and used_size.
 *
 * Always returns 0.
 */
static int encode_null(avro_codec_t c, void * data, int64_t len)
{
	c->used_size = len;
	c->block_size = len;
	c->block_data = data;

	return 0;
}
69
/*
 * Decode with the null codec.  No bytes are copied or transformed: the
 * codec's block_data is pointed at the caller's `data`, and `len` is
 * recorded as both block_size and used_size.
 *
 * Always returns 0.
 */
static int decode_null(avro_codec_t c, void * data, int64_t len)
{
	c->used_size = len;
	c->block_size = len;
	c->block_data = data;

	return 0;
}
78
/*
 * Reset a null codec.  Nothing is freed here; the buffer pointer, both
 * size fields and the private codec data are simply cleared.
 *
 * Always returns 0.
 */
static int reset_null(avro_codec_t c)
{
	c->codec_data = NULL;
	c->used_size = 0;
	c->block_size = 0;
	c->block_data = NULL;

	return 0;
}
88
89 /* Snappy codec */
90
91 #ifdef SNAPPY_CODEC
92
93 static int
codec_snappy(avro_codec_t codec)94 codec_snappy(avro_codec_t codec)
95 {
96 codec->name = "snappy";
97 codec->type = AVRO_CODEC_SNAPPY;
98 codec->block_size = 0;
99 codec->used_size = 0;
100 codec->block_data = NULL;
101 codec->codec_data = NULL;
102
103 return 0;
104 }
105
/*
 * Compress `len` bytes at `data` with Snappy into the codec's own buffer
 * and append a 4-byte, byte-swapped CRC32 of the uncompressed input.
 *
 * The output buffer (c->block_data) is allocated on first use and grown
 * when it is too small for the worst-case compressed size plus the CRC.
 * On success c->used_size is the compressed length plus 4.
 *
 * Returns 0 on success, 1 on failure (an error message is set).  When
 * growing the buffer fails, the previous buffer and c->block_size are
 * left untouched so that a later reset can still release it.
 */
static int encode_snappy(avro_codec_t c, void * data, int64_t len)
{
	uint32_t crc;
	size_t outlen;
	size_t needed;

	/* A negative length would turn into a huge size_t below. */
	if (len < 0) {
		avro_set_error("Invalid block length for snappy");
		return 1;
	}

	outlen = snappy_max_compressed_length((size_t) len);
	needed = outlen + 4;	/* room for the trailing CRC32 */

	if (!c->block_data) {
		c->block_data = avro_malloc(needed);
		if (!c->block_data) {
			avro_set_error("Cannot allocate memory for snappy");
			return 1;
		}
		c->block_size = needed;
	} else if (c->block_size < (int64_t) needed) {
		/*
		 * Keep the old pointer until the reallocation is known to
		 * have succeeded; overwriting it directly would leak the
		 * old buffer on failure (assuming avro_realloc behaves
		 * like realloc and leaves the old block valid).
		 */
		void *grown = avro_realloc(c->block_data, c->block_size, needed);
		if (!grown) {
			avro_set_error("Cannot allocate memory for snappy");
			return 1;
		}
		c->block_data = grown;
		c->block_size = needed;
	}

	if (snappy_compress((const char *)data, len, (char*)c->block_data, &outlen) != SNAPPY_OK)
	{
		avro_set_error("Error compressing block with Snappy");
		return 1;
	}

	/* The checksum covers the uncompressed data, not the Snappy output. */
	crc = __bswap_32(crc32(0, (const Bytef *)data, len));
	memcpy((char*)c->block_data+outlen, &crc, 4);
	c->used_size = outlen+4;

	return 0;
}
136
/*
 * Decompress a Snappy block of `len` bytes at `data` into the codec's
 * own buffer.  The last 4 bytes of the input are a byte-swapped CRC32 of
 * the uncompressed data, which is verified after decompression.
 *
 * The output buffer (c->block_data) is allocated on first use and grown
 * when it is smaller than the uncompressed length reported by Snappy.
 * On success c->used_size is the uncompressed length.
 *
 * Returns 0 on success, 1 on failure (an error message is set).  When
 * growing the buffer fails, the previous buffer and c->block_size are
 * left untouched so that a later reset can still release it.
 */
static int decode_snappy(avro_codec_t c, void * data, int64_t len)
{
	uint32_t crc;
	size_t outlen;

	/*
	 * The block must at least hold the 4-byte CRC trailer; otherwise
	 * `len-4` below would go negative and wrap to a huge size_t, and
	 * the CRC comparison would read before the start of `data`.
	 */
	if (len < 4) {
		avro_set_error("Snappy block too short to contain a CRC32");
		return 1;
	}

	if (snappy_uncompressed_length((const char*)data, len-4, &outlen) != SNAPPY_OK) {
		avro_set_error("Uncompressed length error in snappy");
		return 1;
	}

	if (!c->block_data) {
		c->block_data = avro_malloc(outlen);
		if (!c->block_data) {
			avro_set_error("Cannot allocate memory for snappy");
			return 1;
		}
		c->block_size = outlen;
	} else if ( (size_t)c->block_size < outlen) {
		/*
		 * Keep the old pointer until the reallocation is known to
		 * have succeeded (assuming avro_realloc behaves like
		 * realloc and leaves the old block valid on failure).
		 */
		void *grown = avro_realloc(c->block_data, c->block_size, outlen);
		if (!grown) {
			avro_set_error("Cannot allocate memory for snappy");
			return 1;
		}
		c->block_data = grown;
		c->block_size = outlen;
	}

	if (snappy_uncompress((const char*)data, len-4, (char*)c->block_data, &outlen) != SNAPPY_OK)
	{
		avro_set_error("Error uncompressing block with Snappy");
		return 1;
	}

	/* Recompute the checksum over the decoded bytes and compare it
	 * with the trailer stored after the compressed payload. */
	crc = __bswap_32(crc32(0, (const Bytef *)c->block_data, outlen));
	if (memcmp(&crc, (char*)data+len-4, 4))
	{
		avro_set_error("CRC32 check failure uncompressing block with Snappy");
		return 1;
	}

	c->used_size = outlen;

	return 0;
}
178
/*
 * Release the Snappy codec's output buffer (if one was allocated) and
 * clear the buffer pointer, both size fields and the private codec data.
 *
 * Always returns 0.
 */
static int reset_snappy(avro_codec_t c)
{
	void *buffer = c->block_data;

	if (buffer != NULL) {
		avro_free(buffer, c->block_size);
	}

	c->codec_data = NULL;
	c->used_size = 0;
	c->block_size = 0;
	c->block_data = NULL;

	return 0;
}
192
193 #endif // SNAPPY_CODEC
194
195 /* Deflate codec */
196
197 #ifdef DEFLATE_CODEC
198
199 struct codec_data_deflate {
200 z_stream deflate;
201 z_stream inflate;
202 };
203 #define codec_data_deflate_stream(cd) &((struct codec_data_deflate *)cd)->deflate
204 #define codec_data_inflate_stream(cd) &((struct codec_data_deflate *)cd)->inflate
205
206
207 static int
codec_deflate(avro_codec_t codec)208 codec_deflate(avro_codec_t codec)
209 {
210 codec->name = "deflate";
211 codec->type = AVRO_CODEC_DEFLATE;
212 codec->block_size = 0;
213 codec->used_size = 0;
214 codec->block_data = NULL;
215 codec->codec_data = avro_new(struct codec_data_deflate);
216
217 if (!codec->codec_data) {
218 avro_set_error("Cannot allocate memory for zlib");
219 return 1;
220 }
221
222 z_stream *ds = codec_data_deflate_stream(codec->codec_data);
223 z_stream *is = codec_data_inflate_stream(codec->codec_data);
224
225 memset(ds, 0, sizeof(z_stream));
226 memset(is, 0, sizeof(z_stream));
227
228 ds->zalloc = is->zalloc = Z_NULL;
229 ds->zfree = is->zfree = Z_NULL;
230 ds->opaque = is->opaque = Z_NULL;
231
232 if (deflateInit2(ds, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
233 avro_freet(struct codec_data_deflate, codec->codec_data);
234 codec->codec_data = NULL;
235 avro_set_error("Cannot initialize zlib deflate");
236 return 1;
237 }
238
239 if (inflateInit2(is, -15) != Z_OK) {
240 avro_freet(struct codec_data_deflate, codec->codec_data);
241 codec->codec_data = NULL;
242 avro_set_error("Cannot initialize zlib inflate");
243 return 1;
244 }
245
246 return 0;
247 }
248
/*
 * Compress `len` bytes at `data` with raw deflate into the codec's own
 * buffer in a single deflate(Z_FINISH) call.
 *
 * The output buffer (c->block_data) is allocated on first use and grown
 * when it is smaller than the computed bound.  c->block_size always
 * describes the allocated capacity; the number of compressed bytes is
 * reported in c->used_size.
 *
 * Returns 0 on success, 1 on failure (an error message is set).  After a
 * failure the zlib stream is reset rather than destroyed, so the codec
 * can still be used for further blocks.
 */
static int encode_deflate(avro_codec_t c, void * data, int64_t len)
{
	int err;
	int64_t defl_len;

	/* zlib counts input in uInt; reject negative lengths and lengths
	 * that would be silently truncated by the cast below. */
	if (len < 0 || (int64_t)(uInt) len != len) {
		avro_set_error("Invalid block length for deflate");
		return 1;
	}

	defl_len = compressBound((uLong)len * 1.2);

	if (!c->block_data) {
		c->block_data = avro_malloc(defl_len);
		if (!c->block_data) {
			avro_set_error("Cannot allocate memory for deflate");
			return 1;
		}
		c->block_size = defl_len;
	} else if ( c->block_size < defl_len) {
		/*
		 * Keep the old pointer until the reallocation is known to
		 * have succeeded (assuming avro_realloc behaves like
		 * realloc and leaves the old block valid on failure).
		 */
		void *grown = avro_realloc(c->block_data, c->block_size, defl_len);
		if (!grown) {
			avro_set_error("Cannot allocate memory for deflate");
			return 1;
		}
		c->block_data = grown;
		c->block_size = defl_len;
	}

	c->used_size = 0;

	z_stream *s = codec_data_deflate_stream(c->codec_data);

	s->next_in = (Bytef*)data;
	s->avail_in = (uInt)len;

	s->next_out = c->block_data;
	s->avail_out = (uInt)c->block_size;

	s->total_out = 0;

	err = deflate(s, Z_FINISH);
	if (err != Z_STREAM_END) {
		/*
		 * With Z_FINISH anything but Z_STREAM_END means the block was
		 * not fully compressed (Z_OK: output space ran out).  Report
		 * it instead of returning success with an empty block, and
		 * reset the stream so the codec stays usable.
		 */
		deflateReset(s);
		avro_set_error("Error compressing block with deflate (%i)", err);
		return 1;
	}

	/* block_size is left as the allocated capacity: it is the size
	 * later handed to avro_realloc/avro_free for this buffer. */
	c->used_size = s->total_out;

	if (deflateReset(s) != Z_OK) {
		avro_set_error("Error resetting deflate compression");
		return 1;
	}

	return 0;
}
300
/*
 * Decompress a raw-deflate block of `len` bytes at `data` into the
 * codec's own buffer.
 *
 * The output buffer (c->block_data) starts at DEFAULT_BLOCK_SIZE when it
 * does not exist yet and is doubled each time inflate() runs out of
 * output space.  On success c->used_size is the number of decompressed
 * bytes.
 *
 * Returns 0 on success, 1 on failure (an error message is set).  After a
 * failure the zlib stream is reset rather than destroyed, so the codec
 * can still be used for further blocks.
 */
static int decode_deflate(avro_codec_t c, void * data, int64_t len)
{
	int err;
	z_stream *s = codec_data_inflate_stream(c->codec_data);

	/* zlib counts input in uInt; reject negative lengths and lengths
	 * that would be silently truncated by the assignment below. */
	if (len < 0 || (int64_t)(uInt) len != len) {
		avro_set_error("Invalid block length for deflate");
		return 1;
	}

	if (!c->block_data) {
		c->block_data = avro_malloc(DEFAULT_BLOCK_SIZE);
		if (!c->block_data) {
			avro_set_error("Cannot allocate memory for deflate");
			return 1;
		}
		c->block_size = DEFAULT_BLOCK_SIZE;
	}

	c->used_size = 0;

	s->next_in = data;
	s->avail_in = (uInt) len;

	s->next_out = c->block_data;
	s->avail_out = c->block_size;

	s->total_out = 0;

	for (;;) {
		err = inflate(s, Z_FINISH);
		if (err != Z_BUF_ERROR) {
			break;
		}

		// Apparently if there is yet available space in the output then something
		// has gone wrong in decompressing the data (according to cpython zlibmodule.c)
		if (s->avail_out > 0) {
			inflateReset(s);
			avro_set_error("Error decompressing block with deflate, possible data error");
			return 1;
		}

		// The buffer was not big enough. Double it, keeping the old
		// pointer until the reallocation is known to have succeeded.
		int64_t new_size = c->block_size * 2;
		void *grown = avro_realloc(c->block_data, c->block_size, new_size);
		if (!grown) {
			inflateReset(s);
			avro_set_error("Cannot allocate memory for deflate");
			return 1;
		}

		c->block_data = grown;
		// Continue writing right after the bytes produced so far.
		s->next_out = (Bytef *) grown + s->total_out;
		// The old capacity is exactly the amount of space just gained.
		s->avail_out += c->block_size;
		c->block_size = new_size;
	}

	if (err != Z_STREAM_END) {
		/* With Z_FINISH, anything but Z_STREAM_END means the block was
		 * not fully decoded; never report success in that case. */
		inflateReset(s);
		avro_set_error("Error decompressing block with deflate (%i)", err);
		return 1;
	}

	c->used_size = s->total_out;

	if (inflateReset(s) != Z_OK) {
		avro_set_error("Error resetting deflate decompression");
		return 1;
	}

	return 0;
}
367
/*
 * Tear down a deflate codec: free the output buffer (if any), end both
 * zlib streams and free the private codec data (if any), then clear the
 * buffer pointer, both size fields and the codec data pointer.
 *
 * Always returns 0.
 */
static int reset_deflate(avro_codec_t c)
{
	void *buffer = c->block_data;
	void *state = c->codec_data;

	if (buffer != NULL) {
		avro_free(buffer, c->block_size);
	}

	if (state != NULL) {
		deflateEnd(codec_data_deflate_stream(state));
		inflateEnd(codec_data_inflate_stream(state));
		avro_freet(struct codec_data_deflate, state);
	}

	c->codec_data = NULL;
	c->used_size = 0;
	c->block_size = 0;
	c->block_data = NULL;

	return 0;
}
386
387 #endif // DEFLATE_CODEC
388
389 /* LZMA codec */
390
391 #ifdef LZMA_CODEC
392
393 struct codec_data_lzma {
394 lzma_filter filters[2];
395 lzma_options_lzma options;
396 };
397 #define codec_data_lzma_filters(cd) ((struct codec_data_lzma *)cd)->filters
398 #define codec_data_lzma_options(cd) &((struct codec_data_lzma *)cd)->options
399
400 static int
codec_lzma(avro_codec_t codec)401 codec_lzma(avro_codec_t codec)
402 {
403 codec->name = "lzma";
404 codec->type = AVRO_CODEC_LZMA;
405 codec->block_size = 0;
406 codec->used_size = 0;
407 codec->block_data = NULL;
408 codec->codec_data = avro_new(struct codec_data_lzma);
409
410 if (!codec->codec_data) {
411 avro_set_error("Cannot allocate memory for lzma");
412 return 1;
413 }
414
415 lzma_options_lzma* opt = codec_data_lzma_options(codec->codec_data);
416 lzma_lzma_preset(opt, LZMA_PRESET_DEFAULT);
417
418 lzma_filter* filters = codec_data_lzma_filters(codec->codec_data);
419 filters[0].id = LZMA_FILTER_LZMA2;
420 filters[0].options = opt;
421 filters[1].id = LZMA_VLI_UNKNOWN;
422 filters[1].options = NULL;
423
424 return 0;
425 }
426
/*
 * Compress `len` bytes at `data` with the codec's raw LZMA2 filter chain
 * into the codec's own buffer.
 *
 * The output buffer (codec->block_data) is sized as `len` plus the
 * encoder memory usage reported by liblzma; it is allocated on first use
 * and grown when a later block needs more room.  codec->used_size is set
 * to the number of bytes written.
 *
 * Returns 0 on success, 1 on failure (an error message is set).  When
 * growing the buffer fails, the previous buffer and codec->block_size
 * are left untouched so that a later reset can still release it.
 */
static int encode_lzma(avro_codec_t codec, void * data, int64_t len)
{
	lzma_ret ret;
	size_t written = 0;
	lzma_filter* filters = codec_data_lzma_filters(codec->codec_data);

	/* A negative length would turn into a huge size_t in the encoder. */
	if (len < 0) {
		avro_set_error("Invalid block length for lzma");
		return 1;
	}

	/* liblzma reports UINT64_MAX when the filter chain is invalid;
	 * adding that to `len` would produce a bogus buffer size. */
	uint64_t memusage = lzma_raw_encoder_memusage(filters);
	if (memusage == UINT64_MAX) {
		avro_set_error("Error in lzma encoder");
		return 1;
	}

	int64_t buff_len = len + memusage;

	if (!codec->block_data) {
		codec->block_data = avro_malloc(buff_len);
		if (!codec->block_data) {
			avro_set_error("Cannot allocate memory for lzma encoder");
			return 1;
		}
		codec->block_size = buff_len;
	} else if (codec->block_size < buff_len) {
		/*
		 * A buffer sized for an earlier, smaller block may be too
		 * small now.  Keep the old pointer until the reallocation is
		 * known to have succeeded (assuming avro_realloc behaves
		 * like realloc and leaves the old block valid on failure).
		 */
		void *grown = avro_realloc(codec->block_data, codec->block_size, buff_len);
		if (!grown) {
			avro_set_error("Cannot allocate memory for lzma encoder");
			return 1;
		}
		codec->block_data = grown;
		codec->block_size = buff_len;
	}

	ret = lzma_raw_buffer_encode(filters, NULL, data, len, codec->block_data, &written, codec->block_size);

	codec->used_size = written;

	if (ret != LZMA_OK) {
		avro_set_error("Error in lzma encoder");
		return 1;
	}

	return 0;
}
457
/*
 * Decompress a raw LZMA2 block of `len` bytes at `data` into the codec's
 * own buffer.
 *
 * The output buffer (codec->block_data) starts at DEFAULT_BLOCK_SIZE when
 * it does not exist yet and is doubled each time liblzma reports
 * LZMA_BUF_ERROR.  codec->used_size is set to the number of bytes
 * written by the last decode attempt.
 *
 * Returns 0 on success, 1 on failure (an error message is set).  When
 * growing the buffer fails, the previous buffer and codec->block_size
 * are left untouched so that a later reset can still release it.
 */
static int decode_lzma(avro_codec_t codec, void * data, int64_t len)
{
	size_t read_pos = 0;
	size_t write_pos = 0;
	lzma_ret ret;
	lzma_filter* filters = codec_data_lzma_filters(codec->codec_data);

	if (!codec->block_data) {
		codec->block_data = avro_malloc(DEFAULT_BLOCK_SIZE);
		if (!codec->block_data) {
			avro_set_error("Cannot allocate memory for lzma decoder");
			return 1;
		}
		codec->block_size = DEFAULT_BLOCK_SIZE;
	}

	for (;;) {
		ret = lzma_raw_buffer_decode(filters, NULL, data,
				&read_pos, len, codec->block_data, &write_pos,
				codec->block_size);

		codec->used_size = write_pos;

		if (ret != LZMA_BUF_ERROR) {
			break;
		}

		// Ran out of output space: double the buffer and try again.
		// NOTE(review): the liblzma documentation says the in/out
		// positions are only updated when decoding succeeds, so the
		// retry presumably decodes the whole block again from the
		// start rather than resuming - confirm against liblzma.
		int64_t new_size = codec->block_size * 2;
		void *grown = avro_realloc(codec->block_data, codec->block_size, new_size);
		if (!grown) {
			// The old buffer is kept so that reset can free it.
			avro_set_error("Cannot allocate memory for lzma decoder");
			return 1;
		}
		codec->block_data = grown;
		codec->block_size = new_size;
	}

	if (ret != LZMA_OK) {
		avro_set_error("Error in lzma decoder");
		return 1;
	}

	return 0;
}
499
/*
 * Tear down an lzma codec: free the output buffer (if any) and the
 * private codec data (if any), then clear the buffer pointer, both size
 * fields and the codec data pointer.
 *
 * Always returns 0.
 */
static int reset_lzma(avro_codec_t c)
{
	void *buffer = c->block_data;
	void *state = c->codec_data;

	if (buffer != NULL) {
		avro_free(buffer, c->block_size);
	}

	if (state != NULL) {
		avro_freet(struct codec_data_lzma, state);
	}

	c->codec_data = NULL;
	c->used_size = 0;
	c->block_size = 0;
	c->block_data = NULL;

	return 0;
}
516
517 #endif // LZMA_CODEC
518
519 /* Common interface */
520
/*
 * Initialize `codec` for the codec named by `type`.
 *
 * A NULL `type` or the name "null" selects the null codec.  "snappy",
 * "deflate" and "lzma" are recognized only when the corresponding
 * *_CODEC macro was defined at build time.
 *
 * Returns the result of the selected initializer (0 on success), or 1
 * with an error message set when the name is not recognized.
 */
int avro_codec(avro_codec_t codec, const char *type)
{
	/* The codec names are distinct, so testing "null" first does not
	 * change which initializer is chosen. */
	if (type == NULL || !strcmp("null", type)) {
		return codec_null(codec);
	}

#ifdef SNAPPY_CODEC
	if (!strcmp("snappy", type)) {
		return codec_snappy(codec);
	}
#endif

#ifdef DEFLATE_CODEC
	if (!strcmp("deflate", type)) {
		return codec_deflate(codec);
	}
#endif

#ifdef LZMA_CODEC
	if (!strcmp("lzma", type)) {
		return codec_lzma(codec);
	}
#endif

	avro_set_error("Unknown codec %s", type);
	return 1;
}
552
/*
 * Encode `len` bytes at `data` with whichever codec `c` was initialized
 * as, by dispatching on c->type to the matching encode_* routine.
 *
 * Returns that routine's result, or 1 when the type is not one of the
 * codecs compiled into this build.
 */
int avro_codec_encode(avro_codec_t c, void * data, int64_t len)
{
	if (c->type == AVRO_CODEC_NULL) {
		return encode_null(c, data, len);
	}
#ifdef SNAPPY_CODEC
	if (c->type == AVRO_CODEC_SNAPPY) {
		return encode_snappy(c, data, len);
	}
#endif
#ifdef DEFLATE_CODEC
	if (c->type == AVRO_CODEC_DEFLATE) {
		return encode_deflate(c, data, len);
	}
#endif
#ifdef LZMA_CODEC
	if (c->type == AVRO_CODEC_LZMA) {
		return encode_lzma(c, data, len);
	}
#endif

	/* Unknown or not compiled-in codec type. */
	return 1;
}
575
/*
 * Decode `len` bytes at `data` with whichever codec `c` was initialized
 * as, by dispatching on c->type to the matching decode_* routine.
 *
 * Returns that routine's result, or 1 when the type is not one of the
 * codecs compiled into this build.
 */
int avro_codec_decode(avro_codec_t c, void * data, int64_t len)
{
	if (c->type == AVRO_CODEC_NULL) {
		return decode_null(c, data, len);
	}
#ifdef SNAPPY_CODEC
	if (c->type == AVRO_CODEC_SNAPPY) {
		return decode_snappy(c, data, len);
	}
#endif
#ifdef DEFLATE_CODEC
	if (c->type == AVRO_CODEC_DEFLATE) {
		return decode_deflate(c, data, len);
	}
#endif
#ifdef LZMA_CODEC
	if (c->type == AVRO_CODEC_LZMA) {
		return decode_lzma(c, data, len);
	}
#endif

	/* Unknown or not compiled-in codec type. */
	return 1;
}
598
/*
 * Reset codec `c` by dispatching on c->type to the matching reset_*
 * routine.
 *
 * Returns that routine's result, or 1 when the type is not one of the
 * codecs compiled into this build.
 */
int avro_codec_reset(avro_codec_t c)
{
	if (c->type == AVRO_CODEC_NULL) {
		return reset_null(c);
	}
#ifdef SNAPPY_CODEC
	if (c->type == AVRO_CODEC_SNAPPY) {
		return reset_snappy(c);
	}
#endif
#ifdef DEFLATE_CODEC
	if (c->type == AVRO_CODEC_DEFLATE) {
		return reset_deflate(c);
	}
#endif
#ifdef LZMA_CODEC
	if (c->type == AVRO_CODEC_LZMA) {
		return reset_lzma(c);
	}
#endif

	/* Unknown or not compiled-in codec type. */
	return 1;
}
621