1 /*
2 * Copyright (c) 2012-2018, Steeve Morin, Jonathan Underwood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of Steeve Morin nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
/* Map the `inline` keyword onto what each toolchain accepts: MSVC gets
   `__inline`, while the Sun, HP-UX and AIX compilers get it defined away.
   The macro is removed again by the `#undef inline` later in this file. */
#if defined(_WIN32) && defined(_MSC_VER)
#define inline __inline
#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX)
#define inline
#endif
37
38 #include <py3c.h>
39 #include <py3c/capsulethunk.h>
40
41 #include <stdlib.h>
42 #include <math.h>
43 #include <lz4.h>
44 #include <lz4hc.h>
45
/* Fallback definition of Py_UNUSED for headers that do not supply it: the
   parameter name is mangled with an `_unused_` prefix and, on GCC-compatible
   compilers, additionally tagged with the `unused` attribute. */
#ifndef Py_UNUSED /* This is already defined for Python 3.4 onwards */
#ifdef __GNUC__
#define Py_UNUSED(name) _unused_ ## name __attribute__((unused))
#else
#define Py_UNUSED(name) _unused_ ## name
#endif
#endif
53
/* Fixed-width integer types on MSVC: <stdint.h> is used from _MSC_VER 1600
   onwards; older MSVC versions get the hand-written typedefs below. */
#if defined(_WIN32) && defined(_MSC_VER)
#if _MSC_VER >= 1600
#include <stdint.h>
#else /* _MSC_VER >= 1600 */
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#endif /* _MSC_VER >= 1600 */
#endif
66
/* Write the 32-bit value `x` into the four bytes starting at `c`, least
   significant byte first, independent of the host byte order. */
static inline void
store_le32 (char *c, uint32_t x)
{
  int byte;

  for (byte = 0; byte < 4; byte++)
    {
      /* Byte `byte` of the output holds bits [8*byte, 8*byte + 7] of x. */
      c[byte] = (x >> (8 * byte)) & 0xff;
    }
}
75
/* Read four bytes starting at `c` as a little-endian 32-bit unsigned value,
   independent of the host byte order.

   Each byte is widened to uint32_t before shifting: left unwidened, a byte
   is promoted to (signed) int, and shifting a value >= 0x80 left by 24 would
   overflow into the sign bit, which is undefined behaviour. */
static inline uint32_t
load_le32 (const char *c)
{
  const uint8_t *d = (const uint8_t *) c;

  return (uint32_t) d[0]
    | ((uint32_t) d[1] << 8)
    | ((uint32_t) d[2] << 16)
    | ((uint32_t) d[3] << 24);
}
82
/* Size in bytes of the optional header holding the uncompressed length
   (one uint32_t) that compress() can prepend and decompress() can read. */
static const size_t hdr_size = sizeof (uint32_t);

/* Compression strategy selected from the `mode` argument of compress();
   lz4_compress_generic() branches on these values. */
typedef enum
{
  DEFAULT,
  FAST,
  HIGH_COMPRESSION
} compression_type;

/* Exception object raised by compress()/decompress() when the LZ4 call
   fails; it is created and assigned in the module init function. */
static PyObject * LZ4BlockError;
93
94 static inline int
lz4_compress_generic(int comp,char * source,char * dest,int source_size,int dest_size,char * dict,int dict_size,int acceleration,int compression)95 lz4_compress_generic (int comp, char* source, char* dest, int source_size, int dest_size,
96 char* dict, int dict_size, int acceleration, int compression)
97 {
98 if (comp != HIGH_COMPRESSION)
99 {
100 LZ4_stream_t lz4_state;
101 LZ4_resetStream (&lz4_state);
102 if (dict)
103 {
104 LZ4_loadDict (&lz4_state, dict, dict_size);
105 }
106 if (comp != FAST)
107 {
108 acceleration = 1;
109 }
110 return LZ4_compress_fast_continue (&lz4_state, source, dest, source_size, dest_size, acceleration);
111 }
112 else
113 {
114 LZ4_streamHC_t lz4_state;
115 LZ4_resetStreamHC (&lz4_state, compression);
116 if (dict)
117 {
118 LZ4_loadDictHC (&lz4_state, dict, dict_size);
119 }
120 return LZ4_compress_HC_continue (&lz4_state, source, dest, source_size, dest_size);
121 }
122 }
123
/* Remove any `inline` macro (one may have been defined near the top of this
   file) now that the inline helper functions above have been defined. */
#ifdef inline
#undef inline
#endif
127
128 static PyObject *
compress(PyObject * Py_UNUSED (self),PyObject * args,PyObject * kwargs)129 compress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs)
130 {
131 const char *mode = "default";
132 size_t dest_size, total_size;
133 int acceleration = 1;
134 int compression = 9;
135 int store_size = 1;
136 PyObject *py_dest;
137 char *dest, *dest_start;
138 compression_type comp;
139 int output_size;
140 Py_buffer source;
141 int source_size;
142 int return_bytearray = 0;
143 Py_buffer dict = {0};
144 static char *argnames[] = {
145 "source",
146 "mode",
147 "store_size",
148 "acceleration",
149 "compression",
150 "return_bytearray",
151 "dict",
152 NULL
153 };
154
155
156 #if IS_PY3
157 if (!PyArg_ParseTupleAndKeywords (args, kwargs, "y*|spiipz*", argnames,
158 &source,
159 &mode, &store_size, &acceleration, &compression,
160 &return_bytearray, &dict))
161 {
162 return NULL;
163 }
164 #else
165 if (!PyArg_ParseTupleAndKeywords (args, kwargs, "s*|siiiiz*", argnames,
166 &source,
167 &mode, &store_size, &acceleration, &compression,
168 &return_bytearray, &dict))
169 {
170 return NULL;
171 }
172 #endif
173
174 if (source.len > INT_MAX)
175 {
176 PyBuffer_Release(&source);
177 PyBuffer_Release(&dict);
178 PyErr_Format(PyExc_OverflowError,
179 "Input too large for LZ4 API");
180 return NULL;
181 }
182
183 if (dict.len > INT_MAX)
184 {
185 PyBuffer_Release(&source);
186 PyBuffer_Release(&dict);
187 PyErr_Format(PyExc_OverflowError,
188 "Dictionary too large for LZ4 API");
189 return NULL;
190 }
191
192 source_size = (int) source.len;
193
194 if (!strncmp (mode, "default", sizeof ("default")))
195 {
196 comp = DEFAULT;
197 }
198 else if (!strncmp (mode, "fast", sizeof ("fast")))
199 {
200 comp = FAST;
201 }
202 else if (!strncmp (mode, "high_compression", sizeof ("high_compression")))
203 {
204 comp = HIGH_COMPRESSION;
205 }
206 else
207 {
208 PyBuffer_Release(&source);
209 PyBuffer_Release(&dict);
210 PyErr_Format (PyExc_ValueError,
211 "Invalid mode argument: %s. Must be one of: standard, fast, high_compression",
212 mode);
213 return NULL;
214 }
215
216 dest_size = LZ4_compressBound (source_size);
217
218 if (store_size)
219 {
220 total_size = dest_size + hdr_size;
221 }
222 else
223 {
224 total_size = dest_size;
225 }
226
227 dest = PyMem_Malloc (total_size * sizeof * dest);
228 if (dest == NULL)
229 {
230 return PyErr_NoMemory();
231 }
232
233 Py_BEGIN_ALLOW_THREADS
234
235 if (store_size)
236 {
237 store_le32 (dest, source_size);
238 dest_start = dest + hdr_size;
239 }
240 else
241 {
242 dest_start = dest;
243 }
244
245 output_size = lz4_compress_generic (comp, source.buf, dest_start, source_size,
246 (int) dest_size, dict.buf, (int) dict.len,
247 acceleration, compression);
248
249 Py_END_ALLOW_THREADS
250
251 PyBuffer_Release(&source);
252 PyBuffer_Release(&dict);
253
254 if (output_size <= 0)
255 {
256 PyErr_SetString (LZ4BlockError, "Compression failed");
257 PyMem_Free (dest);
258 return NULL;
259 }
260
261 if (store_size)
262 {
263 output_size += (int) hdr_size;
264 }
265
266 if (return_bytearray)
267 {
268 py_dest = PyByteArray_FromStringAndSize (dest, (Py_ssize_t) output_size);
269 }
270 else
271 {
272 py_dest = PyBytes_FromStringAndSize (dest, (Py_ssize_t) output_size);
273 }
274
275 PyMem_Free (dest);
276
277 if (py_dest == NULL)
278 {
279 return PyErr_NoMemory ();
280 }
281
282 return py_dest;
283 }
284
285 static PyObject *
decompress(PyObject * Py_UNUSED (self),PyObject * args,PyObject * kwargs)286 decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs)
287 {
288 Py_buffer source;
289 const char * source_start;
290 size_t source_size;
291 PyObject *py_dest;
292 char *dest;
293 int output_size;
294 size_t dest_size;
295 int uncompressed_size = -1;
296 int return_bytearray = 0;
297 Py_buffer dict = {0};
298 static char *argnames[] = {
299 "source",
300 "uncompressed_size",
301 "return_bytearray",
302 "dict",
303 NULL
304 };
305
306 #if IS_PY3
307 if (!PyArg_ParseTupleAndKeywords (args, kwargs, "y*|ipz*", argnames,
308 &source, &uncompressed_size,
309 &return_bytearray, &dict))
310 {
311 return NULL;
312 }
313 #else
314 if (!PyArg_ParseTupleAndKeywords (args, kwargs, "s*|iiz*", argnames,
315 &source, &uncompressed_size,
316 &return_bytearray, &dict))
317 {
318 return NULL;
319 }
320 #endif
321
322 if (source.len > INT_MAX)
323 {
324 PyBuffer_Release(&source);
325 PyBuffer_Release(&dict);
326 PyErr_Format(PyExc_OverflowError,
327 "Input too large for LZ4 API");
328 return NULL;
329 }
330
331 if (dict.len > INT_MAX)
332 {
333 PyBuffer_Release(&source);
334 PyBuffer_Release(&dict);
335 PyErr_Format(PyExc_OverflowError,
336 "Dictionary too large for LZ4 API");
337 return NULL;
338 }
339
340 source_start = (const char *) source.buf;
341 source_size = (int) source.len;
342
343 if (uncompressed_size >= 0)
344 {
345 dest_size = uncompressed_size;
346 }
347 else
348 {
349 if (source_size < hdr_size)
350 {
351 PyBuffer_Release(&source);
352 PyBuffer_Release(&dict);
353 PyErr_SetString (PyExc_ValueError, "Input source data size too small");
354 return NULL;
355 }
356 dest_size = load_le32 (source_start);
357 source_start += hdr_size;
358 source_size -= hdr_size;
359 }
360
361 if (dest_size > INT_MAX)
362 {
363 PyBuffer_Release(&source);
364 PyBuffer_Release(&dict);
365 PyErr_Format (PyExc_ValueError, "Invalid size: 0x%zu",
366 dest_size);
367 return NULL;
368 }
369
370 dest = PyMem_Malloc (dest_size * sizeof * dest);
371 if (dest == NULL)
372 {
373 return PyErr_NoMemory();
374 }
375
376 Py_BEGIN_ALLOW_THREADS
377
378 output_size =
379 LZ4_decompress_safe_usingDict (source_start, dest, source_size, (int) dest_size,
380 dict.buf, (int) dict.len);
381
382 Py_END_ALLOW_THREADS
383
384 PyBuffer_Release(&source);
385 PyBuffer_Release(&dict);
386
387 if (output_size < 0)
388 {
389 PyErr_Format (LZ4BlockError,
390 "Decompression failed: corrupt input or insufficient space in destination buffer. Error code: %u",
391 -output_size);
392 PyMem_Free (dest);
393 return NULL;
394 }
395 else if (((size_t)output_size != dest_size) && (uncompressed_size < 0))
396 {
397 PyErr_Format (LZ4BlockError,
398 "Decompressor wrote %u bytes, but %zu bytes expected from header",
399 output_size, dest_size);
400 PyMem_Free (dest);
401 return NULL;
402 }
403
404 if (return_bytearray)
405 {
406 py_dest = PyByteArray_FromStringAndSize (dest, (Py_ssize_t) output_size);
407 }
408 else
409 {
410 py_dest = PyBytes_FromStringAndSize (dest, (Py_ssize_t) output_size);
411 }
412
413 PyMem_Free (dest);
414
415 if (py_dest == NULL)
416 {
417 return PyErr_NoMemory ();
418 }
419
420 return py_dest;
421 }
422
423 PyDoc_STRVAR(compress__doc,
424 "compress(source, mode='default', acceleration=1, compression=0, return_bytearray=False)\n\n" \
425 "Compress source, returning the compressed data as a string.\n" \
426 "Raises an exception if any error occurs.\n" \
427 "\n" \
428 "Args:\n" \
429 " source (str, bytes or buffer-compatible object): Data to compress\n" \
430 "\n" \
431 "Keyword Args:\n" \
432 " mode (str): If ``'default'`` or unspecified use the default LZ4\n" \
433 " compression mode. Set to ``'fast'`` to use the fast compression\n" \
434 " LZ4 mode at the expense of compression. Set to\n" \
435 " ``'high_compression'`` to use the LZ4 high-compression mode at\n" \
436 " the exepense of speed.\n" \
437 " acceleration (int): When mode is set to ``'fast'`` this argument\n" \
438 " specifies the acceleration. The larger the acceleration, the\n" \
439 " faster the but the lower the compression. The default\n" \
440 " compression corresponds to a value of ``1``.\n" \
441 " compression (int): When mode is set to ``high_compression`` this\n" \
442 " argument specifies the compression. Valid values are between\n" \
443 " ``1`` and ``12``. Values between ``4-9`` are recommended, and\n" \
444 " ``9`` is the default.\n"
445 " store_size (bool): If ``True`` (the default) then the size of the\n" \
446 " uncompressed data is stored at the start of the compressed\n" \
447 " block.\n" \
448 " return_bytearray (bool): If ``False`` (the default) then the function\n" \
449 " will return a bytes object. If ``True``, then the function will\n" \
450 " return a bytearray object.\n\n" \
451 " dict (str, bytes or buffer-compatible object): If specified, perform\n" \
452 " compression using this initial dictionary.\n" \
453 "Returns:\n" \
454 " bytes or bytearray: Compressed data.\n");
455
456 PyDoc_STRVAR(decompress__doc,
457 "decompress(source, uncompressed_size=-1, return_bytearray=False)\n\n" \
458 "Decompress source, returning the uncompressed data as a string.\n" \
459 "Raises an exception if any error occurs.\n" \
460 "\n" \
461 "Args:\n" \
462 " source (str, bytes or buffer-compatible object): Data to decompress.\n" \
463 "\n" \
464 "Keyword Args:\n" \
465 " uncompressed_size (int): If not specified or negative, the uncompressed\n" \
466 " data size is read from the start of the source block. If specified,\n" \
467 " it is assumed that the full source data is compressed data. If this\n" \
468 " argument is specified, it is considered to be a maximum possible size\n" \
469 " for the buffer used to hold the uncompressed data, and so less data\n" \
470 " may be returned. If `uncompressed_size` is too small, `LZ4BlockError`\n" \
471 " will be raised. By catching `LZ4BlockError` it is possible to increase\n" \
472 " `uncompressed_size` and try again.\n" \
473 " return_bytearray (bool): If ``False`` (the default) then the function\n" \
474 " will return a bytes object. If ``True``, then the function will\n" \
475 " return a bytearray object.\n\n" \
476 " dict (str, bytes or buffer-compatible object): If specified, perform\n" \
477 " decompression using this initial dictionary.\n" \
478 "\n" \
479 "Returns:\n" \
480 " bytes or bytearray: Decompressed data.\n" \
481 "\n" \
482 "Raises:\n" \
483 " LZ4BlockError: raised if the call to the LZ4 library fails. This can be\n" \
484 " caused by `uncompressed_size` being too small, or invalid data.\n");
485
/* Module-level docstring; passed to the module definition below. */
PyDoc_STRVAR(lz4block__doc,
             "A Python wrapper for the LZ4 block protocol"
             );
489
490 static PyMethodDef module_methods[] = {
491 {
492 "compress",
493 (PyCFunction) compress,
494 METH_VARARGS | METH_KEYWORDS,
495 compress__doc
496 },
497 {
498 "decompress",
499 (PyCFunction) decompress,
500 METH_VARARGS | METH_KEYWORDS,
501 decompress__doc
502 },
503 {
504 /* Sentinel */
505 NULL,
506 NULL,
507 0,
508 NULL
509 }
510 };
511
/* Module definition handed to PyModule_Create() in the init function. */
static struct PyModuleDef moduledef =
{
  PyModuleDef_HEAD_INIT,
  "_block",                     /* module name */
  lz4block__doc,                /* module docstring */
  -1,                           /* NOTE(review): per-module state size; -1 presumably means
                                   "state kept in globals" per the CPython docs -- confirm */
  module_methods                /* functions exported by the module */
};
520
MODULE_INIT_FUNC(_block)521 MODULE_INIT_FUNC (_block)
522 {
523 PyObject *module = PyModule_Create (&moduledef);
524
525 if (module == NULL)
526 return NULL;
527
528 PyModule_AddIntConstant (module, "HC_LEVEL_MIN", LZ4HC_CLEVEL_MIN);
529 PyModule_AddIntConstant (module, "HC_LEVEL_DEFAULT", LZ4HC_CLEVEL_DEFAULT);
530 PyModule_AddIntConstant (module, "HC_LEVEL_OPT_MIN", LZ4HC_CLEVEL_OPT_MIN);
531 PyModule_AddIntConstant (module, "HC_LEVEL_MAX", LZ4HC_CLEVEL_MAX);
532
533 LZ4BlockError = PyErr_NewExceptionWithDoc("_block.LZ4BlockError", "Call to LZ4 library failed.", NULL, NULL);
534 if (LZ4BlockError == NULL)
535 {
536 return NULL;
537 }
538 Py_INCREF(LZ4BlockError);
539 PyModule_AddObject(module, "LZ4BlockError", LZ4BlockError);
540
541 return module;
542 }
543