1 /*
2    Lizard - Fast LZ compression algorithm
3    Copyright (C) 2011-2016, Yann Collet
4    Copyright (C) 2016-2017, Przemyslaw Skibinski <inikep@gmail.com>
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31    You can contact the author at :
32     - Lizard source repository : https://github.com/inikep/lizard
33 */
34 
35 
36 /**************************************
37 *  Includes
38 **************************************/
39 //#define LIZARD_STATS 1 // 0=simple stats, 1=more, 2=full
40 #ifdef LIZARD_STATS
41     #include "test/lizard_stats.h"
42 #endif
43 #include "lizard_compress.h"
44 #include "lizard_decompress.h"
45 #include "lizard_common.h"
46 #include <stdio.h> // printf
47 #include <stdint.h> // intptr_t
48 
49 
50 /*-************************************
51 *  Local Structures and types
52 **************************************/
53 typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
54 typedef enum { full = 0, partial = 1 } earlyEnd_directive;
55 
56 #include "lizard_decompress_lz4.h"
57 #ifndef USE_LZ4_ONLY
58     #ifdef LIZARD_USE_TEST
59         #include "test/lizard_common_test.h"
60         #include "test/lizard_decompress_test.h"
61     #else
62         #include "lizard_decompress_liz.h"
63     #endif
64 #endif
65 #include "entropy/huf.h"
66 
67 
68 /*-*****************************
69 *  Decompression functions
70 *******************************/
71 
Lizard_readStream(int flag,const BYTE ** ip,const BYTE * const iend,BYTE * op,BYTE * const oend,const BYTE ** streamPtr,const BYTE ** streamEnd,int streamFlag)72 FORCE_INLINE size_t Lizard_readStream(int flag, const BYTE** ip, const BYTE* const iend, BYTE* op, BYTE* const oend, const BYTE** streamPtr, const BYTE** streamEnd, int streamFlag)
73 {
74     if (!flag) {
75         if (*ip > iend - 3) return 0;
76         *streamPtr = *ip + 3;
77         *streamEnd = *streamPtr + MEM_readLE24(*ip);
78         if (*streamEnd < *streamPtr) return 0;
79         *ip = *streamEnd;
80 #ifdef LIZARD_STATS
81         uncompr_stream[streamFlag] += *streamEnd-*streamPtr;
82 #else
83         (void)streamFlag;
84 #endif
85         return 1;
86     } else {
87 #ifndef LIZARD_NO_HUFFMAN
88         size_t res, streamLen, comprStreamLen;
89 
90         if (*ip > iend - 6) return 0;
91         streamLen = MEM_readLE24(*ip);
92         comprStreamLen = MEM_readLE24(*ip + 3);
93 
94         if ((op > oend - streamLen) || (*ip + comprStreamLen > iend - 6)) return 0;
95         res = HUF_decompress(op, streamLen, *ip + 6, comprStreamLen);
96         if (HUF_isError(res) || (res != streamLen)) return 0;
97 
98         *ip += comprStreamLen + 6;
99         *streamPtr = op;
100         *streamEnd = *streamPtr + streamLen;
101 #ifdef LIZARD_STATS
102         compr_stream[streamFlag] += comprStreamLen + 6;
103         decompr_stream[streamFlag] += *streamEnd-*streamPtr;
104 #endif
105         return 1;
106 #else
107         fprintf(stderr, "compiled with LIZARD_NO_HUFFMAN\n");
108         (void)op; (void)oend;
109         return 0;
110 #endif
111     }
112 }
113 
114 
Lizard_decompress_generic(const char * source,char * const dest,int inputSize,int outputSize,int partialDecoding,int targetOutputSize,int dict,const BYTE * const lowPrefix,const BYTE * const dictStart,const size_t dictSize)115 FORCE_INLINE int Lizard_decompress_generic(
116                  const char* source,
117                  char* const dest,
118                  int inputSize,
119                  int outputSize,         /* this value is the max size of Output Buffer. */
120                  int partialDecoding,    /* full, partial */
121                  int targetOutputSize,   /* only used if partialDecoding==partial */
122                  int dict,               /* noDict, withPrefix64k, usingExtDict */
123                  const BYTE* const lowPrefix,  /* == dest if dict == noDict */
124                  const BYTE* const dictStart,  /* only if dict==usingExtDict */
125                  const size_t dictSize         /* note : = 0 if noDict */
126                  )
127 {
128     /* Local Variables */
129     const BYTE* ip = (const BYTE*) source, *istart = (const BYTE*) source;
130     const BYTE* const iend = ip + inputSize;
131     BYTE* op = (BYTE*) dest;
132     BYTE* const oend = op + outputSize;
133     BYTE* oexit = op + targetOutputSize;
134     Lizard_parameters params;
135     Lizard_dstream_t ctx;
136     BYTE* decompFlagsBase, *decompOff24Base, *decompOff16Base, *decompLiteralsBase = NULL;
137     int res, compressionLevel;
138 
139     if (inputSize < 1) { LIZARD_LOG_DECOMPRESS("inputSize=%d outputSize=%d targetOutputSize=%d partialDecoding=%d\n", inputSize, outputSize, targetOutputSize, partialDecoding); return 0; }
140 
141     compressionLevel = *ip++;
142 
143     if (compressionLevel < LIZARD_MIN_CLEVEL || compressionLevel > LIZARD_MAX_CLEVEL) {
144         LIZARD_LOG_DECOMPRESS("ERROR Lizard_decompress_generic inputSize=%d compressionLevel=%d\n", inputSize, compressionLevel);
145         return -1;
146     }
147 
148     LIZARD_LOG_DECOMPRESS("Lizard_decompress_generic ip=%p inputSize=%d targetOutputSize=%d dest=%p outputSize=%d cLevel=%d dict=%d dictSize=%d dictStart=%p partialDecoding=%d\n", ip, inputSize, targetOutputSize, dest, outputSize, compressionLevel, dict, (int)dictSize, dictStart, partialDecoding);
149 
150     decompLiteralsBase = (BYTE*)malloc(4*LIZARD_HUF_BLOCK_SIZE);
151     if (!decompLiteralsBase) return -1;
152     decompFlagsBase = decompLiteralsBase + LIZARD_HUF_BLOCK_SIZE;
153     decompOff24Base = decompFlagsBase + LIZARD_HUF_BLOCK_SIZE;
154     decompOff16Base = decompOff24Base + LIZARD_HUF_BLOCK_SIZE;
155 
156 #ifdef LIZARD_STATS
157     init_stats();
158 #endif
159     (void)istart;
160 
161     while (ip < iend)
162     {
163         res = *ip++;
164         if (res == LIZARD_FLAG_UNCOMPRESSED) /* uncompressed */
165         {
166             uint32_t length;
167             if (ip > iend - 3) { LIZARD_LOG_DECOMPRESS("UNCOMPRESSED  ip[%p] > iend[%p] - 3\n", ip, iend); goto _output_error; }
168             length = MEM_readLE24(ip);
169             ip += 3;
170          //   printf("%d: total=%d block=%d UNCOMPRESSED op=%p oexit=%p oend=%p\n", (int)(op-(BYTE*)dest) ,(int)(ip-istart), length, op, oexit, oend);
171             if (ip + length > iend || op + length > oend) { LIZARD_LOG_DECOMPRESS("UNCOMPRESSED  ip[%p]+length[%d] > iend[%p]\n", ip, length, iend); goto _output_error; }
172             memcpy(op, ip, length);
173             op += length;
174             ip += length;
175             if ((partialDecoding) && (op >= oexit)) break;
176 #ifdef LIZARD_STATS
177             uncompr_stream[LIZARD_STREAM_UNCOMPRESSED] += length;
178 #endif
179             continue;
180         }
181 
182         if (res&LIZARD_FLAG_LEN) {
183             LIZARD_LOG_DECOMPRESS("res=%d\n", res); goto _output_error;
184         }
185 
186         if (ip > iend - 5*3) goto _output_error;
187         ctx.lenPtr = (const BYTE*)ip + 3;
188         ctx.lenEnd = ctx.lenPtr + MEM_readLE24(ip);
189         if (ctx.lenEnd < ctx.lenPtr || (ctx.lenEnd > iend - 3)) goto _output_error;
190 #ifdef LIZARD_STATS
191         uncompr_stream[LIZARD_STREAM_LEN] += ctx.lenEnd-ctx.lenPtr + 3;
192 #endif
193         ip = ctx.lenEnd;
194 
195         {   size_t streamLen;
196 #ifdef LIZARD_USE_LOGS
197             const BYTE* ipos;
198             size_t comprFlagsLen, comprLiteralsLen, total;
199 #endif
200             streamLen = Lizard_readStream(res&LIZARD_FLAG_OFFSET16, &ip, iend, decompOff16Base, decompOff16Base + LIZARD_HUF_BLOCK_SIZE, &ctx.offset16Ptr, &ctx.offset16End, LIZARD_STREAM_OFFSET16);
201             if (streamLen == 0) goto _output_error;
202 
203             streamLen = Lizard_readStream(res&LIZARD_FLAG_OFFSET24, &ip, iend, decompOff24Base, decompOff24Base + LIZARD_HUF_BLOCK_SIZE, &ctx.offset24Ptr, &ctx.offset24End, LIZARD_STREAM_OFFSET24);
204             if (streamLen == 0) goto _output_error;
205 
206 #ifdef LIZARD_USE_LOGS
207             ipos = ip;
208             streamLen = Lizard_readStream(res&LIZARD_FLAG_FLAGS, &ip, iend, decompFlagsBase, decompFlagsBase + LIZARD_HUF_BLOCK_SIZE, &ctx.flagsPtr, &ctx.flagsEnd, LIZARD_STREAM_FLAGS);
209             if (streamLen == 0) goto _output_error;
210             streamLen = (size_t)(ctx.flagsEnd-ctx.flagsPtr);
211             comprFlagsLen = ((size_t)(ip - ipos) + 3 >= streamLen) ? 0 : (size_t)(ip - ipos);
212             ipos = ip;
213 #else
214             streamLen = Lizard_readStream(res&LIZARD_FLAG_FLAGS, &ip, iend, decompFlagsBase, decompFlagsBase + LIZARD_HUF_BLOCK_SIZE, &ctx.flagsPtr, &ctx.flagsEnd, LIZARD_STREAM_FLAGS);
215             if (streamLen == 0) goto _output_error;
216 #endif
217 
218             streamLen = Lizard_readStream(res&LIZARD_FLAG_LITERALS, &ip, iend, decompLiteralsBase, decompLiteralsBase + LIZARD_HUF_BLOCK_SIZE, &ctx.literalsPtr, &ctx.literalsEnd, LIZARD_STREAM_LITERALS);
219             if (streamLen == 0) goto _output_error;
220 #ifdef LIZARD_USE_LOGS
221             streamLen = (size_t)(ctx.literalsEnd-ctx.literalsPtr);
222             comprLiteralsLen = ((size_t)(ip - ipos) + 3 >= streamLen) ? 0 : (size_t)(ip - ipos);
223             total = (size_t)(ip-(ctx.lenEnd-1));
224 #endif
225 
226             if (ip > iend) goto _output_error;
227 
228             LIZARD_LOG_DECOMPRESS("%d: total=%d block=%d flagsLen=%d(HUF=%d) literalsLen=%d(HUF=%d) offset16Len=%d offset24Len=%d lengthsLen=%d \n", (int)(op-(BYTE*)dest) ,(int)(ip-istart), (int)total,
229                         (int)(ctx.flagsEnd-ctx.flagsPtr), (int)comprFlagsLen, (int)(ctx.literalsEnd-ctx.literalsPtr), (int)comprLiteralsLen,
230                         (int)(ctx.offset16End-ctx.offset16Ptr), (int)(ctx.offset24End-ctx.offset24Ptr), (int)(ctx.lenEnd-ctx.lenPtr));
231         }
232 
233         ctx.last_off = -LIZARD_INIT_LAST_OFFSET;
234         params = Lizard_defaultParameters[compressionLevel - LIZARD_MIN_CLEVEL];
235         if (params.decompressType == Lizard_coderwords_LZ4)
236             res = Lizard_decompress_LZ4(&ctx, op, outputSize, partialDecoding, targetOutputSize, dict, lowPrefix, dictStart, dictSize, compressionLevel);
237         else
238 #ifdef USE_LZ4_ONLY
239             res = Lizard_decompress_LZ4(&ctx, op, outputSize, partialDecoding, targetOutputSize, dict, lowPrefix, dictStart, dictSize, compressionLevel);
240 #else
241             res = Lizard_decompress_LIZv1(&ctx, op, outputSize, partialDecoding, targetOutputSize, dict, lowPrefix, dictStart, dictSize, compressionLevel);
242 #endif
243         LIZARD_LOG_DECOMPRESS("Lizard_decompress_generic res=%d inputSize=%d\n", res, (int)(ctx.literalsEnd-ctx.lenEnd));
244 
245         if (res <= 0) { free(decompLiteralsBase); return res; }
246 
247         op += res;
248         outputSize -= res;
249         if ((partialDecoding) && (op >= oexit)) break;
250     }
251 
252 #ifdef LIZARD_STATS
253     print_stats();
254 #endif
255 
256     LIZARD_LOG_DECOMPRESS("Lizard_decompress_generic total=%d\n", (int)(op-(BYTE*)dest));
257     free(decompLiteralsBase);
258     return (int)(op-(BYTE*)dest);
259 
260 _output_error:
261     LIZARD_LOG_DECOMPRESS("Lizard_decompress_generic ERROR ip=%p iend=%p\n", ip, iend);
262     free(decompLiteralsBase);
263     return -1;
264 }
265 
266 
Lizard_decompress_safe(const char * source,char * dest,int compressedSize,int maxDecompressedSize)267 int Lizard_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
268 {
269     return Lizard_decompress_generic(source, dest, compressedSize, maxDecompressedSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
270 }
271 
Lizard_decompress_safe_partial(const char * source,char * dest,int compressedSize,int targetOutputSize,int maxDecompressedSize)272 int Lizard_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
273 {
274     return Lizard_decompress_generic(source, dest, compressedSize, maxDecompressedSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
275 }
276 
277 
278 /*===== streaming decompression functions =====*/
279 
280 
281 /*
282  * If you prefer dynamic allocation methods,
283  * Lizard_createStreamDecode()
284  * provides a pointer (void*) towards an initialized Lizard_streamDecode_t structure.
285  */
Lizard_createStreamDecode(void)286 Lizard_streamDecode_t* Lizard_createStreamDecode(void)
287 {
288     Lizard_streamDecode_t* lizards = (Lizard_streamDecode_t*) ALLOCATOR(1, sizeof(Lizard_streamDecode_t));
289     return lizards;
290 }
291 
Lizard_freeStreamDecode(Lizard_streamDecode_t * Lizard_stream)292 int Lizard_freeStreamDecode (Lizard_streamDecode_t* Lizard_stream)
293 {
294     FREEMEM(Lizard_stream);
295     return 0;
296 }
297 
298 /*!
299  * Lizard_setStreamDecode() :
300  * Use this function to instruct where to find the dictionary.
301  * This function is not necessary if previous data is still available where it was decoded.
302  * Loading a size of 0 is allowed (same effect as no dictionary).
303  * Return : 1 if OK, 0 if error
304  */
Lizard_setStreamDecode(Lizard_streamDecode_t * Lizard_streamDecode,const char * dictionary,int dictSize)305 int Lizard_setStreamDecode (Lizard_streamDecode_t* Lizard_streamDecode, const char* dictionary, int dictSize)
306 {
307     Lizard_streamDecode_t* lizardsd = (Lizard_streamDecode_t*) Lizard_streamDecode;
308     lizardsd->prefixSize = (size_t) dictSize;
309     lizardsd->prefixEnd = (const BYTE*) dictionary + dictSize;
310     lizardsd->externalDict = NULL;
311     lizardsd->extDictSize  = 0;
312     return 1;
313 }
314 
315 /*
316 *_continue() :
317     These decoding functions allow decompression of multiple blocks in "streaming" mode.
318     Previously decoded blocks must still be available at the memory position where they were decoded.
319     If it's not possible, save the relevant part of decoded data into a safe buffer,
320     and indicate where it stands using Lizard_setStreamDecode()
321 */
Lizard_decompress_safe_continue(Lizard_streamDecode_t * Lizard_streamDecode,const char * source,char * dest,int compressedSize,int maxOutputSize)322 int Lizard_decompress_safe_continue (Lizard_streamDecode_t* Lizard_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
323 {
324     Lizard_streamDecode_t* lizardsd = (Lizard_streamDecode_t*) Lizard_streamDecode;
325     int result;
326 
327     if (lizardsd->prefixEnd == (BYTE*)dest) {
328         result = Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize,
329                                         full, 0, usingExtDict, lizardsd->prefixEnd - lizardsd->prefixSize, lizardsd->externalDict, lizardsd->extDictSize);
330         if (result <= 0) return result;
331         lizardsd->prefixSize += result;
332         lizardsd->prefixEnd  += result;
333     } else {
334         lizardsd->extDictSize = lizardsd->prefixSize;
335         lizardsd->externalDict = lizardsd->prefixEnd - lizardsd->extDictSize;
336         result = Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize,
337                                         full, 0, usingExtDict, (BYTE*)dest, lizardsd->externalDict, lizardsd->extDictSize);
338         if (result <= 0) return result;
339         lizardsd->prefixSize = result;
340         lizardsd->prefixEnd  = (BYTE*)dest + result;
341     }
342 
343     return result;
344 }
345 
346 
347 /*
348 Advanced decoding functions :
349 *_usingDict() :
350     These decoding functions work the same as "_continue" ones,
351     the dictionary must be explicitly provided within parameters
352 */
353 
Lizard_decompress_safe_usingDict(const char * source,char * dest,int compressedSize,int maxOutputSize,const char * dictStart,int dictSize)354 int Lizard_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
355 {
356     if (dictSize==0)
357         return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
358     if (dictStart+dictSize == dest)
359     {
360         if (dictSize >= (int)(LIZARD_DICT_SIZE - 1))
361             return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, withPrefix64k, (BYTE*)dest-LIZARD_DICT_SIZE, NULL, 0);
362         return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
363     }
364     return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
365 }
366 
367 /* debug function */
Lizard_decompress_safe_forceExtDict(const char * source,char * dest,int compressedSize,int maxOutputSize,const char * dictStart,int dictSize)368 int Lizard_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
369 {
370     return Lizard_decompress_generic(source, dest, compressedSize, maxOutputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
371 }
372 
373