/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-2013, Yann Collet.
   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
   - LZ4 source repository : http://code.google.com/p/lz4/
*/

/*
Note : this source file requires "hammer2_lz4_encoder.h"
*/

//**************************************
// Tuning parameters
//**************************************
// MEMORY_USAGE :
// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB;
// 16 -> 64KB; 20 -> 1MB; etc.)
// Increasing memory usage improves compression ratio
// Reduced memory usage can improve speed, due to cache effect
// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
#define MEMORY_USAGE 14
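
/*
 * Worked example (illustrative only) : MEMORY_USAGE selects the hash table
 * footprint as 2^MEMORY_USAGE bytes, so the default of 14 gives
 * HASHTABLESIZE = 1 << 14 = 16384 bytes (16KB), matching the comment above :
 *
 *   #define MEMORY_USAGE 14
 *   #define HASHTABLESIZE (1 << MEMORY_USAGE)   // == 16384 bytes
 *
 * Raising MEMORY_USAGE to 16 would grow the table to 64KB, typically
 * improving ratio at some cost in cache locality.
 */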

// HEAPMODE :
// Selects how the default compression function allocates memory for its
// hash table : on the stack (0, fastest) or on the heap (1, requires a
// memory allocation).
// The default allocation strategy is the stack (HEAPMODE 0);
// this file selects the heap (HEAPMODE 1).
// Note : the explicit *_stack* and *_heap* functions are unaffected by this setting
#define HEAPMODE 1
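
/*
 * Minimal sketch (an assumption, not the actual encoder code -- the real
 * selection lives in "hammer2_lz4_encoder.h", which is not shown here) of
 * how a HEAPMODE switch is typically wired up :
 *
 *   #if HEAPMODE
 *       void* ctx = LZ4_create();     // heap : kmalloc'd hash table
 *       ...
 *       LZ4_free(ctx);
 *   #else
 *       HTYPE hashTable[HASHTABLESIZE / sizeof(HTYPE)];   // stack
 *   #endif
 */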

// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
// This provides a small performance boost on big-endian CPUs,
// but the resulting compressed stream is incompatible with little-endian CPUs.
// You can set this option to 1 in situations where data will remain within
// a closed environment.
// This option has no effect on little-endian CPUs (such as x86).
//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1


//**************************************
// CPU Feature Detection
//**************************************
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(_M_X64))   // Detects 64 bits mode
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif

// This reduced library code is only little-endian compatible;
// if the need arises, look for the appropriate defines in the
// original, complete LZ4 library.
// The same applies to unaligned memory access (enabled by default),
// hardware bit counting (also enabled by default), and support for
// Microsoft/Visual Studio compilers.

//**************************************
// Compiler Options
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
/* "restrict" is a known keyword */
#else
#  define restrict // Disable restrict
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

#define likely(expr)     expect((expr) != 0, 1)
#define unlikely(expr)   expect((expr) != 0, 0)
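
/*
 * Usage note (illustrative) : these wrappers feed static branch-prediction
 * hints to the compiler when __builtin_expect is available, e.g.
 *
 *   if unlikely(outputSize == 0) goto _output_error;
 *
 * as used in the decoder below; they degrade to plain conditionals when the
 * builtin is unsupported.
 */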


//**************************************
// Includes
//**************************************
#include "hammer2.h"
#include "hammer2_lz4.h"
//#include <sys/malloc.h> //for malloc macros, hammer2.h includes sys/param.h


// Declaration for kmalloc functions
/*
static MALLOC_DEFINE(C_HASHTABLE, "comphashtable",
	"A hash table used by LZ4 compression function.");
*/

//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
# include <sys/stdint.h>
  typedef uint8_t  BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#else
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
#endif

#if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(push, 1)
#endif

typedef struct _U16_S { U16 v; } _PACKED U16_S;
typedef struct _U32_S { U32 v; } _PACKED U32_S;
typedef struct _U64_S { U64 v; } _PACKED U64_S;

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(pop)
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)
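
/*
 * Illustrative only : the packed one-member structs above let the A16/A32/A64
 * macros read and write potentially unaligned words on compilers that honour
 * __attribute__((packed)), e.g.
 *
 *   BYTE buf[9];
 *   U32 w = A32(buf + 1);      // 4-byte load from an odd address
 *   A32(buf + 5) = w;          // 4-byte store, also unaligned
 */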


//**************************************
// Constants
//**************************************
#define HASHTABLESIZE (1 << MEMORY_USAGE)

#define MINMATCH 4

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
#define MINLENGTH (MFLIMIT+1)

#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
#define SKIPSTRENGTH 6
// Increasing this value will make the compression run slower on
// incompressible data

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
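
/*
 * Illustrative only : each sequence starts with a one-byte token whose high
 * 4 bits (RUN_BITS) hold the literal run length and whose low 4 bits
 * (ML_BITS) hold the match length; each field saturates at its mask value
 * to signal that extra length bytes follow, as done in the decoder below :
 *
 *   unsigned token  = *ip++;
 *   size_t literals = token >> ML_BITS;    // == RUN_MASK (15) => more bytes
 *   size_t matchlen = token &  ML_MASK;    // == ML_MASK  (15) => more bytes
 */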


//**************************************
// Architecture-specific macros
//**************************************
#if LZ4_ARCH64   // 64-bit
#  define STEPSIZE 8
#  define UARCH U64
#  define AARCH A64
#  define LZ4_COPYSTEP(s,d)       A64(d) = A64(s); d+=8; s+=8;
#  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d)
#  define LZ4_SECURECOPY(s,d,e)   if (d<e) LZ4_WILDCOPY(s,d,e)
#  define HTYPE                   U32
#  define INITBASE(base)          BYTE* base = ip
#else      // 32-bit
#  define STEPSIZE 4
#  define UARCH U32
#  define AARCH A32
#  define LZ4_COPYSTEP(s,d)       A32(d) = A32(s); d+=4; s+=4;
#  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
#  define LZ4_SECURECOPY          LZ4_WILDCOPY
#  define HTYPE                   BYTE*
#  define INITBASE(base)          int base = 0
#endif

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); \
                                            v = lz4_bswap16(v); \
                                            d = (s) - v; }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); \
                                            v = lz4_bswap16(v); \
                                            A16(p) = v; \
                                            p+=2; }
#else      // Little Endian
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
#endif
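
/*
 * Note : the big-endian branch above relies on an lz4_bswap16() helper that
 * is not defined in this reduced file. A minimal sketch (an assumption,
 * functionally equivalent to the helper in the complete LZ4 library) :
 *
 *   #define lz4_bswap16(i)  ((U16)(((i) >> 8) | ((i) << 8)))
 *
 * It is irrelevant here, since this port only supports little-endian CPUs.
 */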


//**************************************
// Macros
//**************************************
#define LZ4_WILDCOPY(s,d,e)     do { LZ4_COPYPACKET(s,d) } while (d<e);
#define LZ4_BLINDCOPY(s,d,l)    { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
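
/*
 * Illustrative only : LZ4_WILDCOPY copies whole STEPSIZE packets and may
 * therefore write a few bytes past 'e', up to the next packet boundary;
 * callers must guarantee that slack space exists (this is what the
 * COPYLENGTH/LASTLITERALS margins are for). LZ4_BLINDCOPY is simply a wild
 * copy of a known length :
 *
 *   BYTE* src = ...; BYTE* dst = ...;
 *   LZ4_BLINDCOPY(src, dst, 13);   // copies at least 13 bytes, rounded up
 *                                  // to a multiple of the packet size
 */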


//****************************
// Private functions
//****************************
#if LZ4_ARCH64

static
inline
int
LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse64( &r, val );
    return (int)(r>>3);
    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
    #else
    int r;
    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    r += (!val);
    return r;
    #endif
#else
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanForward64( &r, val );
    return (int)(r>>3);
    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
    #else
    static int DeBruijnBytePos[64] = {
		0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
		1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
		1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
		6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
		2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
		2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
		7, 6, 7, 7 };
    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
    #endif
#endif
}

#else

static
inline
int
LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#  else
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#  else
    static int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
		2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
		2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
		1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#  endif
#endif
}

#endif
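
/*
 * Worked example (illustrative only; the caller lives in the encoder header,
 * which is not shown here) : match lengths are extended STEPSIZE bytes at a
 * time by XORing the two words; on a little-endian CPU the number of matching
 * leading bytes equals the number of trailing zero bytes of the difference,
 * which is what LZ4_NbCommonBytes() extracts via ctz, a bit-scan, or the
 * De Bruijn lookup table fallback :
 *
 *   UARCH diff = AARCH(ref) ^ AARCH(ip);       // e.g. 0x00000000FF000000
 *   if (diff) ip += LZ4_NbCommonBytes(diff);   // 3 matching bytes here
 */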




//******************************
// Compression functions
//******************************

#include "hammer2_lz4_encoder.h"

/*
void* LZ4_createHeapMemory();
int LZ4_freeHeapMemory(void* ctx);

Used to allocate and free hashTable memory
to be used by the LZ4_compress_heap* family of functions.
LZ4_createHeapMemory() returns NULL if memory allocation fails.
*/
void*
LZ4_create(void)
{
	return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT);
}

int
LZ4_free(void* ctx)
{
	kfree(ctx, C_HASHTABLE);
	return 0;
}

int
LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize)
{
    void* ctx = LZ4_create();
    int result;
    if (ctx == NULL) return 0;    // Failed allocation => compression not done
    if (inputSize < LZ4_64KLIMIT)
        result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
			inputSize, maxOutputSize);
    else result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
			inputSize, maxOutputSize);
    LZ4_free(ctx);
    return result;
}
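
/*
 * Usage sketch (illustrative only; buffer names are hypothetical) :
 *
 *   char out[4096];
 *   int clen = LZ4_compress_limitedOutput(in, out, in_len, sizeof(out));
 *   if (clen == 0) {
 *       // allocation failed, or (by LZ4 convention) the compressed data
 *       // would not fit within maxOutputSize : store the data uncompressed
 *   }
 *
 * A return value of 0 means no usable compressed output was produced.
 */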


//****************************
// Decompression functions
//****************************

typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
typedef enum { full = 0, partial = 1 } exit_directive;


// This generic decompression function covers all use cases.
// It shall be instantiated several times, using different sets of directives.
// Note that it is essential this generic function is really inlined,
// in order to remove useless branches during compilation optimisation.
static
inline
int LZ4_decompress_generic(
                 char* source,
                 char* dest,
                 int inputSize,
                 int outputSize,         // must be != 0; if endOnInput==endOnInputSize,
                                         // this value is the max size of the output buffer.

                 int endOnInput,         // endOnOutputSize, endOnInputSize
                 int prefix64k,          // noPrefix, withPrefix
                 int partialDecoding,    // full, partial
                 int targetOutputSize    // only used if partialDecoding==partial
                 )
{
    // Local Variables
    BYTE* restrict ip = (BYTE*) source;
    BYTE* ref;
    BYTE* iend = ip + inputSize;

    BYTE* op = (BYTE*) dest;
    BYTE* oend = op + outputSize;
    BYTE* cpy;
    BYTE* oexit = op + targetOutputSize;

    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif


    // Special cases
    // targetOutputSize too large : better decode everything
    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;
    // Error : empty output buffer
    if unlikely(outputSize==0) goto _output_error;


    // Main Loop
    while (1)
    {
        unsigned token;
        size_t length;

        // get runlength
        token = *ip++;
        if ((length=(token>>ML_BITS)) == RUN_MASK)
        {
            unsigned s=255;
            while (((endOnInput)?ip<iend:1) && (s==255))
            {
                s = *ip++;
                length += s;
            }
        }

        // copy literals
        cpy = op+length;
        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
			|| (ip+length>iend-(2+1+LASTLITERALS))) )
            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
        {
            if (partialDecoding)
            {
                // Error : write attempt beyond end of output buffer
                if (cpy > oend) goto _output_error;
                // Error : read attempt beyond end of input buffer
                if ((endOnInput) && (ip+length > iend)) goto _output_error;
            }
            else
            {
                // Error : block decoding must stop exactly there,
                // due to parsing restrictions
                if ((!endOnInput) && (cpy != oend)) goto _output_error;
                // Error : not enough room for another match (min 4) + 5 literals
                if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
					goto _output_error;
            }
            memcpy(op, ip, length);
            ip += length;
            op += length;
            // Necessarily EOF, due to parsing restrictions
            break;
        }
        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;

        // get offset
        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
        if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
			goto _output_error;   // Error : offset outside destination buffer

        // get matchlength
        if ((length=(token&ML_MASK)) == ML_MASK)
        {
            // A minimum nb of input bytes must remain for LASTLITERALS + token
            while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
            {
                unsigned s = *ip++;
                length += s;
                if (s==255) continue;
                break;
            }
        }

        // copy repeated sequence
        if unlikely((op-ref)<STEPSIZE)
        {
#if LZ4_ARCH64
            size_t dec64 = dec64table[op-ref];
#else
            const size_t dec64 = 0;
#endif
            op[0] = ref[0];
            op[1] = ref[1];
            op[2] = ref[2];
            op[3] = ref[3];
            op += 4, ref += 4; ref -= dec32table[op-ref];
            A32(op) = A32(ref);
            op += STEPSIZE-4; ref -= dec64;
        } else { LZ4_COPYSTEP(ref,op); }
        cpy = op + length - (STEPSIZE-4);

        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
        {
            // Error : last 5 bytes must be literals
            if (cpy > oend-LASTLITERALS) goto _output_error;
            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
            while(op<cpy) *op++=*ref++;
            op=cpy;
            continue;
        }
        LZ4_WILDCOPY(ref, op, cpy);
        op=cpy;   // correction
    }

    // end of decoding
    if (endOnInput)
       return (int) (((char*)op)-dest);     // Nb of output bytes decoded
    else
       return (int) (((char*)ip)-source);   // Nb of input bytes read

    // Overflow error detected
_output_error:
    return (int) (-(((char*)ip)-source))-1;
}


int
LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
							endOnInputSize, noPrefix, full, 0);
}
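
/*
 * Usage sketch (illustrative only; buffer names are hypothetical) :
 *
 *   char plain[65536];
 *   int dlen = LZ4_decompress_safe(comp, plain, comp_len, sizeof(plain));
 *   if (dlen < 0) {
 *       // malformed or truncated input; the negative return value is
 *       // derived from the source position where decoding stopped
 *   }
 *
 * This wrapper instantiates LZ4_decompress_generic() with endOnInputSize,
 * noPrefix and full decoding, so it never reads more than inputSize bytes of
 * source nor writes more than maxOutputSize bytes of dest.
 */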