xref: /dragonfly/sbin/hammer2/lz4/hammer2_lz4.c (revision 556932ec)
1 /*
2    LZ4 - Fast LZ compression algorithm
3    Copyright (C) 2011-2013, Yann Collet.
4    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are
8    met:
9 
10        * Redistributions of source code must retain the above copyright
11    notice, this list of conditions and the following disclaimer.
12        * Redistributions in binary form must reproduce the above
13    copyright notice, this list of conditions and the following disclaimer
14    in the documentation and/or other materials provided with the
15    distribution.
16 
17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29    You can contact the author at :
30    - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31    - LZ4 source repository : http://code.google.com/p/lz4/
32 */
33 
34 /*
35 Note : this source file requires "hammer2_lz4_encoder.h"
36 */
37 
38 //**************************************
39 // Tuning parameters
40 //**************************************
41 // MEMORY_USAGE :
42 // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB;
43 // 16 -> 64KB; 20 -> 1MB; etc.)
44 // Increasing memory usage improves compression ratio
45 // Reduced memory usage can improve speed, due to cache effect
46 // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
47 #define MEMORY_USAGE 14
48 
49 // HEAPMODE :
50 // Select how default compression function will allocate memory for its
51 // hash table,
52 // in memory stack (0:default, fastest), or in memory heap (1:requires
53 // memory allocation (malloc)).
54 // Default allocation strategy is to use stack (HEAPMODE 0)
55 // Note : explicit functions *_stack* and *_heap* are unaffected by this setting
56 #define HEAPMODE 1
57 
58 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
59 // This will provide a small boost to performance for big endian cpu,
60 // but the resulting compressed stream will be incompatible with little-endian CPU.
61 // You can set this option to 1 in situations where data will remain within
62 // closed environment
63 // This option is useless on Little_Endian CPU (such as x86)
64 //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
65 
66 
67 //**************************************
68 // CPU Feature Detection
69 //**************************************
70 // 32 or 64 bits ?
71 #if (defined(__x86_64__) || defined(_M_X64))   // Detects 64 bits mode
72 #  define LZ4_ARCH64 1
73 #else
74 #  define LZ4_ARCH64 0
75 #endif
76 
77 //This reduced library code is only Little Endian compatible,
78 //if the need arises, please look for the appropriate defines in the
79 //original complete LZ4 library.
80 //Same is true for unaligned memory access which is enabled by default,
81 //hardware bit count, also enabled by default, and Microsoft/Visual
82 //Studio compilers.
83 
84 //**************************************
85 // Compiler Options
86 //**************************************
87 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
88 /* "restrict" is a known keyword */
89 #else
90 #  define restrict // Disable restrict
91 #endif
92 
93 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
94 
95 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
96 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
97 #else
98 #  define expect(expr,value)    (expr)
99 #endif
100 
101 #define likely(expr)     expect((expr) != 0, 1)
102 #define unlikely(expr)   expect((expr) != 0, 0)
103 
104 
105 //**************************************
106 // Includes
107 //**************************************
108 #include <stdio.h>
109 #include <stdlib.h>
110 #include <unistd.h>
111 #include <string.h>
112 
113 #include "hammer2_lz4.h"
114 
115 //**************************************
116 // Basic Types
117 //**************************************
118 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
119 # include <sys/stdint.h>
120   typedef uint8_t  BYTE;
121   typedef uint16_t U16;
122   typedef uint32_t U32;
123   typedef  int32_t S32;
124   typedef uint64_t U64;
125 #else
126   typedef unsigned char       BYTE;
127   typedef unsigned short      U16;
128   typedef unsigned int        U32;
129   typedef   signed int        S32;
130   typedef unsigned long long  U64;
131 #endif
132 
133 #if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
134 #  define _PACKED __attribute__ ((packed))
135 #else
136 #  define _PACKED
137 #endif
138 
139 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
140 #  pragma pack(push, 1)
141 #endif
142 
143 typedef struct _U16_S { U16 v; } _PACKED U16_S;
144 typedef struct _U32_S { U32 v; } _PACKED U32_S;
145 typedef struct _U64_S { U64 v; } _PACKED U64_S;
146 
147 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
148 #  pragma pack(pop)
149 #endif
150 
151 #define A64(x) (((U64_S *)(x))->v)
152 #define A32(x) (((U32_S *)(x))->v)
153 #define A16(x) (((U16_S *)(x))->v)
154 
155 
156 //**************************************
157 // Constants
158 //**************************************
159 #define HASHTABLESIZE (1 << MEMORY_USAGE)
160 
161 #define MINMATCH 4
162 
163 #define COPYLENGTH 8
164 #define LASTLITERALS 5
165 #define MFLIMIT (COPYLENGTH+MINMATCH)
166 #define MINLENGTH (MFLIMIT+1)
167 
168 #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
169 #define SKIPSTRENGTH 6
170 // Increasing this value will make the compression run slower on
171 // incompressible data
172 
173 #define MAXD_LOG 16
174 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
175 
176 #define ML_BITS  4
177 #define ML_MASK  ((1U<<ML_BITS)-1)
178 #define RUN_BITS (8-ML_BITS)
179 #define RUN_MASK ((1U<<RUN_BITS)-1)
180 
181 
182 //**************************************
183 // Architecture-specific macros
184 //**************************************
185 #if LZ4_ARCH64   // 64-bit
186 #  define STEPSIZE 8
187 #  define UARCH U64
188 #  define AARCH A64
189 #  define LZ4_COPYSTEP(s,d)       A64(d) = A64(s); d+=8; s+=8;
190 #  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d)
191 #  define LZ4_SECURECOPY(s,d,e)   if (d<e) LZ4_WILDCOPY(s,d,e)
192 #  define HTYPE                   U32
193 #  define INITBASE(base)          BYTE* base = ip
194 #else      // 32-bit
195 #  define STEPSIZE 4
196 #  define UARCH U32
197 #  define AARCH A32
198 #  define LZ4_COPYSTEP(s,d)       A32(d) = A32(s); d+=4; s+=4;
199 #  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
200 #  define LZ4_SECURECOPY          LZ4_WILDCOPY
201 #  define HTYPE                   BYTE*
202 #  define INITBASE(base)          int base = 0
203 #endif
204 
205 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
206 #  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p);
207 											v = lz4_bswap16(v);
208 											d = (s) - v; }
209 #  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i);
210 											v = lz4_bswap16(v);
211 											A16(p) = v;
212 											p+=2; }
213 #else      // Little Endian
214 #  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
215 #  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
216 #endif
217 
218 
219 //**************************************
220 // Macros
221 //**************************************
222 #define LZ4_WILDCOPY(s,d,e)     do { LZ4_COPYPACKET(s,d) } while (d<e);
223 #define LZ4_BLINDCOPY(s,d,l)    { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
224 
225 
226 //****************************
227 // Private functions
228 //****************************
229 #if LZ4_ARCH64
230 
231 static
232 inline
233 int
234 LZ4_NbCommonBytes (register U64 val)
235 {
236 #if defined(LZ4_BIG_ENDIAN)
237     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
238     unsigned long r = 0;
239     _BitScanReverse64( &r, val );
240     return (int)(r>>3);
241     #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
242     return (__builtin_clzll(val) >> 3);
243     #else
244     int r;
245     if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
246     if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
247     r += (!val);
248     return r;
249     #endif
250 #else
251     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
252     unsigned long r = 0;
253     _BitScanForward64( &r, val );
254     return (int)(r>>3);
255     #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
256     return (__builtin_ctzll(val) >> 3);
257     #else
258     static int DeBruijnBytePos[64] = {
259 		0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
260 		1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
261 		1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
262 		6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
263 		2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
264 		2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
265 		7, 6, 7, 7 };
266     return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
267     #endif
268 #endif
269 }
270 
271 #else
272 
273 static
274 inline
275 int
276 LZ4_NbCommonBytes (register U32 val)
277 {
278 #if defined(LZ4_BIG_ENDIAN)
279 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
280     unsigned long r = 0;
281     _BitScanReverse( &r, val );
282     return (int)(r>>3);
283 #  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
284     return (__builtin_clz(val) >> 3);
285 #  else
286     int r;
287     if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
288     r += (!val);
289     return r;
290 #  endif
291 #else
292 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
293     unsigned long r;
294     _BitScanForward( &r, val );
295     return (int)(r>>3);
296 #  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
297     return (__builtin_ctz(val) >> 3);
298 #  else
299     static int DeBruijnBytePos[32] = {
300 		0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
301 		2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
302 		2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
303 		1, 1 };
304     return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
305 #  endif
306 #endif
307 }
308 
309 #endif
310 
311 
312 
313 //******************************
314 // Compression functions
315 //******************************
316 
317 #include "hammer2_lz4_encoder.h"
318 
319 /*
320 void* LZ4_createHeapMemory();
321 int LZ4_freeHeapMemory(void* ctx);
322 
323 Used to allocate and free hashTable memory
324 to be used by the LZ4_compress_heap* family of functions.
325 LZ4_createHeapMemory() returns NULL is memory allocation fails.
326 */
327 void*
328 LZ4_create(void)
329 {
330 	return malloc(HASHTABLESIZE);
331 }
332 
333 int
334 LZ4_free(void* ctx)
335 {
336 	free(ctx);
337 	return 0;
338 }
339 
340 int
341 LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize)
342 {
343     void* ctx = LZ4_create();
344     int result;
345     if (ctx == NULL) return 0;    // Failed allocation => compression not done
346     if (inputSize < LZ4_64KLIMIT)
347         result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
348 			inputSize, maxOutputSize);
349     else result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
350 			inputSize, maxOutputSize);
351     LZ4_free(ctx);
352     return result;
353 }
354 
355 
356 //****************************
357 // Decompression functions
358 //****************************
359 
360 typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
361 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
362 typedef enum { full = 0, partial = 1 } exit_directive;
363 
364 
365 // This generic decompression function cover all use cases.
366 // It shall be instanciated several times, using different sets of directives
367 // Note that it is essential this generic function is really inlined,
368 // in order to remove useless branches during compilation optimisation.
369 static
370 inline
371 int LZ4_decompress_generic(
372                  char* source,
373                  char* dest,
374                  int inputSize,          //
375                  int outputSize,
376                  // OutputSize must be != 0; if endOnInput==endOnInputSize,
377                  // this value is the max size of Output Buffer.
378 
379                  int endOnInput,         // endOnOutputSize, endOnInputSize
380                  int prefix64k,          // noPrefix, withPrefix
381                  int partialDecoding,    // full, partial
382                  int targetOutputSize    // only used if partialDecoding==partial
383                  )
384 {
385     // Local Variables
386     BYTE* restrict ip = (BYTE*) source;
387     BYTE* ref;
388     BYTE* iend = ip + inputSize;
389 
390     BYTE* op = (BYTE*) dest;
391     BYTE* oend = op + outputSize;
392     BYTE* cpy;
393     BYTE* oexit = op + targetOutputSize;
394 
395     size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
396 #if LZ4_ARCH64
397     size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
398 #endif
399 
400 
401     // Special case
402     if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;
403     // targetOutputSize too large, better decode everything
404     if unlikely(outputSize==0) goto _output_error;
405     // Empty output buffer
406 
407 
408     // Main Loop
409     while (1)
410     {
411         unsigned token;
412         size_t length;
413 
414         // get runlength
415         token = *ip++;
416         if ((length=(token>>ML_BITS)) == RUN_MASK)
417         {
418             unsigned s=255;
419             while (((endOnInput)?ip<iend:1) && (s==255))
420             {
421                 s = *ip++;
422                 length += s;
423             }
424         }
425 
426         // copy literals
427         cpy = op+length;
428         if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
429 			|| (ip+length>iend-(2+1+LASTLITERALS))) )
430             || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
431         {
432             if (partialDecoding)
433             {
434                 if (cpy > oend) goto _output_error;
435                 // Error : write attempt beyond end of output buffer
436                 if ((endOnInput) && (ip+length > iend)) goto _output_error;
437                 // Error : read attempt beyond end of input buffer
438             }
439             else
440             {
441                 if ((!endOnInput) && (cpy != oend)) goto _output_error;
442                 // Error : block decoding must stop exactly there,
443                 // due to parsing restrictions
444                 if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
445 					goto _output_error;
446 					// Error : not enough place for another match (min 4) + 5 literals
447             }
448             memcpy(op, ip, length);
449             ip += length;
450             op += length;
451             break;
452             // Necessarily EOF, due to parsing restrictions
453         }
454         LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
455 
456         // get offset
457         LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
458         if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
459 			goto _output_error;   // Error : offset outside destination buffer
460 
461         // get matchlength
462         if ((length=(token&ML_MASK)) == ML_MASK)
463         {
464             while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
465             // A minimum nb of input bytes must remain for LASTLITERALS + token
466             {
467                 unsigned s = *ip++;
468                 length += s;
469                 if (s==255) continue;
470                 break;
471             }
472         }
473 
474         // copy repeated sequence
475         if unlikely((op-ref)<STEPSIZE)
476         {
477 #if LZ4_ARCH64
478             size_t dec64 = dec64table[op-ref];
479 #else
480             const size_t dec64 = 0;
481 #endif
482             op[0] = ref[0];
483             op[1] = ref[1];
484             op[2] = ref[2];
485             op[3] = ref[3];
486             op += 4, ref += 4; ref -= dec32table[op-ref];
487             A32(op) = A32(ref);
488             op += STEPSIZE-4; ref -= dec64;
489         } else { LZ4_COPYSTEP(ref,op); }
490         cpy = op + length - (STEPSIZE-4);
491 
492         if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
493         {
494             if (cpy > oend-LASTLITERALS) goto _output_error;
495             // Error : last 5 bytes must be literals
496             LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
497             while(op<cpy) *op++=*ref++;
498             op=cpy;
499             continue;
500         }
501         LZ4_WILDCOPY(ref, op, cpy);
502         op=cpy;   // correction
503     }
504 
505     // end of decoding
506     if (endOnInput)
507        return (int) (((char*)op)-dest);     // Nb of output bytes decoded
508     else
509        return (int) (((char*)ip)-source);   // Nb of input bytes read
510 
511     // Overflow error detected
512 _output_error:
513     return (int) (-(((char*)ip)-source))-1;
514 }
515 
516 
517 int
518 LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
519 {
520     return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
521 							endOnInputSize, noPrefix, full, 0);
522 }
523