xref: /dragonfly/sys/vfs/hammer2/hammer2_lz4.c (revision eaf07fa9)
1 /*
2    LZ4 - Fast LZ compression algorithm
3    Copyright (C) 2011-2013, Yann Collet.
4    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions are
8    met:
9 
10        * Redistributions of source code must retain the above copyright
11    notice, this list of conditions and the following disclaimer.
12        * Redistributions in binary form must reproduce the above
13    copyright notice, this list of conditions and the following disclaimer
14    in the documentation and/or other materials provided with the
15    distribution.
16 
17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29    You can contact the author at :
30    - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31    - LZ4 source repository : http://code.google.com/p/lz4/
32 */
33 
34 /*
35 Note : this source file requires "hammer2_lz4_encoder.h"
36 */
37 
38 //**************************************
39 // Tuning parameters
40 //**************************************
41 // MEMORY_USAGE :
42 // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB;
43 // 16 -> 64KB; 20 -> 1MB; etc.)
44 // Increasing memory usage improves compression ratio
45 // Reduced memory usage can improve speed, due to cache effect
46 // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
47 #define MEMORY_USAGE 14
48 
49 // HEAPMODE :
50 // Select how default compression function will allocate memory for its
51 // hash table,
52 // in memory stack (0:default, fastest), or in memory heap (1:requires
53 // memory allocation (malloc)).
54 // Default allocation strategy is to use stack (HEAPMODE 0)
55 // Note : explicit functions *_stack* and *_heap* are unaffected by this setting
56 #define HEAPMODE 1
57 
58 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
59 // This will provide a small boost to performance for big endian cpu,
60 // but the resulting compressed stream will be incompatible with little-endian CPU.
61 // You can set this option to 1 in situations where data will remain within
62 // closed environment
63 // This option is useless on Little_Endian CPU (such as x86)
64 //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
65 
66 
67 //**************************************
68 // CPU Feature Detection
69 //**************************************
70 // 32 or 64 bits ?
71 #if (defined(__x86_64__) || defined(_M_X64))   // Detects 64 bits mode
72 #  define LZ4_ARCH64 1
73 #else
74 #  define LZ4_ARCH64 0
75 #endif
76 
77 //This reduced library code is only Little Endian compatible,
78 //if the need arises, please look for the appropriate defines in the
79 //original complete LZ4 library.
80 //Same is true for unaligned memory access which is enabled by default,
81 //hardware bit count, also enabled by default, and Microsoft/Visual
82 //Studio compilers.
83 
84 //**************************************
85 // Compiler Options
86 //**************************************
87 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
88 /* "restrict" is a known keyword */
89 #else
90 #  define restrict // Disable restrict
91 #endif
92 
93 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
94 
95 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
96 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
97 #else
98 #  define expect(expr,value)    (expr)
99 #endif
100 
101 #define likely(expr)     expect((expr) != 0, 1)
102 #define unlikely(expr)   expect((expr) != 0, 0)
103 
104 
105 //**************************************
106 // Includes
107 //**************************************
108 #include "hammer2.h"
109 #include "hammer2_lz4.h"
110 #include <sys/malloc.h> //for malloc macros, hammer2.h includes sys/param.h
111 
112 
113 //Declaration for kmalloc functions
114 static MALLOC_DEFINE(C_HASHTABLE, "comphashtable",
115 	"A hash table used by LZ4 compression function.");
116 
117 
118 //**************************************
119 // Basic Types
120 //**************************************
121 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
122 # include <sys/stdint.h>
123   typedef uint8_t  BYTE;
124   typedef uint16_t U16;
125   typedef uint32_t U32;
126   typedef  int32_t S32;
127   typedef uint64_t U64;
128 #else
129   typedef unsigned char       BYTE;
130   typedef unsigned short      U16;
131   typedef unsigned int        U32;
132   typedef   signed int        S32;
133   typedef unsigned long long  U64;
134 #endif
135 
136 #if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
137 #  define _PACKED __attribute__ ((packed))
138 #else
139 #  define _PACKED
140 #endif
141 
142 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
143 #  pragma pack(push, 1)
144 #endif
145 
146 typedef struct _U16_S { U16 v; } _PACKED U16_S;
147 typedef struct _U32_S { U32 v; } _PACKED U32_S;
148 typedef struct _U64_S { U64 v; } _PACKED U64_S;
149 
150 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
151 #  pragma pack(pop)
152 #endif
153 
154 #define A64(x) (((U64_S *)(x))->v)
155 #define A32(x) (((U32_S *)(x))->v)
156 #define A16(x) (((U16_S *)(x))->v)
157 
158 
159 //**************************************
160 // Constants
161 //**************************************
162 #define HASHTABLESIZE (1 << MEMORY_USAGE)
163 
164 #define MINMATCH 4
165 
166 #define COPYLENGTH 8
167 #define LASTLITERALS 5
168 #define MFLIMIT (COPYLENGTH+MINMATCH)
169 #define MINLENGTH (MFLIMIT+1)
170 
171 #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
172 #define SKIPSTRENGTH 6
173 // Increasing this value will make the compression run slower on
174 // incompressible data
175 
176 #define MAXD_LOG 16
177 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
178 
179 #define ML_BITS  4
180 #define ML_MASK  ((1U<<ML_BITS)-1)
181 #define RUN_BITS (8-ML_BITS)
182 #define RUN_MASK ((1U<<RUN_BITS)-1)
183 
184 
185 //**************************************
186 // Architecture-specific macros
187 //**************************************
188 #if LZ4_ARCH64   // 64-bit
189 #  define STEPSIZE 8
190 #  define UARCH U64
191 #  define AARCH A64
192 #  define LZ4_COPYSTEP(s,d)       A64(d) = A64(s); d+=8; s+=8;
193 #  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d)
194 #  define LZ4_SECURECOPY(s,d,e)   if (d<e) LZ4_WILDCOPY(s,d,e)
195 #  define HTYPE                   U32
196 #  define INITBASE(base)          BYTE* base = ip
197 #else      // 32-bit
198 #  define STEPSIZE 4
199 #  define UARCH U32
200 #  define AARCH A32
201 #  define LZ4_COPYSTEP(s,d)       A32(d) = A32(s); d+=4; s+=4;
202 #  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
203 #  define LZ4_SECURECOPY          LZ4_WILDCOPY
204 #  define HTYPE                   BYTE*
205 #  define INITBASE(base)          int base = 0
206 #endif
207 
208 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
209 #  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p);
210 											v = lz4_bswap16(v);
211 											d = (s) - v; }
212 #  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i);
213 											v = lz4_bswap16(v);
214 											A16(p) = v;
215 											p+=2; }
216 #else      // Little Endian
217 #  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
218 #  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
219 #endif
220 
221 
222 //**************************************
223 // Macros
224 //**************************************
225 #define LZ4_WILDCOPY(s,d,e)     do { LZ4_COPYPACKET(s,d) } while (d<e);
226 #define LZ4_BLINDCOPY(s,d,l)    { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
227 
228 
229 //****************************
230 // Private functions
231 //****************************
232 #if LZ4_ARCH64
233 
234 static
235 inline
236 int
237 LZ4_NbCommonBytes (register U64 val)
238 {
239 #if defined(LZ4_BIG_ENDIAN)
240     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
241     unsigned long r = 0;
242     _BitScanReverse64( &r, val );
243     return (int)(r>>3);
244     #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
245     return (__builtin_clzll(val) >> 3);
246     #else
247     int r;
248     if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
249     if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
250     r += (!val);
251     return r;
252     #endif
253 #else
254     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
255     unsigned long r = 0;
256     _BitScanForward64( &r, val );
257     return (int)(r>>3);
258     #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
259     return (__builtin_ctzll(val) >> 3);
260     #else
261     static int DeBruijnBytePos[64] = {
262 		0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
263 		1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
264 		1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
265 		6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
266 		2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
267 		2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
268 		7, 6, 7, 7 };
269     return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
270     #endif
271 #endif
272 }
273 
274 #else
275 
276 static
277 inline
278 int
279 LZ4_NbCommonBytes (register U32 val)
280 {
281 #if defined(LZ4_BIG_ENDIAN)
282 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
283     unsigned long r = 0;
284     _BitScanReverse( &r, val );
285     return (int)(r>>3);
286 #  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
287     return (__builtin_clz(val) >> 3);
288 #  else
289     int r;
290     if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
291     r += (!val);
292     return r;
293 #  endif
294 #else
295 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
296     unsigned long r;
297     _BitScanForward( &r, val );
298     return (int)(r>>3);
299 #  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
300     return (__builtin_ctz(val) >> 3);
301 #  else
302     static int DeBruijnBytePos[32] = {
303 		0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
304 		2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
305 		2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
306 		1, 1 };
307     return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
308 #  endif
309 #endif
310 }
311 
312 #endif
313 
314 
315 
316 //******************************
317 // Compression functions
318 //******************************
319 
320 #include "hammer2_lz4_encoder.h"
321 
322 /*
323 void* LZ4_createHeapMemory();
324 int LZ4_freeHeapMemory(void* ctx);
325 
326 Used to allocate and free hashTable memory
327 to be used by the LZ4_compress_heap* family of functions.
328 LZ4_createHeapMemory() returns NULL is memory allocation fails.
329 */
330 void*
331 LZ4_create(void)
332 {
333 	return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT);
334 }
335 
336 int
337 LZ4_free(void* ctx)
338 {
339 	kfree(ctx, C_HASHTABLE);
340 	return 0;
341 }
342 
343 int
344 LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize)
345 {
346     void* ctx = LZ4_create();
347     int result;
348     if (ctx == NULL) return 0;    // Failed allocation => compression not done
349     if (inputSize < LZ4_64KLIMIT)
350         result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
351 			inputSize, maxOutputSize);
352     else result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
353 			inputSize, maxOutputSize);
354     LZ4_free(ctx);
355     return result;
356 }
357 
358 
359 //****************************
360 // Decompression functions
361 //****************************
362 
363 typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
364 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
365 typedef enum { full = 0, partial = 1 } exit_directive;
366 
367 
368 // This generic decompression function cover all use cases.
369 // It shall be instanciated several times, using different sets of directives
370 // Note that it is essential this generic function is really inlined,
371 // in order to remove useless branches during compilation optimisation.
372 static
373 inline
374 int LZ4_decompress_generic(
375                  char* source,
376                  char* dest,
377                  int inputSize,          //
378                  int outputSize,
379                  // OutputSize must be != 0; if endOnInput==endOnInputSize,
380                  // this value is the max size of Output Buffer.
381 
382                  int endOnInput,         // endOnOutputSize, endOnInputSize
383                  int prefix64k,          // noPrefix, withPrefix
384                  int partialDecoding,    // full, partial
385                  int targetOutputSize    // only used if partialDecoding==partial
386                  )
387 {
388     // Local Variables
389     BYTE* restrict ip = (BYTE*) source;
390     BYTE* ref;
391     BYTE* iend = ip + inputSize;
392 
393     BYTE* op = (BYTE*) dest;
394     BYTE* oend = op + outputSize;
395     BYTE* cpy;
396     BYTE* oexit = op + targetOutputSize;
397 
398     size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
399 #if LZ4_ARCH64
400     size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
401 #endif
402 
403 
404     // Special case
405     if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;
406     // targetOutputSize too large, better decode everything
407     if unlikely(outputSize==0) goto _output_error;
408     // Empty output buffer
409 
410 
411     // Main Loop
412     while (1)
413     {
414         unsigned token;
415         size_t length;
416 
417         // get runlength
418         token = *ip++;
419         if ((length=(token>>ML_BITS)) == RUN_MASK)
420         {
421             unsigned s=255;
422             while (((endOnInput)?ip<iend:1) && (s==255))
423             {
424                 s = *ip++;
425                 length += s;
426             }
427         }
428 
429         // copy literals
430         cpy = op+length;
431         if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
432 			|| (ip+length>iend-(2+1+LASTLITERALS))) )
433             || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
434         {
435             if (partialDecoding)
436             {
437                 if (cpy > oend) goto _output_error;
438                 // Error : write attempt beyond end of output buffer
439                 if ((endOnInput) && (ip+length > iend)) goto _output_error;
440                 // Error : read attempt beyond end of input buffer
441             }
442             else
443             {
444                 if ((!endOnInput) && (cpy != oend)) goto _output_error;
445                 // Error : block decoding must stop exactly there,
446                 // due to parsing restrictions
447                 if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
448 					goto _output_error;
449 					// Error : not enough place for another match (min 4) + 5 literals
450             }
451             memcpy(op, ip, length);
452             ip += length;
453             op += length;
454             break;
455             // Necessarily EOF, due to parsing restrictions
456         }
457         LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
458 
459         // get offset
460         LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
461         if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
462 			goto _output_error;   // Error : offset outside destination buffer
463 
464         // get matchlength
465         if ((length=(token&ML_MASK)) == ML_MASK)
466         {
467             while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
468             // A minimum nb of input bytes must remain for LASTLITERALS + token
469             {
470                 unsigned s = *ip++;
471                 length += s;
472                 if (s==255) continue;
473                 break;
474             }
475         }
476 
477         // copy repeated sequence
478         if unlikely((op-ref)<STEPSIZE)
479         {
480 #if LZ4_ARCH64
481             size_t dec64 = dec64table[op-ref];
482 #else
483             const size_t dec64 = 0;
484 #endif
485             op[0] = ref[0];
486             op[1] = ref[1];
487             op[2] = ref[2];
488             op[3] = ref[3];
489             op += 4, ref += 4; ref -= dec32table[op-ref];
490             A32(op) = A32(ref);
491             op += STEPSIZE-4; ref -= dec64;
492         } else { LZ4_COPYSTEP(ref,op); }
493         cpy = op + length - (STEPSIZE-4);
494 
495         if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
496         {
497             if (cpy > oend-LASTLITERALS) goto _output_error;
498             // Error : last 5 bytes must be literals
499             LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
500             while(op<cpy) *op++=*ref++;
501             op=cpy;
502             continue;
503         }
504         LZ4_WILDCOPY(ref, op, cpy);
505         op=cpy;   // correction
506     }
507 
508     // end of decoding
509     if (endOnInput)
510        return (int) (((char*)op)-dest);     // Nb of output bytes decoded
511     else
512        return (int) (((char*)ip)-source);   // Nb of input bytes read
513 
514     // Overflow error detected
515 _output_error:
516     return (int) (-(((char*)ip)-source))-1;
517 }
518 
519 
520 int
521 LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
522 {
523     return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
524 							endOnInputSize, noPrefix, full, 0);
525 }
526