/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-2013, Yann Collet.
   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
   - LZ4 source repository : http://code.google.com/p/lz4/
*/

/*
Note : this source file requires "hammer2_lz4_encoder.h"
*/

//**************************************
// Tuning parameters
//**************************************
// MEMORY_USAGE :
// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB;
// 16 -> 64KB; 20 -> 1MB; etc.)
// Increasing memory usage improves compression ratio.
// Reducing memory usage can improve speed, due to cache effects.
// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache.
#define MEMORY_USAGE 14
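// Worked example of the formula above: with MEMORY_USAGE 14, HASHTABLESIZE
// (defined below) evaluates to 1 << 14 = 16384 bytes (16KB), which is
// exactly what LZ4_create() allocates for the hash table further down;
// MEMORY_USAGE 16 would grow that allocation to 64KB, 12 would shrink it
// to 4KB.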

// HEAPMODE :
// Select how the default compression function allocates memory for its
// hash table: on the stack (0, fastest) or on the heap (1, requires a
// memory allocation via malloc/kmalloc).
// The default allocation strategy is the stack (HEAPMODE 0); this file
// selects the heap (HEAPMODE 1).
// Note : explicit functions *_stack* and *_heap* are unaffected by this setting
#define HEAPMODE 1
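// Illustrative sketch of the two strategies (assumed shape only; the real
// compressor bodies live in "hammer2_lz4_encoder.h"): with HEAPMODE 0 the
// hash table would be a local array along the lines of
// "HTYPE hashTable[HASHTABLESIZE / sizeof(HTYPE)];", while with HEAPMODE 1
// it comes from the allocator, as LZ4_compress_limitedOutput() does below:
//
//     void* ctx = LZ4_create();      // kmalloc(HASHTABLESIZE, ...)
//     ...compress using ctx...
//     LZ4_free(ctx);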

// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
// This provides a small performance boost on big-endian CPUs, but the
// resulting compressed stream is incompatible with little-endian CPUs.
// You can set this option to 1 in situations where the data will remain
// within a closed environment.
// This option has no effect on little-endian CPUs (such as x86).
//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1


//**************************************
// CPU Feature Detection
//**************************************
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(_M_X64))   // Detects 64-bit mode
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif

// This reduced library code is only little-endian compatible; if the need
// arises, please look for the appropriate defines in the original,
// complete LZ4 library.
// The same is true for unaligned memory access (enabled by default),
// hardware bit count (also enabled by default), and support for the
// Microsoft/Visual Studio compilers.

//**************************************
// Compiler Options
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
/* "restrict" is a known keyword */
#else
#  define restrict // Disable restrict
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

#define likely(expr)     expect((expr) != 0, 1)
#define unlikely(expr)   expect((expr) != 0, 0)
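
// Typical use of these hints (illustration only): mark a branch the code
// expects to be rare so the compiler keeps the common path straight-line,
// e.g. "if unlikely(outputSize==0) goto _output_error;" as in the decoder
// below.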


//**************************************
// Includes
//**************************************
#include "hammer2.h"
#include "hammer2_lz4.h"
//#include <sys/malloc.h> //for malloc macros, hammer2.h includes sys/param.h


// Declaration for kmalloc functions
/*
static MALLOC_DEFINE(C_HASHTABLE, "comphashtable",
	"A hash table used by LZ4 compression function.");
*/


//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
# include <sys/stdint.h>
  typedef uint8_t  BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#else
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
#endif

#if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(push, 1)
#endif

typedef struct _U16_S { U16 v; } _PACKED U16_S;
typedef struct _U32_S { U32 v; } _PACKED U32_S;
typedef struct _U64_S { U64 v; } _PACKED U64_S;

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(pop)
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)


//**************************************
// Constants
//**************************************
#define HASHTABLESIZE (1 << MEMORY_USAGE)

#define MINMATCH 4

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
#define MINLENGTH (MFLIMIT+1)

#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
// Increasing SKIPSTRENGTH will make the compression run slower on
// incompressible data
#define SKIPSTRENGTH 6

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
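
// Worked example of the token layout implied by ML_BITS/RUN_BITS above:
// every LZ4 sequence starts with one token byte whose high RUN_BITS (4)
// bits hold the literal count and whose low ML_BITS (4) bits hold the
// match-length code.  Token 0x52 therefore means 5 literals followed by a
// match of 0x2 + MINMATCH = 6 bytes; a field equal to RUN_MASK or ML_MASK
// (15) means additional length bytes follow (see LZ4_decompress_generic).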


//**************************************
// Architecture-specific macros
//**************************************
#if LZ4_ARCH64   // 64-bit
#  define STEPSIZE 8
#  define UARCH U64
#  define AARCH A64
#  define LZ4_COPYSTEP(s,d)       A64(d) = A64(s); d+=8; s+=8;
#  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d)
#  define LZ4_SECURECOPY(s,d,e)   if (d<e) LZ4_WILDCOPY(s,d,e)
#  define HTYPE                   U32
#  define INITBASE(base)          BYTE* base = ip
#else      // 32-bit
#  define STEPSIZE 4
#  define UARCH U32
#  define AARCH A32
#  define LZ4_COPYSTEP(s,d)       A32(d) = A32(s); d+=4; s+=4;
#  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
#  define LZ4_SECURECOPY          LZ4_WILDCOPY
#  define HTYPE                   BYTE*
#  define INITBASE(base)          int base = 0
#endif

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); \
											v = lz4_bswap16(v); \
											d = (s) - v; }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); \
											v = lz4_bswap16(v); \
											A16(p) = v; \
											p+=2; }
#else      // Little Endian
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
#endif


//**************************************
// Macros
//**************************************
#define LZ4_WILDCOPY(s,d,e)     do { LZ4_COPYPACKET(s,d) } while (d<e);
#define LZ4_BLINDCOPY(s,d,l)    { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
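
// Note on the copy macros above: LZ4_WILDCOPY moves data in COPYLENGTH-byte
// chunks and may write up to COPYLENGTH-1 bytes past 'e'.  Example: asking
// it to cover 5 bytes still writes a full 8-byte packet.  This is why the
// encoder and decoder keep MFLIMIT/LASTLITERALS bytes of slack near the end
// of their buffers and switch to byte-exact copies there.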


//****************************
// Private functions
//****************************
#if LZ4_ARCH64

static
inline
int
LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse64( &r, val );
    return (int)(r>>3);
    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
    #else
    int r;
    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    r += (!val);
    return r;
    #endif
#else
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanForward64( &r, val );
    return (int)(r>>3);
    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
    #else
    static int DeBruijnBytePos[64] = {
		0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
		1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
		1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
		6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
		2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
		2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
		7, 6, 7, 7 };
    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
    #endif
#endif
}

#else

static
inline
int
LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#  else
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#  else
    static int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
		2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
		2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
		1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#  endif
#endif
}

#endif
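
// Worked example for the little-endian branch above: when two 8-byte words
// share their first two bytes, XORing them yields something like
// val = 0x0000000000FF0000; __builtin_ctzll(val) = 16 trailing zero bits,
// and 16 >> 3 = 2 matching bytes.  The DeBruijn-table fallback computes the
// same byte count without a hardware bit-scan instruction.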



//******************************
// Compression functions
//******************************

#include "hammer2_lz4_encoder.h"

/*
void* LZ4_create(void);
int LZ4_free(void* ctx);

Used to allocate and free the hashTable memory
used by the LZ4_compress_heap* family of functions.
LZ4_create() returns NULL if memory allocation fails.
*/
void*
LZ4_create(void)
{
	return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT);
}

int
LZ4_free(void* ctx)
{
	kfree(ctx, C_HASHTABLE);
	return 0;
}

int
LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize)
{
    void* ctx = LZ4_create();
    int result;
    if (ctx == NULL) return 0;    // Failed allocation => compression not done
    if (inputSize < LZ4_64KLIMIT)
        result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
			inputSize, maxOutputSize);
    else result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
			inputSize, maxOutputSize);
    LZ4_free(ctx);
    return result;
}
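
/*
 * Illustrative use of the wrapper above (hypothetical caller; the buffer
 * names are examples, not part of this file):
 *
 *     char obuf[65536];
 *     int clen = LZ4_compress_limitedOutput(ibuf, obuf, ilen, sizeof(obuf));
 *     if (clen == 0) {
 *         // either the hash-table allocation failed or the compressed
 *         // output would not fit in maxOutputSize; store uncompressed
 *     }
 */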


//****************************
// Decompression functions
//****************************

typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
typedef enum { full = 0, partial = 1 } exit_directive;


// This generic decompression function covers all use cases.
// It is instantiated several times, using different sets of directives.
// Note that it is essential this generic function really is inlined,
// in order to remove useless branches during compiler optimization.
static
inline
int LZ4_decompress_generic(
                 char* source,
                 char* dest,
                 int inputSize,
                 int outputSize,
                 // outputSize must be != 0; if endOnInput==endOnInputSize,
                 // this value is the max size of the output buffer.

                 int endOnInput,         // endOnOutputSize, endOnInputSize
                 int prefix64k,          // noPrefix, withPrefix
                 int partialDecoding,    // full, partial
                 int targetOutputSize    // only used if partialDecoding==partial
                 )
{
    // Local Variables
    BYTE* restrict ip = (BYTE*) source;
    BYTE* ref;
    BYTE* iend = ip + inputSize;

    BYTE* op = (BYTE*) dest;
    BYTE* oend = op + outputSize;
    BYTE* cpy;
    BYTE* oexit = op + targetOutputSize;

    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif


    // Special cases
    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;
    // targetOutputSize too large, better decode everything
    if unlikely(outputSize==0) goto _output_error;
    // Empty output buffer


    // Main Loop
    while (1)
    {
        unsigned token;
        size_t length;

        // get runlength
        token = *ip++;
        if ((length=(token>>ML_BITS)) == RUN_MASK)
        {
            unsigned s=255;
            while (((endOnInput)?ip<iend:1) && (s==255))
            {
                s = *ip++;
                length += s;
            }
        }
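        // Example: a token whose literal-length field equals RUN_MASK (15)
        // followed by extension bytes 255, 255, 30 decodes to
        // 15 + 255 + 255 + 30 = 555 literals; the loop above stops after
        // the first extension byte below 255.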

        // copy literals
        cpy = op+length;
        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
			|| (ip+length>iend-(2+1+LASTLITERALS))) )
            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
        {
            if (partialDecoding)
            {
                if (cpy > oend) goto _output_error;
                // Error : write attempt beyond end of output buffer
                if ((endOnInput) && (ip+length > iend)) goto _output_error;
                // Error : read attempt beyond end of input buffer
            }
            else
            {
                if ((!endOnInput) && (cpy != oend)) goto _output_error;
                // Error : block decoding must stop exactly there,
                // due to parsing restrictions
                if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
					goto _output_error;
					// Error : not enough room for another match (min 4) + 5 literals
            }
            memcpy(op, ip, length);
            ip += length;
            op += length;
            break;
            // Necessarily EOF, due to parsing restrictions
        }
        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;

        // get offset
        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
        if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
			goto _output_error;   // Error : offset outside destination buffer

        // get matchlength
        if ((length=(token&ML_MASK)) == ML_MASK)
        {
            while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
            // A minimum number of input bytes must remain for LASTLITERALS + token
            {
                unsigned s = *ip++;
                length += s;
                if (s==255) continue;
                break;
            }
        }

        // copy repeated sequence
        if unlikely((op-ref)<STEPSIZE)
        {
#if LZ4_ARCH64
            size_t dec64 = dec64table[op-ref];
#else
            const size_t dec64 = 0;
#endif
            op[0] = ref[0];
            op[1] = ref[1];
            op[2] = ref[2];
            op[3] = ref[3];
            op += 4, ref += 4; ref -= dec32table[op-ref];
            A32(op) = A32(ref);
            op += STEPSIZE-4; ref -= dec64;
        } else { LZ4_COPYSTEP(ref,op); }
        cpy = op + length - (STEPSIZE-4);

        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
        {
            if (cpy > oend-LASTLITERALS) goto _output_error;
            // Error : last 5 bytes must be literals
            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
            while(op<cpy) *op++=*ref++;
            op=cpy;
            continue;
        }
        LZ4_WILDCOPY(ref, op, cpy);
        op=cpy;   // correction
    }

    // end of decoding
    if (endOnInput)
       return (int) (((char*)op)-dest);     // Number of output bytes decoded
    else
       return (int) (((char*)ip)-source);   // Number of input bytes read

    // Overflow error detected
_output_error:
    return (int) (-(((char*)ip)-source))-1;
}


int
LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
							endOnInputSize, noPrefix, full, 0);
}
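
/*
 * Illustrative use (hypothetical caller; buffer names are examples only):
 *
 *     char plain[65536];
 *     int dlen = LZ4_decompress_safe(cbuf, plain, clen, sizeof(plain));
 *     if (dlen < 0) {
 *         // malformed or truncated input; the negative value encodes how
 *         // far into the input the decoder got before failing
 *     }
 */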