xref: /dragonfly/sbin/hammer2/lz4/hammer2_lz4.c (revision 3aa7d58a)
1*3aa7d58aSMatthew Dillon /*
2*3aa7d58aSMatthew Dillon    LZ4 - Fast LZ compression algorithm
3*3aa7d58aSMatthew Dillon    Copyright (C) 2011-2013, Yann Collet.
4*3aa7d58aSMatthew Dillon    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5*3aa7d58aSMatthew Dillon 
6*3aa7d58aSMatthew Dillon    Redistribution and use in source and binary forms, with or without
7*3aa7d58aSMatthew Dillon    modification, are permitted provided that the following conditions are
8*3aa7d58aSMatthew Dillon    met:
9*3aa7d58aSMatthew Dillon 
10*3aa7d58aSMatthew Dillon        * Redistributions of source code must retain the above copyright
11*3aa7d58aSMatthew Dillon    notice, this list of conditions and the following disclaimer.
12*3aa7d58aSMatthew Dillon        * Redistributions in binary form must reproduce the above
13*3aa7d58aSMatthew Dillon    copyright notice, this list of conditions and the following disclaimer
14*3aa7d58aSMatthew Dillon    in the documentation and/or other materials provided with the
15*3aa7d58aSMatthew Dillon    distribution.
16*3aa7d58aSMatthew Dillon 
17*3aa7d58aSMatthew Dillon    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18*3aa7d58aSMatthew Dillon    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19*3aa7d58aSMatthew Dillon    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20*3aa7d58aSMatthew Dillon    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21*3aa7d58aSMatthew Dillon    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22*3aa7d58aSMatthew Dillon    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23*3aa7d58aSMatthew Dillon    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24*3aa7d58aSMatthew Dillon    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25*3aa7d58aSMatthew Dillon    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*3aa7d58aSMatthew Dillon    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27*3aa7d58aSMatthew Dillon    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*3aa7d58aSMatthew Dillon 
29*3aa7d58aSMatthew Dillon    You can contact the author at :
30*3aa7d58aSMatthew Dillon    - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31*3aa7d58aSMatthew Dillon    - LZ4 source repository : http://code.google.com/p/lz4/
32*3aa7d58aSMatthew Dillon */
33*3aa7d58aSMatthew Dillon 
34*3aa7d58aSMatthew Dillon /*
35*3aa7d58aSMatthew Dillon Note : this source file requires "hammer2_lz4_encoder.h"
36*3aa7d58aSMatthew Dillon */
37*3aa7d58aSMatthew Dillon 
38*3aa7d58aSMatthew Dillon //**************************************
39*3aa7d58aSMatthew Dillon // Tuning parameters
40*3aa7d58aSMatthew Dillon //**************************************
41*3aa7d58aSMatthew Dillon // MEMORY_USAGE :
42*3aa7d58aSMatthew Dillon // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB;
43*3aa7d58aSMatthew Dillon // 16 -> 64KB; 20 -> 1MB; etc.)
44*3aa7d58aSMatthew Dillon // Increasing memory usage improves compression ratio
45*3aa7d58aSMatthew Dillon // Reduced memory usage can improve speed, due to cache effect
46*3aa7d58aSMatthew Dillon // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
47*3aa7d58aSMatthew Dillon #define MEMORY_USAGE 14
48*3aa7d58aSMatthew Dillon 
49*3aa7d58aSMatthew Dillon // HEAPMODE :
50*3aa7d58aSMatthew Dillon // Select how default compression function will allocate memory for its
51*3aa7d58aSMatthew Dillon // hash table,
52*3aa7d58aSMatthew Dillon // in memory stack (0:default, fastest), or in memory heap (1:requires
53*3aa7d58aSMatthew Dillon // memory allocation (malloc)).
54*3aa7d58aSMatthew Dillon // Default allocation strategy is to use stack (HEAPMODE 0)
55*3aa7d58aSMatthew Dillon // Note : explicit functions *_stack* and *_heap* are unaffected by this setting
56*3aa7d58aSMatthew Dillon #define HEAPMODE 1
57*3aa7d58aSMatthew Dillon 
58*3aa7d58aSMatthew Dillon // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
59*3aa7d58aSMatthew Dillon // This will provide a small boost to performance for big endian cpu,
60*3aa7d58aSMatthew Dillon // but the resulting compressed stream will be incompatible with little-endian CPU.
61*3aa7d58aSMatthew Dillon // You can set this option to 1 in situations where data will remain within
62*3aa7d58aSMatthew Dillon // closed environment
63*3aa7d58aSMatthew Dillon // This option is useless on Little_Endian CPU (such as x86)
64*3aa7d58aSMatthew Dillon //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
65*3aa7d58aSMatthew Dillon 
66*3aa7d58aSMatthew Dillon 
67*3aa7d58aSMatthew Dillon //**************************************
68*3aa7d58aSMatthew Dillon // CPU Feature Detection
69*3aa7d58aSMatthew Dillon //**************************************
70*3aa7d58aSMatthew Dillon // 32 or 64 bits ?
71*3aa7d58aSMatthew Dillon #if (defined(__x86_64__) || defined(_M_X64))   // Detects 64 bits mode
72*3aa7d58aSMatthew Dillon #  define LZ4_ARCH64 1
73*3aa7d58aSMatthew Dillon #else
74*3aa7d58aSMatthew Dillon #  define LZ4_ARCH64 0
75*3aa7d58aSMatthew Dillon #endif
76*3aa7d58aSMatthew Dillon 
77*3aa7d58aSMatthew Dillon //This reduced library code is only Little Endian compatible,
78*3aa7d58aSMatthew Dillon //if the need arises, please look for the appropriate defines in the
79*3aa7d58aSMatthew Dillon //original complete LZ4 library.
80*3aa7d58aSMatthew Dillon //Same is true for unaligned memory access which is enabled by default,
81*3aa7d58aSMatthew Dillon //hardware bit count, also enabled by default, and Microsoft/Visual
82*3aa7d58aSMatthew Dillon //Studio compilers.
83*3aa7d58aSMatthew Dillon 
84*3aa7d58aSMatthew Dillon //**************************************
85*3aa7d58aSMatthew Dillon // Compiler Options
86*3aa7d58aSMatthew Dillon //**************************************
87*3aa7d58aSMatthew Dillon #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
88*3aa7d58aSMatthew Dillon /* "restrict" is a known keyword */
89*3aa7d58aSMatthew Dillon #else
90*3aa7d58aSMatthew Dillon #  define restrict // Disable restrict
91*3aa7d58aSMatthew Dillon #endif
92*3aa7d58aSMatthew Dillon 
93*3aa7d58aSMatthew Dillon #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
94*3aa7d58aSMatthew Dillon 
95*3aa7d58aSMatthew Dillon #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
96*3aa7d58aSMatthew Dillon #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
97*3aa7d58aSMatthew Dillon #else
98*3aa7d58aSMatthew Dillon #  define expect(expr,value)    (expr)
99*3aa7d58aSMatthew Dillon #endif
100*3aa7d58aSMatthew Dillon 
101*3aa7d58aSMatthew Dillon #define likely(expr)     expect((expr) != 0, 1)
102*3aa7d58aSMatthew Dillon #define unlikely(expr)   expect((expr) != 0, 0)
103*3aa7d58aSMatthew Dillon 
104*3aa7d58aSMatthew Dillon 
105*3aa7d58aSMatthew Dillon //**************************************
106*3aa7d58aSMatthew Dillon // Includes
107*3aa7d58aSMatthew Dillon //**************************************
108*3aa7d58aSMatthew Dillon #include <stdio.h>
109*3aa7d58aSMatthew Dillon #include <stdlib.h>
110*3aa7d58aSMatthew Dillon #include <unistd.h>
111*3aa7d58aSMatthew Dillon #include <string.h>
112*3aa7d58aSMatthew Dillon 
113*3aa7d58aSMatthew Dillon #include "hammer2_lz4.h"
114*3aa7d58aSMatthew Dillon 
115*3aa7d58aSMatthew Dillon //**************************************
116*3aa7d58aSMatthew Dillon // Basic Types
117*3aa7d58aSMatthew Dillon //**************************************
118*3aa7d58aSMatthew Dillon #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
119*3aa7d58aSMatthew Dillon # include <sys/stdint.h>
120*3aa7d58aSMatthew Dillon   typedef uint8_t  BYTE;
121*3aa7d58aSMatthew Dillon   typedef uint16_t U16;
122*3aa7d58aSMatthew Dillon   typedef uint32_t U32;
123*3aa7d58aSMatthew Dillon   typedef  int32_t S32;
124*3aa7d58aSMatthew Dillon   typedef uint64_t U64;
125*3aa7d58aSMatthew Dillon #else
126*3aa7d58aSMatthew Dillon   typedef unsigned char       BYTE;
127*3aa7d58aSMatthew Dillon   typedef unsigned short      U16;
128*3aa7d58aSMatthew Dillon   typedef unsigned int        U32;
129*3aa7d58aSMatthew Dillon   typedef   signed int        S32;
130*3aa7d58aSMatthew Dillon   typedef unsigned long long  U64;
131*3aa7d58aSMatthew Dillon #endif
132*3aa7d58aSMatthew Dillon 
133*3aa7d58aSMatthew Dillon #if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
134*3aa7d58aSMatthew Dillon #  define _PACKED __attribute__ ((packed))
135*3aa7d58aSMatthew Dillon #else
136*3aa7d58aSMatthew Dillon #  define _PACKED
137*3aa7d58aSMatthew Dillon #endif
138*3aa7d58aSMatthew Dillon 
139*3aa7d58aSMatthew Dillon #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
140*3aa7d58aSMatthew Dillon #  pragma pack(push, 1)
141*3aa7d58aSMatthew Dillon #endif
142*3aa7d58aSMatthew Dillon 
143*3aa7d58aSMatthew Dillon typedef struct _U16_S { U16 v; } _PACKED U16_S;
144*3aa7d58aSMatthew Dillon typedef struct _U32_S { U32 v; } _PACKED U32_S;
145*3aa7d58aSMatthew Dillon typedef struct _U64_S { U64 v; } _PACKED U64_S;
146*3aa7d58aSMatthew Dillon 
147*3aa7d58aSMatthew Dillon #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
148*3aa7d58aSMatthew Dillon #  pragma pack(pop)
149*3aa7d58aSMatthew Dillon #endif
150*3aa7d58aSMatthew Dillon 
151*3aa7d58aSMatthew Dillon #define A64(x) (((U64_S *)(x))->v)
152*3aa7d58aSMatthew Dillon #define A32(x) (((U32_S *)(x))->v)
153*3aa7d58aSMatthew Dillon #define A16(x) (((U16_S *)(x))->v)
154*3aa7d58aSMatthew Dillon 
155*3aa7d58aSMatthew Dillon 
156*3aa7d58aSMatthew Dillon //**************************************
157*3aa7d58aSMatthew Dillon // Constants
158*3aa7d58aSMatthew Dillon //**************************************
159*3aa7d58aSMatthew Dillon #define HASHTABLESIZE (1 << MEMORY_USAGE)
160*3aa7d58aSMatthew Dillon 
161*3aa7d58aSMatthew Dillon #define MINMATCH 4
162*3aa7d58aSMatthew Dillon 
163*3aa7d58aSMatthew Dillon #define COPYLENGTH 8
164*3aa7d58aSMatthew Dillon #define LASTLITERALS 5
165*3aa7d58aSMatthew Dillon #define MFLIMIT (COPYLENGTH+MINMATCH)
166*3aa7d58aSMatthew Dillon #define MINLENGTH (MFLIMIT+1)
167*3aa7d58aSMatthew Dillon 
168*3aa7d58aSMatthew Dillon #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
169*3aa7d58aSMatthew Dillon #define SKIPSTRENGTH 6
170*3aa7d58aSMatthew Dillon // Increasing this value will make the compression run slower on
171*3aa7d58aSMatthew Dillon // incompressible data
172*3aa7d58aSMatthew Dillon 
173*3aa7d58aSMatthew Dillon #define MAXD_LOG 16
174*3aa7d58aSMatthew Dillon #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
175*3aa7d58aSMatthew Dillon 
176*3aa7d58aSMatthew Dillon #define ML_BITS  4
177*3aa7d58aSMatthew Dillon #define ML_MASK  ((1U<<ML_BITS)-1)
178*3aa7d58aSMatthew Dillon #define RUN_BITS (8-ML_BITS)
179*3aa7d58aSMatthew Dillon #define RUN_MASK ((1U<<RUN_BITS)-1)
180*3aa7d58aSMatthew Dillon 
181*3aa7d58aSMatthew Dillon 
182*3aa7d58aSMatthew Dillon //**************************************
183*3aa7d58aSMatthew Dillon // Architecture-specific macros
184*3aa7d58aSMatthew Dillon //**************************************
185*3aa7d58aSMatthew Dillon #if LZ4_ARCH64   // 64-bit
186*3aa7d58aSMatthew Dillon #  define STEPSIZE 8
187*3aa7d58aSMatthew Dillon #  define UARCH U64
188*3aa7d58aSMatthew Dillon #  define AARCH A64
189*3aa7d58aSMatthew Dillon #  define LZ4_COPYSTEP(s,d)       A64(d) = A64(s); d+=8; s+=8;
190*3aa7d58aSMatthew Dillon #  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d)
191*3aa7d58aSMatthew Dillon #  define LZ4_SECURECOPY(s,d,e)   if (d<e) LZ4_WILDCOPY(s,d,e)
192*3aa7d58aSMatthew Dillon #  define HTYPE                   U32
193*3aa7d58aSMatthew Dillon #  define INITBASE(base)          BYTE* base = ip
194*3aa7d58aSMatthew Dillon #else      // 32-bit
195*3aa7d58aSMatthew Dillon #  define STEPSIZE 4
196*3aa7d58aSMatthew Dillon #  define UARCH U32
197*3aa7d58aSMatthew Dillon #  define AARCH A32
198*3aa7d58aSMatthew Dillon #  define LZ4_COPYSTEP(s,d)       A32(d) = A32(s); d+=4; s+=4;
199*3aa7d58aSMatthew Dillon #  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
200*3aa7d58aSMatthew Dillon #  define LZ4_SECURECOPY          LZ4_WILDCOPY
201*3aa7d58aSMatthew Dillon #  define HTYPE                   BYTE*
202*3aa7d58aSMatthew Dillon #  define INITBASE(base)          int base = 0
203*3aa7d58aSMatthew Dillon #endif
204*3aa7d58aSMatthew Dillon 
#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
// Big-endian host emitting/reading the standard little-endian LZ4 stream:
// 16-bit match offsets must be byte-swapped on read and write.
// NOTE(fix): the trailing backslash continuations are required here.
// Without them each multi-line #define ended at its first line and the
// remaining statements became stray top-level tokens, breaking the build
// whenever this branch was selected (as in upstream lz4.c these macros
// are written with continuations).
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); \
                                            v = lz4_bswap16(v); \
                                            d = (s) - v; }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); \
                                            v = lz4_bswap16(v); \
                                            A16(p) = v; \
                                            p+=2; }
#else      // Little Endian
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
#endif
217*3aa7d58aSMatthew Dillon 
218*3aa7d58aSMatthew Dillon 
219*3aa7d58aSMatthew Dillon //**************************************
220*3aa7d58aSMatthew Dillon // Macros
221*3aa7d58aSMatthew Dillon //**************************************
222*3aa7d58aSMatthew Dillon #define LZ4_WILDCOPY(s,d,e)     do { LZ4_COPYPACKET(s,d) } while (d<e);
223*3aa7d58aSMatthew Dillon #define LZ4_BLINDCOPY(s,d,l)    { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
224*3aa7d58aSMatthew Dillon 
225*3aa7d58aSMatthew Dillon 
226*3aa7d58aSMatthew Dillon //****************************
227*3aa7d58aSMatthew Dillon // Private functions
228*3aa7d58aSMatthew Dillon //****************************
229*3aa7d58aSMatthew Dillon #if LZ4_ARCH64
230*3aa7d58aSMatthew Dillon 
// Count the number of zero bytes at the "first in memory" end of 'val'.
// 'val' is presumably the XOR of two 8-byte words being compared, so the
// result is the number of leading bytes the two words have in common
// (TODO confirm against the callers in hammer2_lz4_encoder.h).
// 'val' must be non-zero: a zero input gives an unspecified bit-scan
// result (and would not index the De Bruijn table meaningfully).
static
inline
int
LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
    // Big endian: the first-in-memory bytes are the most significant,
    // so scan from the high-order bit downwards.
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse64( &r, val );
    return (int)(r>>3);   // bit index -> byte index
    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
    #else
    // Software fallback: successively narrow down the highest non-zero byte.
    int r;
    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    r += (!val);
    return r;
    #endif
#else
    // Little endian: the first-in-memory bytes are the least significant,
    // so count trailing zero bits and divide by eight.
    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanForward64( &r, val );
    return (int)(r>>3);   // bit index -> byte index
    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
    #else
    // Software fallback: isolate the lowest set bit (val & -val), then use
    // a De Bruijn multiply-and-shift to look up its byte position.
    static int DeBruijnBytePos[64] = {
		0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
		1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
		1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
		6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
		2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
		2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
		7, 6, 7, 7 };
    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
    #endif
#endif
}
270*3aa7d58aSMatthew Dillon 
271*3aa7d58aSMatthew Dillon #else
272*3aa7d58aSMatthew Dillon 
// 32-bit variant of LZ4_NbCommonBytes(): count the number of zero bytes
// at the "first in memory" end of 'val' (presumably the XOR of two 4-byte
// words being compared — TODO confirm against the callers in
// hammer2_lz4_encoder.h).  'val' must be non-zero.
static
inline
int
LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
    // Big endian: scan from the most-significant bit downwards.
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse( &r, val );
    return (int)(r>>3);   // bit index -> byte index
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#  else
    // Software fallback: successively narrow down the highest non-zero byte.
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
#  endif
#else
    // Little endian: count trailing zero bits and divide by eight.
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);   // bit index -> byte index
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#  else
    // Software fallback: isolate the lowest set bit (val & -(S32)val), then
    // use a De Bruijn multiply-and-shift to look up its byte position.
    static int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
		2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
		2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
		1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#  endif
#endif
}
308*3aa7d58aSMatthew Dillon 
309*3aa7d58aSMatthew Dillon #endif
310*3aa7d58aSMatthew Dillon 
311*3aa7d58aSMatthew Dillon 
312*3aa7d58aSMatthew Dillon 
313*3aa7d58aSMatthew Dillon //******************************
314*3aa7d58aSMatthew Dillon // Compression functions
315*3aa7d58aSMatthew Dillon //******************************
316*3aa7d58aSMatthew Dillon 
317*3aa7d58aSMatthew Dillon #include "hammer2_lz4_encoder.h"
318*3aa7d58aSMatthew Dillon 
319*3aa7d58aSMatthew Dillon /*
320*3aa7d58aSMatthew Dillon void* LZ4_createHeapMemory();
321*3aa7d58aSMatthew Dillon int LZ4_freeHeapMemory(void* ctx);
322*3aa7d58aSMatthew Dillon 
323*3aa7d58aSMatthew Dillon Used to allocate and free hashTable memory
324*3aa7d58aSMatthew Dillon to be used by the LZ4_compress_heap* family of functions.
325*3aa7d58aSMatthew Dillon LZ4_createHeapMemory() returns NULL is memory allocation fails.
326*3aa7d58aSMatthew Dillon */
327*3aa7d58aSMatthew Dillon void*
328*3aa7d58aSMatthew Dillon LZ4_create(void)
329*3aa7d58aSMatthew Dillon {
330*3aa7d58aSMatthew Dillon 	return malloc(HASHTABLESIZE);
331*3aa7d58aSMatthew Dillon }
332*3aa7d58aSMatthew Dillon 
/*
 * Release hash-table memory previously obtained from LZ4_create().
 * free(NULL) is a harmless no-op, so no guard is needed.
 * Always returns 0.
 */
int
LZ4_free(void* ctx)
{
	free(ctx);

	return 0;
}
339*3aa7d58aSMatthew Dillon 
340*3aa7d58aSMatthew Dillon int
341*3aa7d58aSMatthew Dillon LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize)
342*3aa7d58aSMatthew Dillon {
343*3aa7d58aSMatthew Dillon     void* ctx = LZ4_create();
344*3aa7d58aSMatthew Dillon     int result;
345*3aa7d58aSMatthew Dillon     if (ctx == NULL) return 0;    // Failed allocation => compression not done
346*3aa7d58aSMatthew Dillon     if (inputSize < LZ4_64KLIMIT)
347*3aa7d58aSMatthew Dillon         result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
348*3aa7d58aSMatthew Dillon 			inputSize, maxOutputSize);
349*3aa7d58aSMatthew Dillon     else result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
350*3aa7d58aSMatthew Dillon 			inputSize, maxOutputSize);
351*3aa7d58aSMatthew Dillon     LZ4_free(ctx);
352*3aa7d58aSMatthew Dillon     return result;
353*3aa7d58aSMatthew Dillon }
354*3aa7d58aSMatthew Dillon 
355*3aa7d58aSMatthew Dillon 
356*3aa7d58aSMatthew Dillon //****************************
357*3aa7d58aSMatthew Dillon // Decompression functions
358*3aa7d58aSMatthew Dillon //****************************
359*3aa7d58aSMatthew Dillon 
360*3aa7d58aSMatthew Dillon typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
361*3aa7d58aSMatthew Dillon typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
362*3aa7d58aSMatthew Dillon typedef enum { full = 0, partial = 1 } exit_directive;
363*3aa7d58aSMatthew Dillon 
364*3aa7d58aSMatthew Dillon 
// This generic decompression function covers all use cases.
// It shall be instantiated several times, using different sets of directives.
// Note that it is essential this generic function is really inlined,
// in order to remove useless branches during compilation optimisation.
// Returns the number of bytes decoded (endOnInputSize) or the number of
// input bytes read (endOnOutputSize); on error, returns a negative value
// encoding the input position reached.
static
inline
int LZ4_decompress_generic(
                 char* source,
                 char* dest,
                 int inputSize,          // size of the compressed input, in bytes
                 int outputSize,
                 // OutputSize must be != 0; if endOnInput==endOnInputSize,
                 // this value is the max size of Output Buffer.

                 int endOnInput,         // endOnOutputSize, endOnInputSize
                 int prefix64k,          // noPrefix, withPrefix
                 int partialDecoding,    // full, partial
                 int targetOutputSize    // only used if partialDecoding==partial
                 )
{
    // Local Variables
    BYTE* restrict ip = (BYTE*) source;   // read cursor in compressed input
    BYTE* ref;                            // back-reference into already-decoded output
    BYTE* iend = ip + inputSize;

    BYTE* op = (BYTE*) dest;              // write cursor in decompressed output
    BYTE* oend = op + outputSize;
    BYTE* cpy;
    BYTE* oexit = op + targetOutputSize;

    // Fix-up tables for copying matches whose offset is smaller than the
    // machine word copy step (overlapping src/dst regions).
    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif


    // Special case
    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;
    // targetOutputSize too large, better decode everything
    if unlikely(outputSize==0) goto _output_error;
    // Empty output buffer


    // Main Loop : one iteration decodes one LZ4 sequence
    // (literal run + match copy)
    while (1)
    {
        unsigned token;
        size_t length;

        // get runlength (high nibble of token; 15 means "read more bytes")
        token = *ip++;
        if ((length=(token>>ML_BITS)) == RUN_MASK)
        {
            unsigned s=255;
            while (((endOnInput)?ip<iend:1) && (s==255))
            {
                s = *ip++;
                length += s;
            }
        }

        // copy literals
        cpy = op+length;
        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
			|| (ip+length>iend-(2+1+LASTLITERALS))) )
            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
        {
            // Near the end of either buffer: fall back to an exact,
            // bounds-checked memcpy and terminate the loop.
            if (partialDecoding)
            {
                if (cpy > oend) goto _output_error;
                // Error : write attempt beyond end of output buffer
                if ((endOnInput) && (ip+length > iend)) goto _output_error;
                // Error : read attempt beyond end of input buffer
            }
            else
            {
                if ((!endOnInput) && (cpy != oend)) goto _output_error;
                // Error : block decoding must stop exactly there,
                // due to parsing restrictions
                if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
					goto _output_error;
					// Error : not enough place for another match (min 4) + 5 literals
            }
            memcpy(op, ip, length);
            ip += length;
            op += length;
            break;
            // Necessarily EOF, due to parsing restrictions
        }
        // Fast path: wild copy may overshoot by up to COPYLENGTH-1 bytes,
        // which the guard above has already made safe.
        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;

        // get offset (16-bit little-endian back-reference distance)
        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
        if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
			goto _output_error;   // Error : offset outside destination buffer

        // get matchlength (low nibble of token; 15 means "read more bytes")
        if ((length=(token&ML_MASK)) == ML_MASK)
        {
            while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
            // A minimum nb of input bytes must remain for LASTLITERALS + token
            {
                unsigned s = *ip++;
                length += s;
                if (s==255) continue;
                break;
            }
        }

        // copy repeated sequence
        if unlikely((op-ref)<STEPSIZE)
        {
            // Offset smaller than the copy step: the source overlaps the
            // destination, so copy byte-wise then re-align 'ref' with the
            // dec32/dec64 fix-up tables before resuming word copies.
#if LZ4_ARCH64
            size_t dec64 = dec64table[op-ref];
#else
            const size_t dec64 = 0;
#endif
            op[0] = ref[0];
            op[1] = ref[1];
            op[2] = ref[2];
            op[3] = ref[3];
            op += 4, ref += 4; ref -= dec32table[op-ref];
            A32(op) = A32(ref);
            op += STEPSIZE-4; ref -= dec64;
        } else { LZ4_COPYSTEP(ref,op); }
        cpy = op + length - (STEPSIZE-4);

        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
        {
            // Match ends near the output end: finish carefully byte-wise.
            if (cpy > oend-LASTLITERALS) goto _output_error;
            // Error : last 5 bytes must be literals
            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
            while(op<cpy) *op++=*ref++;
            op=cpy;
            continue;
        }
        LZ4_WILDCOPY(ref, op, cpy);
        op=cpy;   // correction for wild-copy overshoot
    }

    // end of decoding
    if (endOnInput)
       return (int) (((char*)op)-dest);     // Nb of output bytes decoded
    else
       return (int) (((char*)ip)-source);   // Nb of input bytes read

    // Overflow error detected
_output_error:
    return (int) (-(((char*)ip)-source))-1;
}
515*3aa7d58aSMatthew Dillon 
516*3aa7d58aSMatthew Dillon 
517*3aa7d58aSMatthew Dillon int
518*3aa7d58aSMatthew Dillon LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
519*3aa7d58aSMatthew Dillon {
520*3aa7d58aSMatthew Dillon     return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
521*3aa7d58aSMatthew Dillon 							endOnInputSize, noPrefix, full, 0);
522*3aa7d58aSMatthew Dillon }
523