1*2d60b848STomohiro Kusumi /* 2*2d60b848STomohiro Kusumi LZ4 - Fast LZ compression algorithm 3*2d60b848STomohiro Kusumi Copyright (C) 2011-2013, Yann Collet. 4*2d60b848STomohiro Kusumi BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5*2d60b848STomohiro Kusumi 6*2d60b848STomohiro Kusumi Redistribution and use in source and binary forms, with or without 7*2d60b848STomohiro Kusumi modification, are permitted provided that the following conditions are 8*2d60b848STomohiro Kusumi met: 9*2d60b848STomohiro Kusumi 10*2d60b848STomohiro Kusumi * Redistributions of source code must retain the above copyright 11*2d60b848STomohiro Kusumi notice, this list of conditions and the following disclaimer. 12*2d60b848STomohiro Kusumi * Redistributions in binary form must reproduce the above 13*2d60b848STomohiro Kusumi copyright notice, this list of conditions and the following disclaimer 14*2d60b848STomohiro Kusumi in the documentation and/or other materials provided with the 15*2d60b848STomohiro Kusumi distribution. 16*2d60b848STomohiro Kusumi 17*2d60b848STomohiro Kusumi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18*2d60b848STomohiro Kusumi "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19*2d60b848STomohiro Kusumi LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20*2d60b848STomohiro Kusumi A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21*2d60b848STomohiro Kusumi OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22*2d60b848STomohiro Kusumi SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23*2d60b848STomohiro Kusumi LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24*2d60b848STomohiro Kusumi DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25*2d60b848STomohiro Kusumi THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26*2d60b848STomohiro Kusumi (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27*2d60b848STomohiro Kusumi OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28*2d60b848STomohiro Kusumi 29*2d60b848STomohiro Kusumi You can contact the author at : 30*2d60b848STomohiro Kusumi - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31*2d60b848STomohiro Kusumi - LZ4 source repository : http://code.google.com/p/lz4/ 32*2d60b848STomohiro Kusumi */ 33*2d60b848STomohiro Kusumi 34*2d60b848STomohiro Kusumi /* 35*2d60b848STomohiro Kusumi Note : this source file requires "hammer2_lz4_encoder.h" 36*2d60b848STomohiro Kusumi */ 37*2d60b848STomohiro Kusumi 38*2d60b848STomohiro Kusumi //************************************** 39*2d60b848STomohiro Kusumi // Tuning parameters 40*2d60b848STomohiro Kusumi //************************************** 41*2d60b848STomohiro Kusumi // MEMORY_USAGE : 42*2d60b848STomohiro Kusumi // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 43*2d60b848STomohiro Kusumi // 16 -> 64KB; 20 -> 1MB; etc.) 
44*2d60b848STomohiro Kusumi // Increasing memory usage improves compression ratio 45*2d60b848STomohiro Kusumi // Reduced memory usage can improve speed, due to cache effect 46*2d60b848STomohiro Kusumi // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache 47*2d60b848STomohiro Kusumi #define MEMORY_USAGE 14 48*2d60b848STomohiro Kusumi 49*2d60b848STomohiro Kusumi // HEAPMODE : 50*2d60b848STomohiro Kusumi // Select how default compression function will allocate memory for its 51*2d60b848STomohiro Kusumi // hash table, 52*2d60b848STomohiro Kusumi // in memory stack (0:default, fastest), or in memory heap (1:requires 53*2d60b848STomohiro Kusumi // memory allocation (malloc)). 54*2d60b848STomohiro Kusumi // Default allocation strategy is to use stack (HEAPMODE 0) 55*2d60b848STomohiro Kusumi // Note : explicit functions *_stack* and *_heap* are unaffected by this setting 56*2d60b848STomohiro Kusumi #define HEAPMODE 1 57*2d60b848STomohiro Kusumi 58*2d60b848STomohiro Kusumi // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : 59*2d60b848STomohiro Kusumi // This will provide a small boost to performance for big endian cpu, 60*2d60b848STomohiro Kusumi // but the resulting compressed stream will be incompatible with little-endian CPU. 61*2d60b848STomohiro Kusumi // You can set this option to 1 in situations where data will remain within 62*2d60b848STomohiro Kusumi // closed environment 63*2d60b848STomohiro Kusumi // This option is useless on Little_Endian CPU (such as x86) 64*2d60b848STomohiro Kusumi //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 65*2d60b848STomohiro Kusumi 66*2d60b848STomohiro Kusumi 67*2d60b848STomohiro Kusumi //************************************** 68*2d60b848STomohiro Kusumi // CPU Feature Detection 69*2d60b848STomohiro Kusumi //************************************** 70*2d60b848STomohiro Kusumi // 32 or 64 bits ? 
71*2d60b848STomohiro Kusumi #if (defined(__x86_64__) || defined(_M_X64)) // Detects 64 bits mode 72*2d60b848STomohiro Kusumi # define LZ4_ARCH64 1 73*2d60b848STomohiro Kusumi #else 74*2d60b848STomohiro Kusumi # define LZ4_ARCH64 0 75*2d60b848STomohiro Kusumi #endif 76*2d60b848STomohiro Kusumi 77*2d60b848STomohiro Kusumi //This reduced library code is only Little Endian compatible, 78*2d60b848STomohiro Kusumi //if the need arises, please look for the appropriate defines in the 79*2d60b848STomohiro Kusumi //original complete LZ4 library. 80*2d60b848STomohiro Kusumi //Same is true for unaligned memory access which is enabled by default, 81*2d60b848STomohiro Kusumi //hardware bit count, also enabled by default, and Microsoft/Visual 82*2d60b848STomohiro Kusumi //Studio compilers. 83*2d60b848STomohiro Kusumi 84*2d60b848STomohiro Kusumi //************************************** 85*2d60b848STomohiro Kusumi // Compiler Options 86*2d60b848STomohiro Kusumi //************************************** 87*2d60b848STomohiro Kusumi #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 88*2d60b848STomohiro Kusumi /* "restrict" is a known keyword */ 89*2d60b848STomohiro Kusumi #else 90*2d60b848STomohiro Kusumi # define restrict // Disable restrict 91*2d60b848STomohiro Kusumi #endif 92*2d60b848STomohiro Kusumi 93*2d60b848STomohiro Kusumi #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 94*2d60b848STomohiro Kusumi 95*2d60b848STomohiro Kusumi #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 96*2d60b848STomohiro Kusumi # define expect(expr,value) (__builtin_expect ((expr),(value)) ) 97*2d60b848STomohiro Kusumi #else 98*2d60b848STomohiro Kusumi # define expect(expr,value) (expr) 99*2d60b848STomohiro Kusumi #endif 100*2d60b848STomohiro Kusumi 101*2d60b848STomohiro Kusumi #define likely(expr) expect((expr) != 0, 1) 102*2d60b848STomohiro Kusumi #define unlikely(expr) expect((expr) != 0, 0) 103*2d60b848STomohiro Kusumi 104*2d60b848STomohiro Kusumi 
105*2d60b848STomohiro Kusumi //************************************** 106*2d60b848STomohiro Kusumi // Includes 107*2d60b848STomohiro Kusumi //************************************** 108*2d60b848STomohiro Kusumi #include "hammer2.h" 109*2d60b848STomohiro Kusumi #include "hammer2_lz4.h" 110*2d60b848STomohiro Kusumi //#include <sys/malloc.h> //for malloc macros, hammer2.h includes sys/param.h 111*2d60b848STomohiro Kusumi 112*2d60b848STomohiro Kusumi 113*2d60b848STomohiro Kusumi //Declaration for kmalloc functions 114*2d60b848STomohiro Kusumi /* 115*2d60b848STomohiro Kusumi static MALLOC_DEFINE(C_HASHTABLE, "comphashtable", 116*2d60b848STomohiro Kusumi "A hash table used by LZ4 compression function."); 117*2d60b848STomohiro Kusumi */ 118*2d60b848STomohiro Kusumi 119*2d60b848STomohiro Kusumi 120*2d60b848STomohiro Kusumi //************************************** 121*2d60b848STomohiro Kusumi // Basic Types 122*2d60b848STomohiro Kusumi //************************************** 123*2d60b848STomohiro Kusumi #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 124*2d60b848STomohiro Kusumi # include <sys/stdint.h> 125*2d60b848STomohiro Kusumi typedef uint8_t BYTE; 126*2d60b848STomohiro Kusumi typedef uint16_t U16; 127*2d60b848STomohiro Kusumi typedef uint32_t U32; 128*2d60b848STomohiro Kusumi typedef int32_t S32; 129*2d60b848STomohiro Kusumi typedef uint64_t U64; 130*2d60b848STomohiro Kusumi #else 131*2d60b848STomohiro Kusumi typedef unsigned char BYTE; 132*2d60b848STomohiro Kusumi typedef unsigned short U16; 133*2d60b848STomohiro Kusumi typedef unsigned int U32; 134*2d60b848STomohiro Kusumi typedef signed int S32; 135*2d60b848STomohiro Kusumi typedef unsigned long long U64; 136*2d60b848STomohiro Kusumi #endif 137*2d60b848STomohiro Kusumi 138*2d60b848STomohiro Kusumi #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) 139*2d60b848STomohiro Kusumi # define _PACKED __attribute__ ((packed)) 140*2d60b848STomohiro Kusumi #else 141*2d60b848STomohiro Kusumi # 
define _PACKED 142*2d60b848STomohiro Kusumi #endif 143*2d60b848STomohiro Kusumi 144*2d60b848STomohiro Kusumi #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 145*2d60b848STomohiro Kusumi # pragma pack(push, 1) 146*2d60b848STomohiro Kusumi #endif 147*2d60b848STomohiro Kusumi 148*2d60b848STomohiro Kusumi typedef struct _U16_S { U16 v; } _PACKED U16_S; 149*2d60b848STomohiro Kusumi typedef struct _U32_S { U32 v; } _PACKED U32_S; 150*2d60b848STomohiro Kusumi typedef struct _U64_S { U64 v; } _PACKED U64_S; 151*2d60b848STomohiro Kusumi 152*2d60b848STomohiro Kusumi #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 153*2d60b848STomohiro Kusumi # pragma pack(pop) 154*2d60b848STomohiro Kusumi #endif 155*2d60b848STomohiro Kusumi 156*2d60b848STomohiro Kusumi #define A64(x) (((U64_S *)(x))->v) 157*2d60b848STomohiro Kusumi #define A32(x) (((U32_S *)(x))->v) 158*2d60b848STomohiro Kusumi #define A16(x) (((U16_S *)(x))->v) 159*2d60b848STomohiro Kusumi 160*2d60b848STomohiro Kusumi 161*2d60b848STomohiro Kusumi //************************************** 162*2d60b848STomohiro Kusumi // Constants 163*2d60b848STomohiro Kusumi //************************************** 164*2d60b848STomohiro Kusumi #define HASHTABLESIZE (1 << MEMORY_USAGE) 165*2d60b848STomohiro Kusumi 166*2d60b848STomohiro Kusumi #define MINMATCH 4 167*2d60b848STomohiro Kusumi 168*2d60b848STomohiro Kusumi #define COPYLENGTH 8 169*2d60b848STomohiro Kusumi #define LASTLITERALS 5 170*2d60b848STomohiro Kusumi #define MFLIMIT (COPYLENGTH+MINMATCH) 171*2d60b848STomohiro Kusumi #define MINLENGTH (MFLIMIT+1) 172*2d60b848STomohiro Kusumi 173*2d60b848STomohiro Kusumi #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) 174*2d60b848STomohiro Kusumi #define SKIPSTRENGTH 6 175*2d60b848STomohiro Kusumi // Increasing this value will make the compression run slower on 176*2d60b848STomohiro Kusumi // incompressible data 177*2d60b848STomohiro Kusumi 178*2d60b848STomohiro Kusumi #define MAXD_LOG 16 179*2d60b848STomohiro Kusumi 
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

// Token layout: high 4 bits = literal run length, low 4 bits = match length.
#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)


//**************************************
// Architecture-specific macros
//**************************************
#if LZ4_ARCH64	// 64-bit
#  define STEPSIZE 8
#  define UARCH U64
#  define AARCH A64
#  define LZ4_COPYSTEP(s,d)		A64(d) = A64(s); d+=8; s+=8;
#  define LZ4_COPYPACKET(s,d)		LZ4_COPYSTEP(s,d)
#  define LZ4_SECURECOPY(s,d,e)	if (d<e) LZ4_WILDCOPY(s,d,e)
#  define HTYPE				U32
#  define INITBASE(base)		BYTE* base = ip
#else		// 32-bit
#  define STEPSIZE 4
#  define UARCH U32
#  define AARCH A32
#  define LZ4_COPYSTEP(s,d)		A32(d) = A32(s); d+=4; s+=4;
#  define LZ4_COPYPACKET(s,d)		LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
#  define LZ4_SECURECOPY		LZ4_WILDCOPY
#  define HTYPE				BYTE*
#  define INITBASE(base)		int base = 0
#endif

// LZ4 stores 16-bit offsets little-endian in the compressed stream; on a
// big-endian CPU they must be byte-swapped on the way in/out.
#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p)	{ U16 v = A16(p); \
						  v = lz4_bswap16(v); \
						  d = (s) - v; }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)	{ U16 v = (U16)(i); \
						  v = lz4_bswap16(v); \
						  A16(p) = v; \
						  p+=2; }
#else		// Little Endian
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p)	{ d = (s) - A16(p); }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)	{ A16(p) = v; p+=2; }
#endif


//**************************************
// Macros
//**************************************
// LZ4_WILDCOPY may overrun the target 'e' by up to one copy packet; callers
// must guarantee the slack space (this is why the decoder keeps MFLIMIT /
// COPYLENGTH margins before the end of the buffers).
#define LZ4_WILDCOPY(s,d,e)	do { LZ4_COPYPACKET(s,d) } while (d<e);
#define LZ4_BLINDCOPY(s,d,l)	{ BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }


//****************************
// Private functions
//****************************
#if LZ4_ARCH64

// LZ4_NbCommonBytes:
// 'val' is the XOR of two 8-byte words that are known to differ; the result
// is the number of bytes the two words have in common, i.e. the count of
// zero bytes before the first differing byte (trailing-zero bytes on a
// little-endian CPU, leading-zero bytes on a big-endian one).
static
inline
int
LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	// MSVC intrinsic: index of highest set bit; >>3 converts bits to bytes
	unsigned long r = 0;
	_BitScanReverse64( &r, val );
	return (int)(r>>3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
	// count leading zero bits, then convert to whole bytes
	return (__builtin_clzll(val) >> 3);
#else
	// Portable software fallback: binary search over the zero prefix
	int r;
	if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
	if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
	r += (!val);
	return r;
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	// MSVC intrinsic: index of lowest set bit; >>3 converts bits to bytes
	unsigned long r = 0;
	_BitScanForward64( &r, val );
	return (int)(r>>3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
	// count trailing zero bits, then convert to whole bytes
	return (__builtin_ctzll(val) >> 3);
#else
	// De Bruijn multiply-and-lookup: (val & -val) isolates the lowest set
	// bit; the multiplication maps each possible position to a unique
	// 6-bit index into the byte-position table.
	static int DeBruijnBytePos[64] = {
		0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
		1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
		1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
		6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
		2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
		2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
		7, 6, 7, 7 };
	return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
#endif
#endif
}

#else

// 32-bit variant of LZ4_NbCommonBytes: same contract as above, but 'val' is
// the XOR of two 4-byte words.
static
inline
int
LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse( &r, val );
	return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#  else
	// Portable software fallback
	int r;
	if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
	r += (!val);
	return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r;
	_BitScanForward( &r, val );
	return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#  else
	// De Bruijn multiply-and-lookup (32-bit flavour); the cast through S32
	// makes the negation well-defined for the isolate-lowest-bit trick.
	static int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
		2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
		2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
		1, 1 };
	return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#  endif
#endif
}

#endif



//******************************
// Compression functions
//******************************

#include "hammer2_lz4_encoder.h"

/*
325*2d60b848STomohiro Kusumi void* LZ4_createHeapMemory(); 326*2d60b848STomohiro Kusumi int LZ4_freeHeapMemory(void* ctx); 327*2d60b848STomohiro Kusumi 328*2d60b848STomohiro Kusumi Used to allocate and free hashTable memory 329*2d60b848STomohiro Kusumi to be used by the LZ4_compress_heap* family of functions. 330*2d60b848STomohiro Kusumi LZ4_createHeapMemory() returns NULL is memory allocation fails. 331*2d60b848STomohiro Kusumi */ 332*2d60b848STomohiro Kusumi void* 333*2d60b848STomohiro Kusumi LZ4_create(void) 334*2d60b848STomohiro Kusumi { 335*2d60b848STomohiro Kusumi return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT); 336*2d60b848STomohiro Kusumi } 337*2d60b848STomohiro Kusumi 338*2d60b848STomohiro Kusumi int 339*2d60b848STomohiro Kusumi LZ4_free(void* ctx) 340*2d60b848STomohiro Kusumi { 341*2d60b848STomohiro Kusumi kfree(ctx, C_HASHTABLE); 342*2d60b848STomohiro Kusumi return 0; 343*2d60b848STomohiro Kusumi } 344*2d60b848STomohiro Kusumi 345*2d60b848STomohiro Kusumi int 346*2d60b848STomohiro Kusumi LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize) 347*2d60b848STomohiro Kusumi { 348*2d60b848STomohiro Kusumi void* ctx = LZ4_create(); 349*2d60b848STomohiro Kusumi int result; 350*2d60b848STomohiro Kusumi if (ctx == NULL) return 0; // Failed allocation => compression not done 351*2d60b848STomohiro Kusumi if (inputSize < LZ4_64KLIMIT) 352*2d60b848STomohiro Kusumi result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, 353*2d60b848STomohiro Kusumi inputSize, maxOutputSize); 354*2d60b848STomohiro Kusumi else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, 355*2d60b848STomohiro Kusumi inputSize, maxOutputSize); 356*2d60b848STomohiro Kusumi LZ4_free(ctx); 357*2d60b848STomohiro Kusumi return result; 358*2d60b848STomohiro Kusumi } 359*2d60b848STomohiro Kusumi 360*2d60b848STomohiro Kusumi 361*2d60b848STomohiro Kusumi //**************************** 362*2d60b848STomohiro Kusumi // Decompression functions 
//****************************

typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
typedef enum { full = 0, partial = 1 } exit_directive;


// This generic decompression function covers all use cases.
// It shall be instantiated several times, using different sets of directives.
// Note that it is essential this generic function is really inlined,
// in order to remove useless branches during compilation optimisation.
static
inline
int LZ4_decompress_generic(
		char* source,
		char* dest,
		int inputSize,		// used only if endOnInput==endOnInputSize
		int outputSize,
		// OutputSize must be != 0; if endOnInput==endOnInputSize,
		// this value is the max size of Output Buffer.

		int endOnInput,		// endOnOutputSize, endOnInputSize
		int prefix64k,		// noPrefix, withPrefix
		int partialDecoding,	// full, partial
		int targetOutputSize	// only used if partialDecoding==partial
		)
{
	// Local Variables
	BYTE* restrict ip = (BYTE*) source;	// read cursor in compressed data
	BYTE* ref;				// match source within 'dest'
	BYTE* iend = ip + inputSize;		// end of compressed input

	BYTE* op = (BYTE*) dest;		// write cursor in output
	BYTE* oend = op + outputSize;		// end of output buffer
	BYTE* cpy;				// end of current copy run
	BYTE* oexit = op + targetOutputSize;	// early-exit point (partial mode)

	// Pointer-adjustment tables for matches whose offset is smaller than
	// STEPSIZE (overlapping copies): after the 4 explicit byte copies,
	// dec32table/dec64table realign 'ref' so the word copies reproduce
	// the repeating pattern correctly.
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif


	// Special case
	if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;
	// targetOutputSize too large, better decode everything
	if unlikely(outputSize==0) goto _output_error;
	// Empty output buffer


	// Main Loop: one iteration per LZ4 sequence (literals + match)
	while (1)
	{
		unsigned token;
		size_t length;

		// get runlength (high nibble of token; RUN_MASK means the
		// length continues in following 255-terminated extra bytes)
		token = *ip++;
		if ((length=(token>>ML_BITS)) == RUN_MASK)
		{
			unsigned s=255;
			while (((endOnInput)?ip<iend:1) && (s==255))
			{
				s = *ip++;
				length += s;
			}
		}

		// copy literals; the slow path below handles the last run,
		// which may not use the overrunning wildcopy
		cpy = op+length;
		if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
		    || (ip+length>iend-(2+1+LASTLITERALS))) )
		    || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
		{
			if (partialDecoding)
			{
				if (cpy > oend) goto _output_error;
				// Error : write attempt beyond end of output buffer
				if ((endOnInput) && (ip+length > iend)) goto _output_error;
				// Error : read attempt beyond end of input buffer
			}
			else
			{
				if ((!endOnInput) && (cpy != oend)) goto _output_error;
				// Error : block decoding must stop exactly there,
				// due to parsing restrictions
				if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
					goto _output_error;
				// Error : not enough place for another match (min 4) + 5 literals
			}
			memcpy(op, ip, length);
			ip += length;
			op += length;
			break;
			// Necessarily EOF, due to parsing restrictions
		}
		LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;

		// get offset (16-bit little-endian); ref = cpy - offset
		LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
		if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
			goto _output_error;	// Error : offset outside destination buffer

		// get matchlength (low nibble; ML_MASK means continuation bytes)
		if ((length=(token&ML_MASK)) == ML_MASK)
		{
			while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
			// A minimum nb of input bytes must remain for LASTLITERALS + token
			{
				unsigned s = *ip++;
				length += s;
				if (s==255) continue;
				break;
			}
		}

		// copy repeated sequence; overlapping matches (offset < STEPSIZE)
		// need the byte-wise start + table-adjusted realignment
		if unlikely((op-ref)<STEPSIZE)
		{
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const size_t dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4, ref += 4; ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE-4; ref -= dec64;
		} else { LZ4_COPYSTEP(ref,op); }
		cpy = op + length - (STEPSIZE-4);

		if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
		{
			if (cpy > oend-LASTLITERALS) goto _output_error;
			// Error : last 5 bytes must be literals
			LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
			// finish the tail byte-by-byte, no overrun allowed here
			while(op<cpy) *op++=*ref++;
			op=cpy;
			continue;
		}
		LZ4_WILDCOPY(ref, op, cpy);
		op=cpy;		// correction
	}

	// end of decoding
	if (endOnInput)
		return (int) (((char*)op)-dest);	// Nb of output bytes decoded
	else
		return (int) (((char*)ip)-source);	// Nb of input bytes read

	// Overflow error detected: return a negative value encoding the
	// position in the input where decoding failed
_output_error:
	return (int) (-(((char*)ip)-source))-1;
}


// Safe decompression entry point: never reads beyond source+inputSize nor
// writes beyond dest+maxOutputSize.  Returns the decoded size, or a
// negative value on malformed input.
int
LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
{
	return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
	    endOnInputSize, noPrefix, full, 0);
}