1*3aa7d58aSMatthew Dillon /* 2*3aa7d58aSMatthew Dillon LZ4 - Fast LZ compression algorithm 3*3aa7d58aSMatthew Dillon Copyright (C) 2011-2013, Yann Collet. 4*3aa7d58aSMatthew Dillon BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5*3aa7d58aSMatthew Dillon 6*3aa7d58aSMatthew Dillon Redistribution and use in source and binary forms, with or without 7*3aa7d58aSMatthew Dillon modification, are permitted provided that the following conditions are 8*3aa7d58aSMatthew Dillon met: 9*3aa7d58aSMatthew Dillon 10*3aa7d58aSMatthew Dillon * Redistributions of source code must retain the above copyright 11*3aa7d58aSMatthew Dillon notice, this list of conditions and the following disclaimer. 12*3aa7d58aSMatthew Dillon * Redistributions in binary form must reproduce the above 13*3aa7d58aSMatthew Dillon copyright notice, this list of conditions and the following disclaimer 14*3aa7d58aSMatthew Dillon in the documentation and/or other materials provided with the 15*3aa7d58aSMatthew Dillon distribution. 16*3aa7d58aSMatthew Dillon 17*3aa7d58aSMatthew Dillon THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18*3aa7d58aSMatthew Dillon "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19*3aa7d58aSMatthew Dillon LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20*3aa7d58aSMatthew Dillon A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21*3aa7d58aSMatthew Dillon OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22*3aa7d58aSMatthew Dillon SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23*3aa7d58aSMatthew Dillon LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24*3aa7d58aSMatthew Dillon DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25*3aa7d58aSMatthew Dillon THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26*3aa7d58aSMatthew Dillon (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27*3aa7d58aSMatthew Dillon OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28*3aa7d58aSMatthew Dillon 29*3aa7d58aSMatthew Dillon You can contact the author at : 30*3aa7d58aSMatthew Dillon - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31*3aa7d58aSMatthew Dillon - LZ4 source repository : http://code.google.com/p/lz4/ 32*3aa7d58aSMatthew Dillon */ 33*3aa7d58aSMatthew Dillon 34*3aa7d58aSMatthew Dillon /* 35*3aa7d58aSMatthew Dillon Note : this source file requires "hammer2_lz4_encoder.h" 36*3aa7d58aSMatthew Dillon */ 37*3aa7d58aSMatthew Dillon 38*3aa7d58aSMatthew Dillon //************************************** 39*3aa7d58aSMatthew Dillon // Tuning parameters 40*3aa7d58aSMatthew Dillon //************************************** 41*3aa7d58aSMatthew Dillon // MEMORY_USAGE : 42*3aa7d58aSMatthew Dillon // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 43*3aa7d58aSMatthew Dillon // 16 -> 64KB; 20 -> 1MB; etc.) 
// Increasing memory usage improves compression ratio.
// Reducing memory usage can improve speed, thanks to cache effects.
// The default of 14 (16KB) fits nicely into an Intel x86 L1 cache.
#define MEMORY_USAGE 14

// HEAPMODE :
// Selects how the default compression function allocates its hash table:
// on the stack (0, fastest) or on the heap (1, requires malloc).
// Note : the explicit *_stack* and *_heap* functions ignore this setting.
#define HEAPMODE 1

// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
// Gives a small performance boost on big-endian CPUs, but the resulting
// compressed stream becomes incompatible with little-endian CPUs.
// Only enable this when data never leaves a closed big-endian environment.
// Has no effect on little-endian CPUs (such as x86).
//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1


//**************************************
// CPU Feature Detection
//**************************************
// Pointer width : LZ4_ARCH64 is 1 on 64-bit targets, 0 otherwise.
#if (defined(__x86_64__) || defined(_M_X64))   // 64-bit mode detected
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif

// This reduced library is little-endian only; for big-endian support,
// hardware bit-count toggles, unaligned-access opt-out, and MSVC support,
// refer to the complete upstream LZ4 library.

//**************************************
// Compiler Options
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
/* "restrict" is a known keyword */
#else
#  define restrict      // older dialects : neutralize the keyword
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

// Branch-prediction hints; fall back to a no-op on compilers without
// __builtin_expect.
#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#  define expect(expr,value) (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value) (expr)
#endif

#define likely(expr)   expect((expr) != 0, 1)
#define unlikely(expr) expect((expr) != 0, 0)
//************************************** 106*3aa7d58aSMatthew Dillon // Includes 107*3aa7d58aSMatthew Dillon //************************************** 108*3aa7d58aSMatthew Dillon #include <stdio.h> 109*3aa7d58aSMatthew Dillon #include <stdlib.h> 110*3aa7d58aSMatthew Dillon #include <unistd.h> 111*3aa7d58aSMatthew Dillon #include <string.h> 112*3aa7d58aSMatthew Dillon 113*3aa7d58aSMatthew Dillon #include "hammer2_lz4.h" 114*3aa7d58aSMatthew Dillon 115*3aa7d58aSMatthew Dillon //************************************** 116*3aa7d58aSMatthew Dillon // Basic Types 117*3aa7d58aSMatthew Dillon //************************************** 118*3aa7d58aSMatthew Dillon #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 119*3aa7d58aSMatthew Dillon # include <sys/stdint.h> 120*3aa7d58aSMatthew Dillon typedef uint8_t BYTE; 121*3aa7d58aSMatthew Dillon typedef uint16_t U16; 122*3aa7d58aSMatthew Dillon typedef uint32_t U32; 123*3aa7d58aSMatthew Dillon typedef int32_t S32; 124*3aa7d58aSMatthew Dillon typedef uint64_t U64; 125*3aa7d58aSMatthew Dillon #else 126*3aa7d58aSMatthew Dillon typedef unsigned char BYTE; 127*3aa7d58aSMatthew Dillon typedef unsigned short U16; 128*3aa7d58aSMatthew Dillon typedef unsigned int U32; 129*3aa7d58aSMatthew Dillon typedef signed int S32; 130*3aa7d58aSMatthew Dillon typedef unsigned long long U64; 131*3aa7d58aSMatthew Dillon #endif 132*3aa7d58aSMatthew Dillon 133*3aa7d58aSMatthew Dillon #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) 134*3aa7d58aSMatthew Dillon # define _PACKED __attribute__ ((packed)) 135*3aa7d58aSMatthew Dillon #else 136*3aa7d58aSMatthew Dillon # define _PACKED 137*3aa7d58aSMatthew Dillon #endif 138*3aa7d58aSMatthew Dillon 139*3aa7d58aSMatthew Dillon #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 140*3aa7d58aSMatthew Dillon # pragma pack(push, 1) 141*3aa7d58aSMatthew Dillon #endif 142*3aa7d58aSMatthew Dillon 143*3aa7d58aSMatthew Dillon typedef struct _U16_S { U16 v; } _PACKED U16_S; 
144*3aa7d58aSMatthew Dillon typedef struct _U32_S { U32 v; } _PACKED U32_S; 145*3aa7d58aSMatthew Dillon typedef struct _U64_S { U64 v; } _PACKED U64_S; 146*3aa7d58aSMatthew Dillon 147*3aa7d58aSMatthew Dillon #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 148*3aa7d58aSMatthew Dillon # pragma pack(pop) 149*3aa7d58aSMatthew Dillon #endif 150*3aa7d58aSMatthew Dillon 151*3aa7d58aSMatthew Dillon #define A64(x) (((U64_S *)(x))->v) 152*3aa7d58aSMatthew Dillon #define A32(x) (((U32_S *)(x))->v) 153*3aa7d58aSMatthew Dillon #define A16(x) (((U16_S *)(x))->v) 154*3aa7d58aSMatthew Dillon 155*3aa7d58aSMatthew Dillon 156*3aa7d58aSMatthew Dillon //************************************** 157*3aa7d58aSMatthew Dillon // Constants 158*3aa7d58aSMatthew Dillon //************************************** 159*3aa7d58aSMatthew Dillon #define HASHTABLESIZE (1 << MEMORY_USAGE) 160*3aa7d58aSMatthew Dillon 161*3aa7d58aSMatthew Dillon #define MINMATCH 4 162*3aa7d58aSMatthew Dillon 163*3aa7d58aSMatthew Dillon #define COPYLENGTH 8 164*3aa7d58aSMatthew Dillon #define LASTLITERALS 5 165*3aa7d58aSMatthew Dillon #define MFLIMIT (COPYLENGTH+MINMATCH) 166*3aa7d58aSMatthew Dillon #define MINLENGTH (MFLIMIT+1) 167*3aa7d58aSMatthew Dillon 168*3aa7d58aSMatthew Dillon #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) 169*3aa7d58aSMatthew Dillon #define SKIPSTRENGTH 6 170*3aa7d58aSMatthew Dillon // Increasing this value will make the compression run slower on 171*3aa7d58aSMatthew Dillon // incompressible data 172*3aa7d58aSMatthew Dillon 173*3aa7d58aSMatthew Dillon #define MAXD_LOG 16 174*3aa7d58aSMatthew Dillon #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 175*3aa7d58aSMatthew Dillon 176*3aa7d58aSMatthew Dillon #define ML_BITS 4 177*3aa7d58aSMatthew Dillon #define ML_MASK ((1U<<ML_BITS)-1) 178*3aa7d58aSMatthew Dillon #define RUN_BITS (8-ML_BITS) 179*3aa7d58aSMatthew Dillon #define RUN_MASK ((1U<<RUN_BITS)-1) 180*3aa7d58aSMatthew Dillon 181*3aa7d58aSMatthew Dillon 182*3aa7d58aSMatthew Dillon 
//************************************** 183*3aa7d58aSMatthew Dillon // Architecture-specific macros 184*3aa7d58aSMatthew Dillon //************************************** 185*3aa7d58aSMatthew Dillon #if LZ4_ARCH64 // 64-bit 186*3aa7d58aSMatthew Dillon # define STEPSIZE 8 187*3aa7d58aSMatthew Dillon # define UARCH U64 188*3aa7d58aSMatthew Dillon # define AARCH A64 189*3aa7d58aSMatthew Dillon # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; 190*3aa7d58aSMatthew Dillon # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) 191*3aa7d58aSMatthew Dillon # define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e) 192*3aa7d58aSMatthew Dillon # define HTYPE U32 193*3aa7d58aSMatthew Dillon # define INITBASE(base) BYTE* base = ip 194*3aa7d58aSMatthew Dillon #else // 32-bit 195*3aa7d58aSMatthew Dillon # define STEPSIZE 4 196*3aa7d58aSMatthew Dillon # define UARCH U32 197*3aa7d58aSMatthew Dillon # define AARCH A32 198*3aa7d58aSMatthew Dillon # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; 199*3aa7d58aSMatthew Dillon # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); 200*3aa7d58aSMatthew Dillon # define LZ4_SECURECOPY LZ4_WILDCOPY 201*3aa7d58aSMatthew Dillon # define HTYPE BYTE* 202*3aa7d58aSMatthew Dillon # define INITBASE(base) int base = 0 203*3aa7d58aSMatthew Dillon #endif 204*3aa7d58aSMatthew Dillon 205*3aa7d58aSMatthew Dillon #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 206*3aa7d58aSMatthew Dillon # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); 207*3aa7d58aSMatthew Dillon v = lz4_bswap16(v); 208*3aa7d58aSMatthew Dillon d = (s) - v; } 209*3aa7d58aSMatthew Dillon # define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); 210*3aa7d58aSMatthew Dillon v = lz4_bswap16(v); 211*3aa7d58aSMatthew Dillon A16(p) = v; 212*3aa7d58aSMatthew Dillon p+=2; } 213*3aa7d58aSMatthew Dillon #else // Little Endian 214*3aa7d58aSMatthew Dillon # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } 215*3aa7d58aSMatthew Dillon 
# define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } 216*3aa7d58aSMatthew Dillon #endif 217*3aa7d58aSMatthew Dillon 218*3aa7d58aSMatthew Dillon 219*3aa7d58aSMatthew Dillon //************************************** 220*3aa7d58aSMatthew Dillon // Macros 221*3aa7d58aSMatthew Dillon //************************************** 222*3aa7d58aSMatthew Dillon #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); 223*3aa7d58aSMatthew Dillon #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; } 224*3aa7d58aSMatthew Dillon 225*3aa7d58aSMatthew Dillon 226*3aa7d58aSMatthew Dillon //**************************** 227*3aa7d58aSMatthew Dillon // Private functions 228*3aa7d58aSMatthew Dillon //**************************** 229*3aa7d58aSMatthew Dillon #if LZ4_ARCH64 230*3aa7d58aSMatthew Dillon 231*3aa7d58aSMatthew Dillon static 232*3aa7d58aSMatthew Dillon inline 233*3aa7d58aSMatthew Dillon int 234*3aa7d58aSMatthew Dillon LZ4_NbCommonBytes (register U64 val) 235*3aa7d58aSMatthew Dillon { 236*3aa7d58aSMatthew Dillon #if defined(LZ4_BIG_ENDIAN) 237*3aa7d58aSMatthew Dillon #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 238*3aa7d58aSMatthew Dillon unsigned long r = 0; 239*3aa7d58aSMatthew Dillon _BitScanReverse64( &r, val ); 240*3aa7d58aSMatthew Dillon return (int)(r>>3); 241*3aa7d58aSMatthew Dillon #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 242*3aa7d58aSMatthew Dillon return (__builtin_clzll(val) >> 3); 243*3aa7d58aSMatthew Dillon #else 244*3aa7d58aSMatthew Dillon int r; 245*3aa7d58aSMatthew Dillon if (!(val>>32)) { r=4; } else { r=0; val>>=32; } 246*3aa7d58aSMatthew Dillon if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } 247*3aa7d58aSMatthew Dillon r += (!val); 248*3aa7d58aSMatthew Dillon return r; 249*3aa7d58aSMatthew Dillon #endif 250*3aa7d58aSMatthew Dillon #else 251*3aa7d58aSMatthew Dillon #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 252*3aa7d58aSMatthew Dillon unsigned long r = 
0; 253*3aa7d58aSMatthew Dillon _BitScanForward64( &r, val ); 254*3aa7d58aSMatthew Dillon return (int)(r>>3); 255*3aa7d58aSMatthew Dillon #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 256*3aa7d58aSMatthew Dillon return (__builtin_ctzll(val) >> 3); 257*3aa7d58aSMatthew Dillon #else 258*3aa7d58aSMatthew Dillon static int DeBruijnBytePos[64] = { 259*3aa7d58aSMatthew Dillon 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 260*3aa7d58aSMatthew Dillon 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 261*3aa7d58aSMatthew Dillon 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 262*3aa7d58aSMatthew Dillon 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 263*3aa7d58aSMatthew Dillon 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 264*3aa7d58aSMatthew Dillon 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 265*3aa7d58aSMatthew Dillon 7, 6, 7, 7 }; 266*3aa7d58aSMatthew Dillon return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; 267*3aa7d58aSMatthew Dillon #endif 268*3aa7d58aSMatthew Dillon #endif 269*3aa7d58aSMatthew Dillon } 270*3aa7d58aSMatthew Dillon 271*3aa7d58aSMatthew Dillon #else 272*3aa7d58aSMatthew Dillon 273*3aa7d58aSMatthew Dillon static 274*3aa7d58aSMatthew Dillon inline 275*3aa7d58aSMatthew Dillon int 276*3aa7d58aSMatthew Dillon LZ4_NbCommonBytes (register U32 val) 277*3aa7d58aSMatthew Dillon { 278*3aa7d58aSMatthew Dillon #if defined(LZ4_BIG_ENDIAN) 279*3aa7d58aSMatthew Dillon # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 280*3aa7d58aSMatthew Dillon unsigned long r = 0; 281*3aa7d58aSMatthew Dillon _BitScanReverse( &r, val ); 282*3aa7d58aSMatthew Dillon return (int)(r>>3); 283*3aa7d58aSMatthew Dillon # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 284*3aa7d58aSMatthew Dillon return (__builtin_clz(val) >> 3); 285*3aa7d58aSMatthew Dillon # else 286*3aa7d58aSMatthew Dillon int r; 287*3aa7d58aSMatthew Dillon if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } 288*3aa7d58aSMatthew Dillon r += (!val); 289*3aa7d58aSMatthew Dillon return r; 290*3aa7d58aSMatthew Dillon # endif 
291*3aa7d58aSMatthew Dillon #else 292*3aa7d58aSMatthew Dillon # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 293*3aa7d58aSMatthew Dillon unsigned long r; 294*3aa7d58aSMatthew Dillon _BitScanForward( &r, val ); 295*3aa7d58aSMatthew Dillon return (int)(r>>3); 296*3aa7d58aSMatthew Dillon # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 297*3aa7d58aSMatthew Dillon return (__builtin_ctz(val) >> 3); 298*3aa7d58aSMatthew Dillon # else 299*3aa7d58aSMatthew Dillon static int DeBruijnBytePos[32] = { 300*3aa7d58aSMatthew Dillon 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 301*3aa7d58aSMatthew Dillon 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 302*3aa7d58aSMatthew Dillon 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 303*3aa7d58aSMatthew Dillon 1, 1 }; 304*3aa7d58aSMatthew Dillon return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 305*3aa7d58aSMatthew Dillon # endif 306*3aa7d58aSMatthew Dillon #endif 307*3aa7d58aSMatthew Dillon } 308*3aa7d58aSMatthew Dillon 309*3aa7d58aSMatthew Dillon #endif 310*3aa7d58aSMatthew Dillon 311*3aa7d58aSMatthew Dillon 312*3aa7d58aSMatthew Dillon 313*3aa7d58aSMatthew Dillon //****************************** 314*3aa7d58aSMatthew Dillon // Compression functions 315*3aa7d58aSMatthew Dillon //****************************** 316*3aa7d58aSMatthew Dillon 317*3aa7d58aSMatthew Dillon #include "hammer2_lz4_encoder.h" 318*3aa7d58aSMatthew Dillon 319*3aa7d58aSMatthew Dillon /* 320*3aa7d58aSMatthew Dillon void* LZ4_createHeapMemory(); 321*3aa7d58aSMatthew Dillon int LZ4_freeHeapMemory(void* ctx); 322*3aa7d58aSMatthew Dillon 323*3aa7d58aSMatthew Dillon Used to allocate and free hashTable memory 324*3aa7d58aSMatthew Dillon to be used by the LZ4_compress_heap* family of functions. 325*3aa7d58aSMatthew Dillon LZ4_createHeapMemory() returns NULL is memory allocation fails. 
326*3aa7d58aSMatthew Dillon */ 327*3aa7d58aSMatthew Dillon void* 328*3aa7d58aSMatthew Dillon LZ4_create(void) 329*3aa7d58aSMatthew Dillon { 330*3aa7d58aSMatthew Dillon return malloc(HASHTABLESIZE); 331*3aa7d58aSMatthew Dillon } 332*3aa7d58aSMatthew Dillon 333*3aa7d58aSMatthew Dillon int 334*3aa7d58aSMatthew Dillon LZ4_free(void* ctx) 335*3aa7d58aSMatthew Dillon { 336*3aa7d58aSMatthew Dillon free(ctx); 337*3aa7d58aSMatthew Dillon return 0; 338*3aa7d58aSMatthew Dillon } 339*3aa7d58aSMatthew Dillon 340*3aa7d58aSMatthew Dillon int 341*3aa7d58aSMatthew Dillon LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize) 342*3aa7d58aSMatthew Dillon { 343*3aa7d58aSMatthew Dillon void* ctx = LZ4_create(); 344*3aa7d58aSMatthew Dillon int result; 345*3aa7d58aSMatthew Dillon if (ctx == NULL) return 0; // Failed allocation => compression not done 346*3aa7d58aSMatthew Dillon if (inputSize < LZ4_64KLIMIT) 347*3aa7d58aSMatthew Dillon result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, 348*3aa7d58aSMatthew Dillon inputSize, maxOutputSize); 349*3aa7d58aSMatthew Dillon else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, 350*3aa7d58aSMatthew Dillon inputSize, maxOutputSize); 351*3aa7d58aSMatthew Dillon LZ4_free(ctx); 352*3aa7d58aSMatthew Dillon return result; 353*3aa7d58aSMatthew Dillon } 354*3aa7d58aSMatthew Dillon 355*3aa7d58aSMatthew Dillon 356*3aa7d58aSMatthew Dillon //**************************** 357*3aa7d58aSMatthew Dillon // Decompression functions 358*3aa7d58aSMatthew Dillon //**************************** 359*3aa7d58aSMatthew Dillon 360*3aa7d58aSMatthew Dillon typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive; 361*3aa7d58aSMatthew Dillon typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive; 362*3aa7d58aSMatthew Dillon typedef enum { full = 0, partial = 1 } exit_directive; 363*3aa7d58aSMatthew Dillon 364*3aa7d58aSMatthew Dillon 365*3aa7d58aSMatthew Dillon // This generic decompression function 
// covers all use cases.
// It shall be instanciated several times, using different sets of directives.
// Note that it is essential this generic function is really inlined,
// in order to remove useless branches during compilation optimisation.
//
// Parameters :
//   source, dest      - compressed input / decompressed output buffers
//   inputSize         - number of compressed bytes (bounds reads when
//                       endOnInput == endOnInputSize)
//   outputSize        - must be != 0; when endOnInput == endOnInputSize this
//                       is the capacity of dest, otherwise the exact number
//                       of bytes to produce
//   endOnInput        - endOnOutputSize or endOnInputSize
//   prefix64k         - noPrefix, or withPrefix (history precedes dest, so
//                       back-references before dest are legal)
//   partialDecoding   - full, or partial
//   targetOutputSize  - only used if partialDecoding == partial
// Returns : number of output bytes decoded (endOnInput) or input bytes read
//           (!endOnInput); a negative value on malformed input.
static
inline
int LZ4_decompress_generic(
                 char* source,
                 char* dest,
                 int inputSize,          //
                 int outputSize,
                 // OutputSize must be != 0; if endOnInput==endOnInputSize,
                 // this value is the max size of Output Buffer.

                 int endOnInput,         // endOnOutputSize, endOnInputSize
                 int prefix64k,          // noPrefix, withPrefix
                 int partialDecoding,    // full, partial
                 int targetOutputSize    // only used if partialDecoding==partial
                 )
{
    // Local Variables
    BYTE* restrict ip = (BYTE*) source;
    BYTE* ref;
    BYTE* iend = ip + inputSize;

    BYTE* op = (BYTE*) dest;
    BYTE* oend = op + outputSize;
    BYTE* cpy;
    BYTE* oexit = op + targetOutputSize;

    // Correction tables for overlapping matches with offset < STEPSIZE
    // (see the "copy repeated sequence" special case below).
    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif


    // Special case
    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;
    // targetOutputSize too large, better decode everything
    if unlikely(outputSize==0) goto _output_error;
    // Empty output buffer


    // Main Loop : one iteration per (literal run, match) sequence
    while (1)
    {
        unsigned token;
        size_t length;

        // get runlength : high nibble of the token, extended by 255-valued
        // bytes while the nibble is saturated (RUN_MASK)
        token = *ip++;
        if ((length=(token>>ML_BITS)) == RUN_MASK)
        {
            unsigned s=255;
            while (((endOnInput)?ip<iend:1) && (s==255))
            {
                s = *ip++;
                length += s;
            }
        }

        // copy literals; take the slow path near the end of either buffer
        cpy = op+length;
        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
            || (ip+length>iend-(2+1+LASTLITERALS))) )
            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
        {
            if (partialDecoding)
            {
                if (cpy > oend) goto _output_error;
                // Error : write attempt beyond end of output buffer
                if ((endOnInput) && (ip+length > iend)) goto _output_error;
                // Error : read attempt beyond end of input buffer
            }
            else
            {
                if ((!endOnInput) && (cpy != oend)) goto _output_error;
                // Error : block decoding must stop exactly there,
                // due to parsing restrictions
                if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
                    goto _output_error;
                // Error : not enough place for another match (min 4) + 5 literals
            }
            // exact bounded copy for the final literal run
            memcpy(op, ip, length);
            ip += length;
            op += length;
            break;
            // Necessarily EOF, due to parsing restrictions
        }
        // fast path : wildcopy may overrun cpy, pointers are re-synced after
        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;

        // get offset (16-bit little-endian) and derive the match position
        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
        if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
            goto _output_error;   // Error : offset outside destination buffer

        // get matchlength : low nibble of the token, extended likewise
        if ((length=(token&ML_MASK)) == ML_MASK)
        {
            while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
            // A minimum nb of input bytes must remain for LASTLITERALS + token
            {
                unsigned s = *ip++;
                length += s;
                if (s==255) continue;
                break;
            }
        }

        // copy repeated sequence; offsets < STEPSIZE overlap the bytes just
        // written, so copy byte-wise first and patch ref with the tables
        if unlikely((op-ref)<STEPSIZE)
        {
#if LZ4_ARCH64
            size_t dec64 = dec64table[op-ref];
#else
            const size_t dec64 = 0;
#endif
            op[0] = ref[0];
            op[1] = ref[1];
            op[2] = ref[2];
            op[3] = ref[3];
            op += 4, ref += 4; ref -= dec32table[op-ref];
            A32(op) = A32(ref);
            op += STEPSIZE-4; ref -= dec64;
        } else { LZ4_COPYSTEP(ref,op); }
        cpy = op + length - (STEPSIZE-4);

        // near the end of the output buffer, finish the match carefully
        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
        {
            if (cpy > oend-LASTLITERALS) goto _output_error;
            // Error : last 5 bytes must be literals
            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
            while(op<cpy) *op++=*ref++;
            op=cpy;
            continue;
        }
        LZ4_WILDCOPY(ref, op, cpy);
        op=cpy;   // correction : wildcopy overshoots, rewind op to cpy
    }

    // end of decoding
    if (endOnInput)
        return (int) (((char*)op)-dest);     // Nb of output bytes decoded
    else
        return (int) (((char*)ip)-source);   // Nb of input bytes read

    // Overflow error detected
_output_error:
    return (int) (-(((char*)ip)-source))-1;
}


// Safe decompression : never reads beyond source+inputSize nor writes beyond
// dest+maxOutputSize. Returns the decompressed size, or a negative value on
// malformed input.
int
LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
        endOnInputSize, noPrefix, full, 0);
}