1 /* 2 LZ4 - Fast LZ compression algorithm 3 Copyright (C) 2011-2013, Yann Collet. 4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are 8 met: 9 10 * Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 * Redistributions in binary form must reproduce the above 13 copyright notice, this list of conditions and the following disclaimer 14 in the documentation and/or other materials provided with the 15 distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 You can contact the author at : 30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31 - LZ4 source repository : http://code.google.com/p/lz4/ 32 */ 33 34 /* 35 Note : this source file requires "hammer2_lz4_encoder.h" 36 */ 37 38 //************************************** 39 // Tuning parameters 40 //************************************** 41 // MEMORY_USAGE : 42 // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 43 // 16 -> 64KB; 20 -> 1MB; etc.) 
// Raising MEMORY_USAGE improves the compression ratio; lowering it can
// improve speed through better cache behavior.
// The default of 14 gives a 16KB hash table, which nicely fits into the
// L1 data cache of typical Intel x86 CPUs.
#define MEMORY_USAGE 14

// HEAPMODE :
// Selects where the default compression function places its hash table:
// in stack memory (0, fastest) or in heap memory (1, requires an
// allocation via malloc-style routines).
// The explicit *_stack* and *_heap* function variants are unaffected by
// this setting.
// NOTE(review): this file sets 1 (heap) rather than the upstream default
// of 0 — presumably to limit kernel stack usage; confirm before changing.
#define HEAPMODE 1

// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
// Provides a small performance boost on big endian CPUs, but the
// resulting compressed stream is incompatible with little-endian
// decoders.  Only set this to 1 when the data remains within a closed
// big-endian environment.  The option is useless on little-endian CPUs
// (such as x86).
//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1


//**************************************
// CPU Feature Detection
//**************************************
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(_M_X64))    // Detects 64 bits mode
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif

// This reduced library is only little-endian compatible; if big-endian
// support is ever needed, look for the appropriate defines in the
// original, complete LZ4 library.  The same applies to unaligned memory
// access (enabled here by default), hardware bit counting (also enabled
// by default), and Microsoft/Visual Studio compiler support.
83 84 //************************************** 85 // Compiler Options 86 //************************************** 87 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 88 /* "restrict" is a known keyword */ 89 #else 90 # define restrict // Disable restrict 91 #endif 92 93 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 94 95 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 96 # define expect(expr,value) (__builtin_expect ((expr),(value)) ) 97 #else 98 # define expect(expr,value) (expr) 99 #endif 100 101 #define likely(expr) expect((expr) != 0, 1) 102 #define unlikely(expr) expect((expr) != 0, 0) 103 104 105 //************************************** 106 // Includes 107 //************************************** 108 #include "hammer2.h" 109 #include "hammer2_lz4.h" 110 //#include <sys/malloc.h> //for malloc macros, hammer2.h includes sys/param.h 111 112 113 //Declaration for kmalloc functions 114 /* 115 static MALLOC_DEFINE(C_HASHTABLE, "comphashtable", 116 "A hash table used by LZ4 compression function."); 117 */ 118 119 120 //************************************** 121 // Basic Types 122 //************************************** 123 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 124 # include <sys/stdint.h> 125 typedef uint8_t BYTE; 126 typedef uint16_t U16; 127 typedef uint32_t U32; 128 typedef int32_t S32; 129 typedef uint64_t U64; 130 #else 131 typedef unsigned char BYTE; 132 typedef unsigned short U16; 133 typedef unsigned int U32; 134 typedef signed int S32; 135 typedef unsigned long long U64; 136 #endif 137 138 #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) 139 # define _PACKED __attribute__ ((packed)) 140 #else 141 # define _PACKED 142 #endif 143 144 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 145 # pragma pack(push, 1) 146 #endif 147 148 typedef struct _U16_S { U16 v; } _PACKED U16_S; 149 typedef struct _U32_S { U32 v; } _PACKED U32_S; 150 typedef struct 
_U64_S { U64 v; } _PACKED U64_S; 151 152 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 153 # pragma pack(pop) 154 #endif 155 156 #define A64(x) (((U64_S *)(x))->v) 157 #define A32(x) (((U32_S *)(x))->v) 158 #define A16(x) (((U16_S *)(x))->v) 159 160 161 //************************************** 162 // Constants 163 //************************************** 164 #define HASHTABLESIZE (1 << MEMORY_USAGE) 165 166 #define MINMATCH 4 167 168 #define COPYLENGTH 8 169 #define LASTLITERALS 5 170 #define MFLIMIT (COPYLENGTH+MINMATCH) 171 #define MINLENGTH (MFLIMIT+1) 172 173 #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) 174 #define SKIPSTRENGTH 6 175 // Increasing this value will make the compression run slower on 176 // incompressible data 177 178 #define MAXD_LOG 16 179 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 180 181 #define ML_BITS 4 182 #define ML_MASK ((1U<<ML_BITS)-1) 183 #define RUN_BITS (8-ML_BITS) 184 #define RUN_MASK ((1U<<RUN_BITS)-1) 185 186 187 //************************************** 188 // Architecture-specific macros 189 //************************************** 190 #if LZ4_ARCH64 // 64-bit 191 # define STEPSIZE 8 192 # define UARCH U64 193 # define AARCH A64 194 # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; 195 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) 196 # define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e) 197 # define HTYPE U32 198 # define INITBASE(base) BYTE* base = ip 199 #else // 32-bit 200 # define STEPSIZE 4 201 # define UARCH U32 202 # define AARCH A32 203 # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; 204 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); 205 # define LZ4_SECURECOPY LZ4_WILDCOPY 206 # define HTYPE BYTE* 207 # define INITBASE(base) int base = 0 208 #endif 209 210 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 211 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); 212 v = lz4_bswap16(v); 213 d = (s) - v; } 214 # define 
LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); 215 v = lz4_bswap16(v); 216 A16(p) = v; 217 p+=2; } 218 #else // Little Endian 219 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } 220 # define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } 221 #endif 222 223 224 //************************************** 225 // Macros 226 //************************************** 227 #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); 228 #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; } 229 230 231 //**************************** 232 // Private functions 233 //**************************** 234 #if LZ4_ARCH64 235 236 static 237 inline 238 int 239 LZ4_NbCommonBytes (register U64 val) 240 { 241 #if defined(LZ4_BIG_ENDIAN) 242 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 243 unsigned long r = 0; 244 _BitScanReverse64( &r, val ); 245 return (int)(r>>3); 246 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 247 return (__builtin_clzll(val) >> 3); 248 #else 249 int r; 250 if (!(val>>32)) { r=4; } else { r=0; val>>=32; } 251 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } 252 r += (!val); 253 return r; 254 #endif 255 #else 256 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 257 unsigned long r = 0; 258 _BitScanForward64( &r, val ); 259 return (int)(r>>3); 260 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 261 return (__builtin_ctzll(val) >> 3); 262 #else 263 static int DeBruijnBytePos[64] = { 264 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 265 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 266 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 267 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 268 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 269 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 270 7, 6, 7, 7 }; 271 return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; 272 #endif 273 #endif 274 } 275 276 #else 277 278 static 279 inline 280 int 281 LZ4_NbCommonBytes (register U32 val) 282 { 283 #if 
defined(LZ4_BIG_ENDIAN) 284 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 285 unsigned long r = 0; 286 _BitScanReverse( &r, val ); 287 return (int)(r>>3); 288 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 289 return (__builtin_clz(val) >> 3); 290 # else 291 int r; 292 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } 293 r += (!val); 294 return r; 295 # endif 296 #else 297 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 298 unsigned long r; 299 _BitScanForward( &r, val ); 300 return (int)(r>>3); 301 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 302 return (__builtin_ctz(val) >> 3); 303 # else 304 static int DeBruijnBytePos[32] = { 305 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 306 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 307 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 308 1, 1 }; 309 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 310 # endif 311 #endif 312 } 313 314 #endif 315 316 317 318 //****************************** 319 // Compression functions 320 //****************************** 321 322 #include "hammer2_lz4_encoder.h" 323 324 /* 325 void* LZ4_createHeapMemory(); 326 int LZ4_freeHeapMemory(void* ctx); 327 328 Used to allocate and free hashTable memory 329 to be used by the LZ4_compress_heap* family of functions. 330 LZ4_createHeapMemory() returns NULL is memory allocation fails. 
331 */ 332 void* 333 LZ4_create(void) 334 { 335 return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT); 336 } 337 338 int 339 LZ4_free(void* ctx) 340 { 341 kfree(ctx, C_HASHTABLE); 342 return 0; 343 } 344 345 int 346 LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize) 347 { 348 void* ctx = LZ4_create(); 349 int result; 350 if (ctx == NULL) return 0; // Failed allocation => compression not done 351 if (inputSize < LZ4_64KLIMIT) 352 result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, 353 inputSize, maxOutputSize); 354 else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, 355 inputSize, maxOutputSize); 356 LZ4_free(ctx); 357 return result; 358 } 359 360 361 //**************************** 362 // Decompression functions 363 //**************************** 364 365 typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive; 366 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive; 367 typedef enum { full = 0, partial = 1 } exit_directive; 368 369 370 // This generic decompression function cover all use cases. 371 // It shall be instanciated several times, using different sets of directives 372 // Note that it is essential this generic function is really inlined, 373 // in order to remove useless branches during compilation optimisation. 374 static 375 inline 376 int LZ4_decompress_generic( 377 char* source, 378 char* dest, 379 int inputSize, // 380 int outputSize, 381 // OutputSize must be != 0; if endOnInput==endOnInputSize, 382 // this value is the max size of Output Buffer. 
383 384 int endOnInput, // endOnOutputSize, endOnInputSize 385 int prefix64k, // noPrefix, withPrefix 386 int partialDecoding, // full, partial 387 int targetOutputSize // only used if partialDecoding==partial 388 ) 389 { 390 // Local Variables 391 BYTE* restrict ip = (BYTE*) source; 392 BYTE* ref; 393 BYTE* iend = ip + inputSize; 394 395 BYTE* op = (BYTE*) dest; 396 BYTE* oend = op + outputSize; 397 BYTE* cpy; 398 BYTE* oexit = op + targetOutputSize; 399 400 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; 401 #if LZ4_ARCH64 402 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; 403 #endif 404 405 406 // Special case 407 if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; 408 // targetOutputSize too large, better decode everything 409 if unlikely(outputSize==0) goto _output_error; 410 // Empty output buffer 411 412 413 // Main Loop 414 while (1) 415 { 416 unsigned token; 417 size_t length; 418 419 // get runlength 420 token = *ip++; 421 if ((length=(token>>ML_BITS)) == RUN_MASK) 422 { 423 unsigned s=255; 424 while (((endOnInput)?ip<iend:1) && (s==255)) 425 { 426 s = *ip++; 427 length += s; 428 } 429 } 430 431 // copy literals 432 cpy = op+length; 433 if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) 434 || (ip+length>iend-(2+1+LASTLITERALS))) ) 435 || ((!endOnInput) && (cpy>oend-COPYLENGTH))) 436 { 437 if (partialDecoding) 438 { 439 if (cpy > oend) goto _output_error; 440 // Error : write attempt beyond end of output buffer 441 if ((endOnInput) && (ip+length > iend)) goto _output_error; 442 // Error : read attempt beyond end of input buffer 443 } 444 else 445 { 446 if ((!endOnInput) && (cpy != oend)) goto _output_error; 447 // Error : block decoding must stop exactly there, 448 // due to parsing restrictions 449 if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) 450 goto _output_error; 451 // Error : not enough place for another match (min 4) + 5 literals 452 } 453 memcpy(op, ip, length); 454 ip += length; 455 op += 
length; 456 break; 457 // Necessarily EOF, due to parsing restrictions 458 } 459 LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; 460 461 // get offset 462 LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; 463 if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest)) 464 goto _output_error; // Error : offset outside destination buffer 465 466 // get matchlength 467 if ((length=(token&ML_MASK)) == ML_MASK) 468 { 469 while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1) 470 // A minimum nb of input bytes must remain for LASTLITERALS + token 471 { 472 unsigned s = *ip++; 473 length += s; 474 if (s==255) continue; 475 break; 476 } 477 } 478 479 // copy repeated sequence 480 if unlikely((op-ref)<STEPSIZE) 481 { 482 #if LZ4_ARCH64 483 size_t dec64 = dec64table[op-ref]; 484 #else 485 const size_t dec64 = 0; 486 #endif 487 op[0] = ref[0]; 488 op[1] = ref[1]; 489 op[2] = ref[2]; 490 op[3] = ref[3]; 491 op += 4, ref += 4; ref -= dec32table[op-ref]; 492 A32(op) = A32(ref); 493 op += STEPSIZE-4; ref -= dec64; 494 } else { LZ4_COPYSTEP(ref,op); } 495 cpy = op + length - (STEPSIZE-4); 496 497 if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4)) 498 { 499 if (cpy > oend-LASTLITERALS) goto _output_error; 500 // Error : last 5 bytes must be literals 501 LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); 502 while(op<cpy) *op++=*ref++; 503 op=cpy; 504 continue; 505 } 506 LZ4_WILDCOPY(ref, op, cpy); 507 op=cpy; // correction 508 } 509 510 // end of decoding 511 if (endOnInput) 512 return (int) (((char*)op)-dest); // Nb of output bytes decoded 513 else 514 return (int) (((char*)ip)-source); // Nb of input bytes read 515 516 // Overflow error detected 517 _output_error: 518 return (int) (-(((char*)ip)-source))-1; 519 } 520 521 522 int 523 LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize) 524 { 525 return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, 526 endOnInputSize, noPrefix, full, 0); 527 } 528