1 /* 2 LZ4 - Fast LZ compression algorithm 3 Copyright (C) 2011-2013, Yann Collet. 4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are 8 met: 9 10 * Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 * Redistributions in binary form must reproduce the above 13 copyright notice, this list of conditions and the following disclaimer 14 in the documentation and/or other materials provided with the 15 distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 You can contact the author at : 30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31 - LZ4 source repository : http://code.google.com/p/lz4/ 32 */ 33 34 /* 35 Note : this source file requires "hammer2_lz4_encoder.h" 36 */ 37 38 //************************************** 39 // Tuning parameters 40 //************************************** 41 // MEMORY_USAGE : 42 // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 43 // 16 -> 64KB; 20 -> 1MB; etc.) 44 // Increasing memory usage improves compression ratio 45 // Reduced memory usage can improve speed, due to cache effect 46 // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache 47 #define MEMORY_USAGE 14 48 49 // HEAPMODE : 50 // Select how default compression function will allocate memory for its 51 // hash table, 52 // in memory stack (0:default, fastest), or in memory heap (1:requires 53 // memory allocation (malloc)). 54 // Default allocation strategy is to use stack (HEAPMODE 0) 55 // Note : explicit functions *_stack* and *_heap* are unaffected by this setting 56 #define HEAPMODE 1 57 58 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : 59 // This will provide a small boost to performance for big endian cpu, 60 // but the resulting compressed stream will be incompatible with little-endian CPU. 61 // You can set this option to 1 in situations where data will remain within 62 // closed environment 63 // This option is useless on Little_Endian CPU (such as x86) 64 //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 65 66 67 //************************************** 68 // CPU Feature Detection 69 //************************************** 70 // 32 or 64 bits ? 71 #if (defined(__x86_64__) || defined(_M_X64)) // Detects 64 bits mode 72 # define LZ4_ARCH64 1 73 #else 74 # define LZ4_ARCH64 0 75 #endif 76 77 //This reduced library code is only Little Endian compatible, 78 //if the need arises, please look for the appropriate defines in the 79 //original complete LZ4 library. 80 //Same is true for unaligned memory access which is enabled by default, 81 //hardware bit count, also enabled by default, and Microsoft/Visual 82 //Studio compilers. 83 84 //************************************** 85 // Compiler Options 86 //************************************** 87 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 88 /* "restrict" is a known keyword */ 89 #else 90 # define restrict // Disable restrict 91 #endif 92 93 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 94 95 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 96 # define expect(expr,value) (__builtin_expect ((expr),(value)) ) 97 #else 98 # define expect(expr,value) (expr) 99 #endif 100 101 #define likely(expr) expect((expr) != 0, 1) 102 #define unlikely(expr) expect((expr) != 0, 0) 103 104 105 //************************************** 106 // Includes 107 //************************************** 108 #include "hammer2.h" 109 #include "hammer2_lz4.h" 110 #include <sys/malloc.h> //for malloc macros, hammer2.h includes sys/param.h 111 112 113 //Declaration for kmalloc functions 114 static MALLOC_DEFINE(C_HASHTABLE, "comphashtable", 115 "A hash table used by LZ4 compression function."); 116 117 118 //************************************** 119 // Basic Types 120 //************************************** 121 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 122 # include <sys/stdint.h> 123 typedef uint8_t BYTE; 124 typedef uint16_t U16; 125 typedef uint32_t U32; 126 typedef int32_t S32; 127 typedef uint64_t U64; 128 #else 129 typedef unsigned char BYTE; 130 typedef unsigned short U16; 131 typedef unsigned int U32; 132 typedef signed int S32; 133 typedef unsigned long long U64; 134 #endif 135 136 #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) 137 # define _PACKED __attribute__ ((packed)) 138 #else 139 # define _PACKED 140 #endif 141 142 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 143 # pragma pack(push, 1) 144 #endif 145 146 typedef struct _U16_S { U16 v; } _PACKED U16_S; 147 typedef struct _U32_S { U32 v; } _PACKED U32_S; 148 typedef struct _U64_S { U64 v; } _PACKED U64_S; 149 150 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 151 # pragma pack(pop) 152 #endif 153 154 #define A64(x) (((U64_S *)(x))->v) 155 #define A32(x) (((U32_S *)(x))->v) 156 #define A16(x) (((U16_S *)(x))->v) 157 158 159 //************************************** 160 // Constants 161 //************************************** 162 #define HASHTABLESIZE (1 << MEMORY_USAGE) 163 164 #define MINMATCH 4 165 166 #define COPYLENGTH 8 167 #define LASTLITERALS 5 168 #define MFLIMIT (COPYLENGTH+MINMATCH) 169 #define MINLENGTH (MFLIMIT+1) 170 171 #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) 172 #define SKIPSTRENGTH 6 173 // Increasing this value will make the compression run slower on 174 // incompressible data 175 176 #define MAXD_LOG 16 177 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 178 179 #define ML_BITS 4 180 #define ML_MASK ((1U<<ML_BITS)-1) 181 #define RUN_BITS (8-ML_BITS) 182 #define RUN_MASK ((1U<<RUN_BITS)-1) 183 184 185 //************************************** 186 // Architecture-specific macros 187 //************************************** 188 #if LZ4_ARCH64 // 64-bit 189 # define STEPSIZE 8 190 # define UARCH U64 191 # define AARCH A64 192 # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; 193 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) 194 # define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e) 195 # define HTYPE U32 196 # define INITBASE(base) BYTE* base = ip 197 #else // 32-bit 198 # define STEPSIZE 4 199 # define UARCH U32 200 # define AARCH A32 201 # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; 202 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); 203 # define LZ4_SECURECOPY LZ4_WILDCOPY 204 # define HTYPE BYTE* 205 # define INITBASE(base) int base = 0 206 #endif 207 208 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 209 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); 210 v = lz4_bswap16(v); 211 d = (s) - v; } 212 # define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); 213 v = lz4_bswap16(v); 214 A16(p) = v; 215 p+=2; } 216 #else // Little Endian 217 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } 218 # define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } 219 #endif 220 221 222 //************************************** 223 // Macros 224 //************************************** 225 #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); 226 #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; } 227 228 229 //**************************** 230 // Private functions 231 //**************************** 232 #if LZ4_ARCH64 233 234 static 235 inline 236 int 237 LZ4_NbCommonBytes (register U64 val) 238 { 239 #if defined(LZ4_BIG_ENDIAN) 240 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 241 unsigned long r = 0; 242 _BitScanReverse64( &r, val ); 243 return (int)(r>>3); 244 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 245 return (__builtin_clzll(val) >> 3); 246 #else 247 int r; 248 if (!(val>>32)) { r=4; } else { r=0; val>>=32; } 249 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } 250 r += (!val); 251 return r; 252 #endif 253 #else 254 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 255 unsigned long r = 0; 256 _BitScanForward64( &r, val ); 257 return (int)(r>>3); 258 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 259 return (__builtin_ctzll(val) >> 3); 260 #else 261 static int DeBruijnBytePos[64] = { 262 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 263 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 264 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 265 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 266 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 267 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 268 7, 6, 7, 7 }; 269 return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; 270 #endif 271 #endif 272 } 273 274 #else 275 276 static 277 inline 278 int 279 LZ4_NbCommonBytes (register U32 val) 280 { 281 #if defined(LZ4_BIG_ENDIAN) 282 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 283 unsigned long r = 0; 284 _BitScanReverse( &r, val ); 285 return (int)(r>>3); 286 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 287 return (__builtin_clz(val) >> 3); 288 # else 289 int r; 290 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } 291 r += (!val); 292 return r; 293 # endif 294 #else 295 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 296 unsigned long r; 297 _BitScanForward( &r, val ); 298 return (int)(r>>3); 299 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 300 return (__builtin_ctz(val) >> 3); 301 # else 302 static int DeBruijnBytePos[32] = { 303 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 304 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 305 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 306 1, 1 }; 307 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 308 # endif 309 #endif 310 } 311 312 #endif 313 314 315 316 //****************************** 317 // Compression functions 318 //****************************** 319 320 #include "hammer2_lz4_encoder.h" 321 322 /* 323 void* LZ4_createHeapMemory(); 324 int LZ4_freeHeapMemory(void* ctx); 325 326 Used to allocate and free hashTable memory 327 to be used by the LZ4_compress_heap* family of functions. 328 LZ4_createHeapMemory() returns NULL is memory allocation fails. 329 */ 330 void* 331 LZ4_create(void) 332 { 333 return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT); 334 } 335 336 int 337 LZ4_free(void* ctx) 338 { 339 kfree(ctx, C_HASHTABLE); 340 return 0; 341 } 342 343 int 344 LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize) 345 { 346 void* ctx = LZ4_create(); 347 int result; 348 if (ctx == NULL) return 0; // Failed allocation => compression not done 349 if (inputSize < LZ4_64KLIMIT) 350 result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, 351 inputSize, maxOutputSize); 352 else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, 353 inputSize, maxOutputSize); 354 LZ4_free(ctx); 355 return result; 356 } 357 358 359 //**************************** 360 // Decompression functions 361 //**************************** 362 363 typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive; 364 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive; 365 typedef enum { full = 0, partial = 1 } exit_directive; 366 367 368 // This generic decompression function cover all use cases. 369 // It shall be instanciated several times, using different sets of directives 370 // Note that it is essential this generic function is really inlined, 371 // in order to remove useless branches during compilation optimisation. 372 static 373 inline 374 int LZ4_decompress_generic( 375 char* source, 376 char* dest, 377 int inputSize, // 378 int outputSize, 379 // OutputSize must be != 0; if endOnInput==endOnInputSize, 380 // this value is the max size of Output Buffer. 381 382 int endOnInput, // endOnOutputSize, endOnInputSize 383 int prefix64k, // noPrefix, withPrefix 384 int partialDecoding, // full, partial 385 int targetOutputSize // only used if partialDecoding==partial 386 ) 387 { 388 // Local Variables 389 BYTE* restrict ip = (BYTE*) source; 390 BYTE* ref; 391 BYTE* iend = ip + inputSize; 392 393 BYTE* op = (BYTE*) dest; 394 BYTE* oend = op + outputSize; 395 BYTE* cpy; 396 BYTE* oexit = op + targetOutputSize; 397 398 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; 399 #if LZ4_ARCH64 400 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; 401 #endif 402 403 404 // Special case 405 if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; 406 // targetOutputSize too large, better decode everything 407 if unlikely(outputSize==0) goto _output_error; 408 // Empty output buffer 409 410 411 // Main Loop 412 while (1) 413 { 414 unsigned token; 415 size_t length; 416 417 // get runlength 418 token = *ip++; 419 if ((length=(token>>ML_BITS)) == RUN_MASK) 420 { 421 unsigned s=255; 422 while (((endOnInput)?ip<iend:1) && (s==255)) 423 { 424 s = *ip++; 425 length += s; 426 } 427 } 428 429 // copy literals 430 cpy = op+length; 431 if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) 432 || (ip+length>iend-(2+1+LASTLITERALS))) ) 433 || ((!endOnInput) && (cpy>oend-COPYLENGTH))) 434 { 435 if (partialDecoding) 436 { 437 if (cpy > oend) goto _output_error; 438 // Error : write attempt beyond end of output buffer 439 if ((endOnInput) && (ip+length > iend)) goto _output_error; 440 // Error : read attempt beyond end of input buffer 441 } 442 else 443 { 444 if ((!endOnInput) && (cpy != oend)) goto _output_error; 445 // Error : block decoding must stop exactly there, 446 // due to parsing restrictions 447 if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) 448 goto _output_error; 449 // Error : not enough place for another match (min 4) + 5 literals 450 } 451 memcpy(op, ip, length); 452 ip += length; 453 op += length; 454 break; 455 // Necessarily EOF, due to parsing restrictions 456 } 457 LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; 458 459 // get offset 460 LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; 461 if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest)) 462 goto _output_error; // Error : offset outside destination buffer 463 464 // get matchlength 465 if ((length=(token&ML_MASK)) == ML_MASK) 466 { 467 while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1) 468 // A minimum nb of input bytes must remain for LASTLITERALS + token 469 { 470 unsigned s = *ip++; 471 length += s; 472 if (s==255) continue; 473 break; 474 } 475 } 476 477 // copy repeated sequence 478 if unlikely((op-ref)<STEPSIZE) 479 { 480 #if LZ4_ARCH64 481 size_t dec64 = dec64table[op-ref]; 482 #else 483 const size_t dec64 = 0; 484 #endif 485 op[0] = ref[0]; 486 op[1] = ref[1]; 487 op[2] = ref[2]; 488 op[3] = ref[3]; 489 op += 4, ref += 4; ref -= dec32table[op-ref]; 490 A32(op) = A32(ref); 491 op += STEPSIZE-4; ref -= dec64; 492 } else { LZ4_COPYSTEP(ref,op); } 493 cpy = op + length - (STEPSIZE-4); 494 495 if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4)) 496 { 497 if (cpy > oend-LASTLITERALS) goto _output_error; 498 // Error : last 5 bytes must be literals 499 LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); 500 while(op<cpy) *op++=*ref++; 501 op=cpy; 502 continue; 503 } 504 LZ4_WILDCOPY(ref, op, cpy); 505 op=cpy; // correction 506 } 507 508 // end of decoding 509 if (endOnInput) 510 return (int) (((char*)op)-dest); // Nb of output bytes decoded 511 else 512 return (int) (((char*)ip)-source); // Nb of input bytes read 513 514 // Overflow error detected 515 _output_error: 516 return (int) (-(((char*)ip)-source))-1; 517 } 518 519 520 int 521 LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize) 522 { 523 return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, 524 endOnInputSize, noPrefix, full, 0); 525 } 526