1 /* 2 LZ4 - Fast LZ compression algorithm 3 Copyright (C) 2011-2013, Yann Collet. 4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are 8 met: 9 10 * Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 * Redistributions in binary form must reproduce the above 13 copyright notice, this list of conditions and the following disclaimer 14 in the documentation and/or other materials provided with the 15 distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 You can contact the author at : 30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31 - LZ4 source repository : http://code.google.com/p/lz4/ 32 */ 33 34 /* 35 Note : this source file requires "hammer2_lz4_encoder.h" 36 */ 37 38 //************************************** 39 // Tuning parameters 40 //************************************** 41 // MEMORY_USAGE : 42 // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 43 // 16 -> 64KB; 20 -> 1MB; etc.) 44 // Increasing memory usage improves compression ratio 45 // Reduced memory usage can improve speed, due to cache effect 46 // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache 47 #define MEMORY_USAGE 14 48 49 // HEAPMODE : 50 // Select how default compression function will allocate memory for its 51 // hash table, 52 // in memory stack (0:default, fastest), or in memory heap (1:requires 53 // memory allocation (malloc)). 54 // Default allocation strategy is to use stack (HEAPMODE 0) 55 // Note : explicit functions *_stack* and *_heap* are unaffected by this setting 56 #define HEAPMODE 1 57 58 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : 59 // This will provide a small boost to performance for big endian cpu, 60 // but the resulting compressed stream will be incompatible with little-endian CPU. 61 // You can set this option to 1 in situations where data will remain within 62 // closed environment 63 // This option is useless on Little_Endian CPU (such as x86) 64 //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 65 66 67 //************************************** 68 // CPU Feature Detection 69 //************************************** 70 // 32 or 64 bits ? 71 #if (defined(__x86_64__) || defined(_M_X64)) // Detects 64 bits mode 72 # define LZ4_ARCH64 1 73 #else 74 # define LZ4_ARCH64 0 75 #endif 76 77 //This reduced library code is only Little Endian compatible, 78 //if the need arises, please look for the appropriate defines in the 79 //original complete LZ4 library. 80 //Same is true for unaligned memory access which is enabled by default, 81 //hardware bit count, also enabled by default, and Microsoft/Visual 82 //Studio compilers. 83 84 //************************************** 85 // Compiler Options 86 //************************************** 87 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 88 /* "restrict" is a known keyword */ 89 #else 90 # define restrict // Disable restrict 91 #endif 92 93 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 94 95 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 96 # define expect(expr,value) (__builtin_expect ((expr),(value)) ) 97 #else 98 # define expect(expr,value) (expr) 99 #endif 100 101 #define likely(expr) expect((expr) != 0, 1) 102 #define unlikely(expr) expect((expr) != 0, 0) 103 104 105 //************************************** 106 // Includes 107 //************************************** 108 #include <sys/malloc.h> //for malloc macros 109 #include "hammer2.h" 110 #include "hammer2_lz4.h" 111 112 113 //Declaration for kmalloc functions 114 MALLOC_DECLARE(C_HASHTABLE); 115 MALLOC_DEFINE(C_HASHTABLE, "comphashtable", 116 "A hash table used by LZ4 compression function."); 117 118 119 //************************************** 120 // Basic Types 121 //************************************** 122 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 123 # include <stdint.h> 124 typedef uint8_t BYTE; 125 typedef uint16_t U16; 126 typedef uint32_t U32; 127 typedef int32_t S32; 128 typedef uint64_t U64; 129 #else 130 typedef unsigned char BYTE; 131 typedef unsigned short U16; 132 typedef unsigned int U32; 133 typedef signed int S32; 134 typedef unsigned long long U64; 135 #endif 136 137 #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) 138 # define _PACKED __attribute__ ((packed)) 139 #else 140 # define _PACKED 141 #endif 142 143 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 144 # pragma pack(push, 1) 145 #endif 146 147 typedef struct _U16_S { U16 v; } _PACKED U16_S; 148 typedef struct _U32_S { U32 v; } _PACKED U32_S; 149 typedef struct _U64_S { U64 v; } _PACKED U64_S; 150 151 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) 152 # pragma pack(pop) 153 #endif 154 155 #define A64(x) (((U64_S *)(x))->v) 156 #define A32(x) (((U32_S *)(x))->v) 157 #define A16(x) (((U16_S *)(x))->v) 158 159 160 //************************************** 161 // Constants 162 //************************************** 163 #define HASHTABLESIZE (1 << MEMORY_USAGE) 164 165 #define MINMATCH 4 166 167 #define COPYLENGTH 8 168 #define LASTLITERALS 5 169 #define MFLIMIT (COPYLENGTH+MINMATCH) 170 #define MINLENGTH (MFLIMIT+1) 171 172 #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) 173 #define SKIPSTRENGTH 6 174 // Increasing this value will make the compression run slower on 175 // incompressible data 176 177 #define MAXD_LOG 16 178 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 179 180 #define ML_BITS 4 181 #define ML_MASK ((1U<<ML_BITS)-1) 182 #define RUN_BITS (8-ML_BITS) 183 #define RUN_MASK ((1U<<RUN_BITS)-1) 184 185 186 //************************************** 187 // Architecture-specific macros 188 //************************************** 189 #if LZ4_ARCH64 // 64-bit 190 # define STEPSIZE 8 191 # define UARCH U64 192 # define AARCH A64 193 # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; 194 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) 195 # define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e) 196 # define HTYPE U32 197 # define INITBASE(base) BYTE* base = ip 198 #else // 32-bit 199 # define STEPSIZE 4 200 # define UARCH U32 201 # define AARCH A32 202 # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; 203 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); 204 # define LZ4_SECURECOPY LZ4_WILDCOPY 205 # define HTYPE BYTE* 206 # define INITBASE(base) int base = 0 207 #endif 208 209 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 210 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); 211 v = lz4_bswap16(v); 212 d = (s) - v; } 213 # define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); 214 v = lz4_bswap16(v); 215 A16(p) = v; 216 p+=2; } 217 #else // Little Endian 218 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } 219 # define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } 220 #endif 221 222 223 //************************************** 224 // Macros 225 //************************************** 226 #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); 227 #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; } 228 229 230 //**************************** 231 // Private functions 232 //**************************** 233 #if LZ4_ARCH64 234 235 static 236 inline 237 int 238 LZ4_NbCommonBytes (register U64 val) 239 { 240 #if defined(LZ4_BIG_ENDIAN) 241 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 242 unsigned long r = 0; 243 _BitScanReverse64( &r, val ); 244 return (int)(r>>3); 245 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 246 return (__builtin_clzll(val) >> 3); 247 #else 248 int r; 249 if (!(val>>32)) { r=4; } else { r=0; val>>=32; } 250 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } 251 r += (!val); 252 return r; 253 #endif 254 #else 255 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 256 unsigned long r = 0; 257 _BitScanForward64( &r, val ); 258 return (int)(r>>3); 259 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 260 return (__builtin_ctzll(val) >> 3); 261 #else 262 static int DeBruijnBytePos[64] = { 263 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 264 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 265 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 266 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 267 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 268 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 269 7, 6, 7, 7 }; 270 return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; 271 #endif 272 #endif 273 } 274 275 #else 276 277 static 278 inline 279 int 280 LZ4_NbCommonBytes (register U32 val) 281 { 282 #if defined(LZ4_BIG_ENDIAN) 283 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 284 unsigned long r = 0; 285 _BitScanReverse( &r, val ); 286 return (int)(r>>3); 287 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 288 return (__builtin_clz(val) >> 3); 289 # else 290 int r; 291 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } 292 r += (!val); 293 return r; 294 # endif 295 #else 296 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 297 unsigned long r; 298 _BitScanForward( &r, val ); 299 return (int)(r>>3); 300 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) 301 return (__builtin_ctz(val) >> 3); 302 # else 303 static int DeBruijnBytePos[32] = { 304 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 305 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 306 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 307 1, 1 }; 308 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 309 # endif 310 #endif 311 } 312 313 #endif 314 315 316 317 //****************************** 318 // Compression functions 319 //****************************** 320 321 #include "hammer2_lz4_encoder.h" 322 323 /* 324 void* LZ4_createHeapMemory(); 325 int LZ4_freeHeapMemory(void* ctx); 326 327 Used to allocate and free hashTable memory 328 to be used by the LZ4_compress_heap* family of functions. 329 LZ4_createHeapMemory() returns NULL is memory allocation fails. 330 */ 331 void* 332 LZ4_create(void) 333 { 334 return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT); 335 } 336 337 int 338 LZ4_free(void* ctx) 339 { 340 kfree(ctx, C_HASHTABLE); 341 return 0; 342 } 343 344 int 345 LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize) 346 { 347 void* ctx = LZ4_create(); 348 int result; 349 if (ctx == NULL) return 0; // Failed allocation => compression not done 350 if (inputSize < LZ4_64KLIMIT) 351 result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest, 352 inputSize, maxOutputSize); 353 else result = LZ4_compress_heap_limitedOutput(ctx, source, dest, 354 inputSize, maxOutputSize); 355 LZ4_free(ctx); 356 return result; 357 } 358 359 360 //**************************** 361 // Decompression functions 362 //**************************** 363 364 typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive; 365 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive; 366 typedef enum { full = 0, partial = 1 } exit_directive; 367 368 369 // This generic decompression function cover all use cases. 370 // It shall be instanciated several times, using different sets of directives 371 // Note that it is essential this generic function is really inlined, 372 // in order to remove useless branches during compilation optimisation. 373 static 374 inline 375 int LZ4_decompress_generic( 376 char* source, 377 char* dest, 378 int inputSize, // 379 int outputSize, 380 // OutputSize must be != 0; if endOnInput==endOnInputSize, 381 // this value is the max size of Output Buffer. 382 383 int endOnInput, // endOnOutputSize, endOnInputSize 384 int prefix64k, // noPrefix, withPrefix 385 int partialDecoding, // full, partial 386 int targetOutputSize // only used if partialDecoding==partial 387 ) 388 { 389 // Local Variables 390 BYTE* restrict ip = (BYTE*) source; 391 BYTE* ref; 392 BYTE* iend = ip + inputSize; 393 394 BYTE* op = (BYTE*) dest; 395 BYTE* oend = op + outputSize; 396 BYTE* cpy; 397 BYTE* oexit = op + targetOutputSize; 398 399 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; 400 #if LZ4_ARCH64 401 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; 402 #endif 403 404 405 // Special case 406 if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; 407 // targetOutputSize too large, better decode everything 408 if unlikely(outputSize==0) goto _output_error; 409 // Empty output buffer 410 411 412 // Main Loop 413 while (1) 414 { 415 unsigned token; 416 size_t length; 417 418 // get runlength 419 token = *ip++; 420 if ((length=(token>>ML_BITS)) == RUN_MASK) 421 { 422 unsigned s=255; 423 while (((endOnInput)?ip<iend:1) && (s==255)) 424 { 425 s = *ip++; 426 length += s; 427 } 428 } 429 430 // copy literals 431 cpy = op+length; 432 if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) 433 || (ip+length>iend-(2+1+LASTLITERALS))) ) 434 || ((!endOnInput) && (cpy>oend-COPYLENGTH))) 435 { 436 if (partialDecoding) 437 { 438 if (cpy > oend) goto _output_error; 439 // Error : write attempt beyond end of output buffer 440 if ((endOnInput) && (ip+length > iend)) goto _output_error; 441 // Error : read attempt beyond end of input buffer 442 } 443 else 444 { 445 if ((!endOnInput) && (cpy != oend)) goto _output_error; 446 // Error : block decoding must stop exactly there, 447 // due to parsing restrictions 448 if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) 449 goto _output_error; 450 // Error : not enough place for another match (min 4) + 5 literals 451 } 452 memcpy(op, ip, length); 453 ip += length; 454 op += length; 455 break; 456 // Necessarily EOF, due to parsing restrictions 457 } 458 LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; 459 460 // get offset 461 LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; 462 if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest)) 463 goto _output_error; // Error : offset outside destination buffer 464 465 // get matchlength 466 if ((length=(token&ML_MASK)) == ML_MASK) 467 { 468 while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1) 469 // A minimum nb of input bytes must remain for LASTLITERALS + token 470 { 471 unsigned s = *ip++; 472 length += s; 473 if (s==255) continue; 474 break; 475 } 476 } 477 478 // copy repeated sequence 479 if unlikely((op-ref)<STEPSIZE) 480 { 481 #if LZ4_ARCH64 482 size_t dec64 = dec64table[op-ref]; 483 #else 484 const size_t dec64 = 0; 485 #endif 486 op[0] = ref[0]; 487 op[1] = ref[1]; 488 op[2] = ref[2]; 489 op[3] = ref[3]; 490 op += 4, ref += 4; ref -= dec32table[op-ref]; 491 A32(op) = A32(ref); 492 op += STEPSIZE-4; ref -= dec64; 493 } else { LZ4_COPYSTEP(ref,op); } 494 cpy = op + length - (STEPSIZE-4); 495 496 if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4)) 497 { 498 if (cpy > oend-LASTLITERALS) goto _output_error; 499 // Error : last 5 bytes must be literals 500 LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); 501 while(op<cpy) *op++=*ref++; 502 op=cpy; 503 continue; 504 } 505 LZ4_WILDCOPY(ref, op, cpy); 506 op=cpy; // correction 507 } 508 509 // end of decoding 510 if (endOnInput) 511 return (int) (((char*)op)-dest); // Nb of output bytes decoded 512 else 513 return (int) (((char*)ip)-source); // Nb of input bytes read 514 515 // Overflow error detected 516 _output_error: 517 return (int) (-(((char*)ip)-source))-1; 518 } 519 520 521 int 522 LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize) 523 { 524 return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize, 525 endOnInputSize, noPrefix, full, 0); 526 } 527