1 /* 2 LZ4 - Fast LZ compression algorithm 3 Copyright (C) 2011-2013, Yann Collet. 4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are 8 met: 9 10 * Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 * Redistributions in binary form must reproduce the above 13 copyright notice, this list of conditions and the following disclaimer 14 in the documentation and/or other materials provided with the 15 distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 You can contact the author at : 30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31 - LZ4 source repository : http://code.google.com/p/lz4/ 32 */ 33 34 /* 35 Note : this source file requires "hammer2_lz4_encoder.h" 36 */ 37 38 //************************************** 39 // Tuning parameters 40 //************************************** 41 // MEMORY_USAGE : 42 // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB; 43 // 16 -> 64KB; 20 -> 1MB; etc.) 
// Increasing memory usage improves compression ratio
// Reduced memory usage can improve speed, due to cache effect
// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
#define MEMORY_USAGE 14

// HEAPMODE :
// Select how default compression function will allocate memory for its
// hash table,
// in memory stack (0:default, fastest), or in memory heap (1:requires
// memory allocation (malloc)).
// Default allocation strategy is to use stack (HEAPMODE 0)
// Note : explicit functions *_stack* and *_heap* are unaffected by this setting
// NOTE(review): HEAPMODE is set to 1 here even though the text above calls 0
// the default -- presumably deliberate for a kernel/stack-limited environment
// where a 2^MEMORY_USAGE-byte hash table on the stack is unsafe; confirm
// before changing.
#define HEAPMODE 1

// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
// This will provide a small boost to performance for big endian cpu,
// but the resulting compressed stream will be incompatible with little-endian CPU.
// You can set this option to 1 in situations where data will remain within
// closed environment
// This option is useless on Little_Endian CPU (such as x86)
//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1


//**************************************
// CPU Feature Detection
//**************************************
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(_M_X64))   // Detects 64 bits mode
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif

//This reduced library code is only Little Endian compatible,
//if the need arises, please look for the appropriate defines in the
//original complete LZ4 library.
//Same is true for unaligned memory access which is enabled by default,
//hardware bit count, also enabled by default, and Microsoft/Visual
//Studio compilers.

//**************************************
// Compiler Options
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
/* "restrict" is a known keyword */
#else
# define restrict   // Disable restrict: pre-C99 compilers do not know the keyword
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

// Branch-prediction hints: map to __builtin_expect where the compiler
// supports it, otherwise compile away to the bare expression.
#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
# define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
# define expect(expr,value)    (expr)
#endif

#define likely(expr)     expect((expr) != 0, 1)
#define unlikely(expr)   expect((expr) != 0, 0)


//**************************************
// Includes
//**************************************
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

#include "hammer2_lz4.h"

//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
# include <sys/stdint.h>
typedef uint8_t  BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t  S32;
typedef uint64_t U64;
#else
typedef unsigned char       BYTE;
typedef unsigned short      U16;
typedef unsigned int        U32;
typedef signed int          S32;
typedef unsigned long long  U64;
#endif

// Wrap each scalar in a packed struct so the A16/A32/A64 accessors (defined
// below) can perform unaligned loads/stores through a pointer cast.  GCC gets
// __attribute__((packed)); other compilers get #pragma pack(1) instead.
#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
# define _PACKED __attribute__ ((packed))
#else
# define _PACKED
#endif

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# pragma pack(push, 1)
#endif

typedef struct _U16_S { U16 v; } _PACKED U16_S;
typedef struct _U32_S { U32 v; } _PACKED U32_S;
typedef struct _U64_S { U64 v; } _PACKED U64_S;

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
# pragma pack(pop)
#endif

// Unaligned 64-bit access through the packed wrapper type above.
#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)


//**************************************
// Constants
//**************************************
#define HASHTABLESIZE (1 << MEMORY_USAGE)

#define MINMATCH 4

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
#define MINLENGTH (MFLIMIT+1)

#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
#define SKIPSTRENGTH 6
// Increasing this value will make the compression run slower on
// incompressible data

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

// Token layout: high RUN_BITS bits = literal run length, low ML_BITS bits
// = match length (both saturate and spill into extra length bytes).
#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)


//**************************************
// Architecture-specific macros
//**************************************
// NOTE: LZ4_COPYSTEP / LZ4_COPYPACKET advance BOTH pointer arguments as a
// side effect -- callers rely on this, do not wrap the arguments.
#if LZ4_ARCH64   // 64-bit
# define STEPSIZE 8
# define UARCH U64
# define AARCH A64
# define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
# define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e)
# define HTYPE U32
# define INITBASE(base) BYTE* base = ip
#else   // 32-bit
# define STEPSIZE 4
# define UARCH U32
# define AARCH A32
# define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
# define LZ4_SECURECOPY LZ4_WILDCOPY
# define HTYPE BYTE*
# define INITBASE(base) int base = 0
#endif

// Match offsets are stored little-endian on the wire; big-endian hosts must
// byte-swap.  NOTE(review): lz4_bswap16 is not defined anywhere in this
// reduced file (the header comment says only little-endian is supported here);
// the big-endian branch would not link as-is -- confirm against the full LZ4
// library if big-endian support is ever needed.
#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
# define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
#else   // Little Endian
# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
# define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
#endif


//**************************************
// Macros
//**************************************
// LZ4_WILDCOPY copies in STEPSIZE/8-byte packets and may overrun 'e' by up to
// one packet; callers must guarantee slack space past the destination end.
#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
#define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }


//****************************
// Private functions
//****************************
#if LZ4_ARCH64

// Return the number of bytes (0..7) common to the start of two words whose
// XOR is 'val': on little-endian this is the index of the lowest non-zero
// byte, on big-endian the highest.  Uses hardware bit-scan when available,
// otherwise a De Bruijn multiply-and-lookup (little-endian) or shift
// cascade (big-endian).
static
inline
int
LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64( &r, val );
	return (int)(r>>3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	// Software fallback: locate the highest non-zero byte by halving.
	int r;
	if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
	if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
	r += (!val);
	return r;
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64( &r, val );
	return (int)(r>>3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	// De Bruijn lookup: (val & -val) isolates the lowest set bit; the
	// multiply maps each of the 64 possible bit positions to a unique
	// 6-bit table index.
	static int DeBruijnBytePos[64] = {
		0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
		1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
		1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
		6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
		2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
		2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
		7, 6, 7, 7 };
	return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
#endif
#endif
}

#else

// 32-bit variant of the above: number of bytes (0..3) common to the start of
// two words whose XOR is 'val'.
static
inline
int
LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse( &r, val );
	return (int)(r>>3);
# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
# else
	// Software fallback: locate the highest non-zero byte by halving.
	int r;
	if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
	r += (!val);
	return r;
# endif
#else
# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r;
	_BitScanForward( &r, val );
	return (int)(r>>3);
# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
# else
	// De Bruijn lookup, 32-bit flavour (constant 0x077CB531 is the
	// classic De Bruijn sequence B(2,5)).
	static int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
		2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
		2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
		1, 1 };
	return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
#endif
}

#endif



//******************************
// Compression functions
//******************************

#include "hammer2_lz4_encoder.h"

/*
void* LZ4_createHeapMemory();
int LZ4_freeHeapMemory(void* ctx);

Used to allocate and free hashTable memory
to be used by the LZ4_compress_heap* family of functions.
LZ4_createHeapMemory() returns NULL if memory allocation fails.
*/
// Allocate the hash-table workspace (2^MEMORY_USAGE bytes) used by the
// heap-mode compressors.  Returns NULL on allocation failure.
// NOTE(review): despite the comment above naming LZ4_createHeapMemory(),
// the actual exported symbols are LZ4_create()/LZ4_free().
void*
LZ4_create(void)
{
	return malloc(HASHTABLESIZE);
}

// Release a workspace obtained from LZ4_create().  Always returns 0.
int
LZ4_free(void* ctx)
{
	free(ctx);
	return 0;
}

// Compress 'inputSize' bytes from 'source' into 'dest', writing at most
// 'maxOutputSize' bytes.  Allocates a temporary hash table on the heap.
// Returns the compressed size, or 0 on failure (allocation failure, or
// output would exceed maxOutputSize -- presumably, per the *_limitedOutput
// contract in hammer2_lz4_encoder.h; confirm there).
int
LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize)
{
	void* ctx = LZ4_create();
	int result;
	if (ctx == NULL) return 0;   // Failed allocation => compression not done
	// Inputs below 64KB+ use the specialised small-offset variant.
	if (inputSize < LZ4_64KLIMIT)
		result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
		    inputSize, maxOutputSize);
	else result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
		 inputSize, maxOutputSize);
	LZ4_free(ctx);
	return result;
}


//****************************
// Decompression functions
//****************************

typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
typedef enum { full = 0, partial = 1 } exit_directive;


// This generic decompression function cover all use cases.
// It shall be instanciated several times, using different sets of directives
// Note that it is essential this generic function is really inlined,
// in order to remove useless branches during compilation optimisation.
static
inline
int LZ4_decompress_generic(
		char* source,          // compressed input
		char* dest,            // output buffer
		int inputSize,         // size of compressed input (only trusted
		                       // when endOnInput==endOnInputSize)
		int outputSize,
		// OutputSize must be != 0; if endOnInput==endOnInputSize,
		// this value is the max size of Output Buffer.

		int endOnInput,        // endOnOutputSize, endOnInputSize
		int prefix64k,         // noPrefix, withPrefix
		int partialDecoding,   // full, partial
		int targetOutputSize   // only used if partialDecoding==partial
		)
{
	// Local Variables
	BYTE* restrict ip = (BYTE*) source;   // read cursor in compressed data
	BYTE* ref;                            // match source (back-reference)
	BYTE* iend = ip + inputSize;          // end of compressed input

	BYTE* op = (BYTE*) dest;              // write cursor in output
	BYTE* oend = op + outputSize;         // end of output buffer
	BYTE* cpy;                            // end of current copy segment
	BYTE* oexit = op + targetOutputSize;  // early-exit point (partial mode)

	// Correction tables for overlapping matches with offset < STEPSIZE:
	// indexed by the (small) offset, they realign 'ref' so the word-at-a-
	// time copy reproduces the byte-at-a-time RLE semantics.
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif


	// Special case
	if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;
	// targetOutputSize too large, better decode everything
	if unlikely(outputSize==0) goto _output_error;
	// Empty output buffer


	// Main Loop: one iteration per LZ4 sequence
	// (token, literals, offset, match).
	while (1)
	{
		unsigned token;
		size_t length;

		// get runlength: high 4 bits of the token; the value RUN_MASK
		// (15) means "read additional length bytes until one != 255".
		token = *ip++;
		if ((length=(token>>ML_BITS)) == RUN_MASK)
		{
			unsigned s=255;
			while (((endOnInput)?ip<iend:1) && (s==255))
			{
				s = *ip++;
				length += s;
			}
		}

		// copy literals: fast path wild-copies; the slow path below
		// handles the final (bounded) segment of the stream.
		cpy = op+length;
		if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
		    || (ip+length>iend-(2+1+LASTLITERALS))) )
		    || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
		{
			if (partialDecoding)
			{
				if (cpy > oend) goto _output_error;
				// Error : write attempt beyond end of output buffer
				if ((endOnInput) && (ip+length > iend)) goto _output_error;
				// Error : read attempt beyond end of input buffer
			}
			else
			{
				if ((!endOnInput) && (cpy != oend)) goto _output_error;
				// Error : block decoding must stop exactly there,
				// due to parsing restrictions
				if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
					goto _output_error;
				// Error : not enough place for another match (min 4) + 5 literals
			}
			memcpy(op, ip, length);
			ip += length;
			op += length;
			break;
			// Necessarily EOF, due to parsing restrictions
		}
		LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;

		// get offset: 16-bit little-endian distance back into the
		// already-decoded output; ref = cpy - offset.
		LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
		if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
			goto _output_error;   // Error : offset outside destination buffer

		// get matchlength: low 4 bits of the token; ML_MASK (15) means
		// "read additional length bytes until one != 255".
		if ((length=(token&ML_MASK)) == ML_MASK)
		{
			while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
			// A minimum nb of input bytes must remain for LASTLITERALS + token
			{
				unsigned s = *ip++;
				length += s;
				if (s==255) continue;
				break;
			}
		}

		// copy repeated sequence; offsets < STEPSIZE overlap the copy
		// source and destination, so the first STEPSIZE bytes are copied
		// bytewise and 'ref' is realigned via the dec tables.
		if unlikely((op-ref)<STEPSIZE)
		{
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const size_t dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4, ref += 4; ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE-4; ref -= dec64;
		} else { LZ4_COPYSTEP(ref,op); }
		cpy = op + length - (STEPSIZE-4);

		// Near the end of the output buffer the wild copy would overrun:
		// fall back to a bounded copy finished bytewise.
		if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
		{
			if (cpy > oend-LASTLITERALS) goto _output_error;
			// Error : last 5 bytes must be literals
			LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
			while(op<cpy) *op++=*ref++;
			op=cpy;
			continue;
		}
		LZ4_WILDCOPY(ref, op, cpy);
		op=cpy;   // correction: WILDCOPY may overshoot, resync to exact end
	}

	// end of decoding
	if (endOnInput)
		return (int) (((char*)op)-dest);     // Nb of output bytes decoded
	else
		return (int) (((char*)ip)-source);   // Nb of input bytes read

	// Overflow error detected
_output_error:
	// Negative result encodes (approximately) how far into the input the
	// error was detected: -(bytes consumed) - 1.
	return (int) (-(((char*)ip)-source))-1;
}


// Safe decompression: reads exactly 'inputSize' bytes, writes at most
// 'maxOutputSize' bytes, never reads/writes out of bounds even on
// malicious input.  Returns bytes decoded, or a negative value on error.
int
LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
{
	return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
	    endOnInputSize, noPrefix, full, 0);
}