1 /* 2 LZ4 Encoder - Part of LZ4 compression algorithm 3 Copyright (C) 2011-2013, Yann Collet. 4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are 8 met: 9 10 * Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 * Redistributions in binary form must reproduce the above 13 copyright notice, this list of conditions and the following disclaimer 14 in the documentation and/or other materials provided with the 15 distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 You can contact the author at : 30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 31 - LZ4 source repository : http://code.google.com/p/lz4/ 32 */ 33 34 /* lz4_encoder.h must be included into lz4.c 35 The objective of this file is to create a single LZ4 compression function source 36 which will be instanciated multiple times with minor variations 37 depending on a set of #define. 38 */ 39 40 void* 41 LZ4_create(void); 42 int 43 LZ4_free(void* ctx); 44 45 int 46 LZ4_compress_heap_limitedOutput( 47 void* ctx, 48 char* source, 49 char* dest, 50 int inputSize, 51 int maxOutputSize); 52 53 int 54 LZ4_compress64k_heap_limitedOutput( 55 void* ctx, 56 char* source, 57 char* dest, 58 int inputSize, 59 int maxOutputSize); 60 61 62 //**************************** 63 // Local definitions 64 //**************************** 65 66 #ifdef COMPRESS_64K 67 # define HASHLOG (MEMORY_USAGE-1) 68 # define CURRENT_H_TYPE U16 69 # define CURRENTBASE(base) BYTE* base = ip 70 #else 71 # define HASHLOG (MEMORY_USAGE-2) 72 # define CURRENT_H_TYPE HTYPE 73 # define CURRENTBASE(base) INITBASE(base) 74 #endif 75 76 #define HASHTABLE_NBCELLS (1U<<HASHLOG) 77 #define LZ4_HASH(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG)) 78 #define LZ4_HASHVALUE(p) LZ4_HASH(A32(p)) 79 80 81 82 //**************************** 83 // Function code 84 //**************************** 85 86 int 87 LZ4_compress_heap_limitedOutput( 88 void* ctx, 89 char* source, 90 char* dest, 91 int inputSize, 92 int maxOutputSize) 93 { 94 CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx; 95 96 BYTE* ip = (BYTE*) source; 97 CURRENTBASE(base); 98 BYTE* anchor = ip; 99 BYTE* iend = ip + inputSize; 100 BYTE* mflimit = iend - MFLIMIT; 101 #define matchlimit (iend - LASTLITERALS) 102 103 BYTE* op = (BYTE*) dest; 104 BYTE* oend = op + maxOutputSize; 105 106 int length; 107 int skipStrength = SKIPSTRENGTH; 108 U32 forwardH; 109 110 111 // Init 112 if (inputSize<MINLENGTH) goto _last_literals; 113 114 memset((void*)HashTable, 0, HASHTABLESIZE); 115 116 // First Byte 117 HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base); 118 ip++; 119 forwardH = LZ4_HASHVALUE(ip); 120 121 // Main Loop 122 for ( ; ; ) 123 { 124 int findMatchAttempts = (1U << skipStrength) + 3; 125 BYTE* forwardIp = ip; 126 BYTE* ref; 127 BYTE* token; 128 129 // Find a match 130 do { 131 U32 h = forwardH; 132 int step = findMatchAttempts++ >> skipStrength; 133 ip = forwardIp; 134 forwardIp = ip + step; 135 136 if unlikely(forwardIp > mflimit) { 137 goto _last_literals; 138 } 139 140 forwardH = LZ4_HASHVALUE(forwardIp); 141 ref = base + HashTable[h]; 142 HashTable[h] = (CURRENT_H_TYPE)(ip - base); 143 144 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); 145 146 // Catch up 147 while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { 148 ip--; 149 ref--; 150 } 151 152 // Encode Literal length 153 length = (int)(ip - anchor); 154 token = op++; 155 156 if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) 157 return 0; // Check output limit 158 159 if (length>=(int)RUN_MASK) 160 { 161 int len = length-RUN_MASK; 162 *token=(RUN_MASK<<ML_BITS); 163 for(; len >= 255 ; len-=255) 164 *op++ = 255; 165 *op++ = (BYTE)len; 166 } 167 else *token = (BYTE)(length<<ML_BITS); 168 169 // Copy Literals 170 LZ4_BLINDCOPY(anchor, op, length); 171 172 _next_match: 173 // Encode Offset 174 LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref)); 175 176 // Start Counting 177 ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified 178 anchor = ip; 179 while likely(ip<matchlimit-(STEPSIZE-1)) 180 { 181 UARCH diff = AARCH(ref) ^ AARCH(ip); 182 if (!diff) { 183 ip+=STEPSIZE; 184 ref+=STEPSIZE; 185 continue; 186 } 187 ip += LZ4_NbCommonBytes(diff); 188 goto _endCount; 189 } 190 if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { 191 ip+=4; 192 ref+=4; 193 } 194 if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { 195 ip+=2; 196 ref+=2; 197 } 198 if ((ip<matchlimit) && (*ref == *ip)) 199 ip++; 200 _endCount: 201 202 // Encode MatchLength 203 length = (int)(ip - anchor); 204 205 if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) 206 return 0; // Check output limit 207 208 if (length>=(int)ML_MASK) 209 { 210 *token += ML_MASK; 211 length -= ML_MASK; 212 for (; length > 509 ; length-=510) { 213 *op++ = 255; 214 *op++ = 255; 215 } 216 if (length >= 255) { 217 length-=255; 218 *op++ = 255; 219 } 220 *op++ = (BYTE)length; 221 } 222 else *token += (BYTE)length; 223 224 // Test end of chunk 225 if (ip > mflimit) { 226 anchor = ip; 227 break; 228 } 229 230 // Fill table 231 HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base); 232 233 // Test next position 234 ref = base + HashTable[LZ4_HASHVALUE(ip)]; 235 HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base); 236 if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) { 237 token = op++; 238 *token=0; 239 goto _next_match; 240 } 241 242 // Prepare next loop 243 anchor = ip++; 244 forwardH = LZ4_HASHVALUE(ip); 245 } 246 247 _last_literals: 248 // Encode Last Literals 249 { 250 int lastRun = (int)(iend - anchor); 251 252 if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) 253 return 0; // Check output limit 254 255 if (lastRun>=(int)RUN_MASK) { 256 *op++=(RUN_MASK<<ML_BITS); 257 lastRun-=RUN_MASK; 258 for(; lastRun >= 255 ; lastRun-=255) 259 *op++ = 255; 260 *op++ = (BYTE) lastRun; 261 } 262 else *op++ = (BYTE)(lastRun<<ML_BITS); 263 memcpy(op, anchor, iend - anchor); 264 op += iend-anchor; 265 } 266 267 // End 268 return (int) (((char*)op)-dest); 269 } 270 271 int 272 LZ4_compress64k_heap_limitedOutput( 273 void* ctx, 274 char* source, 275 char* dest, 276 int inputSize, 277 int maxOutputSize) 278 { 279 CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx; 280 281 BYTE* ip = (BYTE*) source; 282 CURRENTBASE(base); 283 BYTE* anchor = ip; 284 BYTE* iend = ip + inputSize; 285 BYTE* mflimit = iend - MFLIMIT; 286 #define matchlimit (iend - LASTLITERALS) 287 288 BYTE* op = (BYTE*) dest; 289 BYTE* oend = op + maxOutputSize; 290 291 int length; 292 int skipStrength = SKIPSTRENGTH; 293 U32 forwardH; 294 295 296 // Init 297 if (inputSize<MINLENGTH) goto _last_literals; 298 299 memset((void*)HashTable, 0, HASHTABLESIZE); 300 301 // First Byte 302 HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base); 303 ip++; 304 forwardH = LZ4_HASHVALUE(ip); 305 306 // Main Loop 307 for ( ; ; ) 308 { 309 int findMatchAttempts = (1U << skipStrength) + 3; 310 BYTE* forwardIp = ip; 311 BYTE* ref; 312 BYTE* token; 313 314 // Find a match 315 do { 316 U32 h = forwardH; 317 int step = findMatchAttempts++ >> skipStrength; 318 ip = forwardIp; 319 forwardIp = ip + step; 320 321 if unlikely(forwardIp > mflimit) { 322 goto _last_literals; 323 } 324 325 forwardH = LZ4_HASHVALUE(forwardIp); 326 ref = base + HashTable[h]; 327 HashTable[h] = (CURRENT_H_TYPE)(ip - base); 328 329 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); 330 331 // Catch up 332 while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { 333 ip--; 334 ref--; 335 } 336 337 // Encode Literal length 338 length = (int)(ip - anchor); 339 token = op++; 340 341 if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) 342 return 0; // Check output limit 343 344 if (length>=(int)RUN_MASK) 345 { 346 int len = length-RUN_MASK; 347 *token=(RUN_MASK<<ML_BITS); 348 for(; len >= 255 ; len-=255) 349 *op++ = 255; 350 *op++ = (BYTE)len; 351 } 352 else *token = (BYTE)(length<<ML_BITS); 353 354 // Copy Literals 355 LZ4_BLINDCOPY(anchor, op, length); 356 357 _next_match: 358 // Encode Offset 359 LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref)); 360 361 // Start Counting 362 ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified 363 anchor = ip; 364 while likely(ip<matchlimit-(STEPSIZE-1)) 365 { 366 UARCH diff = AARCH(ref) ^ AARCH(ip); 367 if (!diff) { 368 ip+=STEPSIZE; 369 ref+=STEPSIZE; 370 continue; 371 } 372 ip += LZ4_NbCommonBytes(diff); 373 goto _endCount; 374 } 375 if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { 376 ip+=4; 377 ref+=4; 378 } 379 if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { 380 ip+=2; 381 ref+=2; 382 } 383 if ((ip<matchlimit) && (*ref == *ip)) 384 ip++; 385 _endCount: 386 387 // Encode MatchLength 388 length = (int)(ip - anchor); 389 390 if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) 391 return 0; // Check output limit 392 393 if (length>=(int)ML_MASK) 394 { 395 *token += ML_MASK; 396 length -= ML_MASK; 397 for (; length > 509 ; length-=510) { 398 *op++ = 255; 399 *op++ = 255; 400 } 401 if (length >= 255) { 402 length-=255; 403 *op++ = 255; 404 } 405 *op++ = (BYTE)length; 406 } 407 else *token += (BYTE)length; 408 409 // Test end of chunk 410 if (ip > mflimit) { 411 anchor = ip; 412 break; 413 } 414 415 // Fill table 416 HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base); 417 418 // Test next position 419 ref = base + HashTable[LZ4_HASHVALUE(ip)]; 420 HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base); 421 if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) { 422 token = op++; 423 *token=0; 424 goto _next_match; 425 } 426 427 // Prepare next loop 428 anchor = ip++; 429 forwardH = LZ4_HASHVALUE(ip); 430 } 431 432 _last_literals: 433 // Encode Last Literals 434 { 435 int lastRun = (int)(iend - anchor); 436 437 if (((char*)op - dest) + lastRun + 1 + 438 ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) 439 return 0; // Check output limit 440 441 if (lastRun>=(int)RUN_MASK) { 442 *op++=(RUN_MASK<<ML_BITS); 443 lastRun-=RUN_MASK; 444 for(; lastRun >= 255 ; lastRun-=255) 445 *op++ = 255; 446 *op++ = (BYTE) lastRun; 447 } 448 else *op++ = (BYTE)(lastRun<<ML_BITS); 449 memcpy(op, anchor, iend - anchor); 450 op += iend-anchor; 451 } 452 453 // End 454 return (int) (((char*)op)-dest); 455 } 456 457 //**************************** 458 // Clean defines 459 //**************************** 460 461 // Locally Generated 462 #undef HASHLOG 463 #undef HASHTABLE_NBCELLS 464 #undef LZ4_HASH 465 #undef LZ4_HASHVALUE 466 #undef CURRENT_H_TYPE 467 #undef CURRENTBASE 468