1 /* 2 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 #include "zstd_compress_internal.h" 12 #include "zstd_lazy.h" 13 14 15 /*-************************************* 16 * Binary Tree search 17 ***************************************/ 18 19 static void 20 ZSTD_updateDUBT(ZSTD_matchState_t* ms, 21 const BYTE* ip, const BYTE* iend, 22 U32 mls) 23 { 24 const ZSTD_compressionParameters* const cParams = &ms->cParams; 25 U32* const hashTable = ms->hashTable; 26 U32 const hashLog = cParams->hashLog; 27 28 U32* const bt = ms->chainTable; 29 U32 const btLog = cParams->chainLog - 1; 30 U32 const btMask = (1 << btLog) - 1; 31 32 const BYTE* const base = ms->window.base; 33 U32 const target = (U32)(ip - base); 34 U32 idx = ms->nextToUpdate; 35 36 if (idx != target) 37 DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", 38 idx, target, ms->window.dictLimit); 39 assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ 40 (void)iend; 41 42 assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ 43 for ( ; idx < target ; idx++) { 44 size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ 45 U32 const matchIndex = hashTable[h]; 46 47 U32* const nextCandidatePtr = bt + 2*(idx&btMask); 48 U32* const sortMarkPtr = nextCandidatePtr + 1; 49 50 DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); 51 hashTable[h] = idx; /* Update Hash Table */ 52 *nextCandidatePtr = matchIndex; /* update BT like a chain */ 53 *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; 54 } 55 ms->nextToUpdate = target; 56 } 57 58 59 /** ZSTD_insertDUBT1() : 60 * sort one already inserted but unsorted position 61 * assumption : current >= btlow == (current - btmask) 62 * doesn't fail */ 63 static void 64 ZSTD_insertDUBT1(ZSTD_matchState_t* ms, 65 U32 current, const BYTE* inputEnd, 66 U32 nbCompares, U32 btLow, 67 const ZSTD_dictMode_e dictMode) 68 { 69 const ZSTD_compressionParameters* const cParams = &ms->cParams; 70 U32* const bt = ms->chainTable; 71 U32 const btLog = cParams->chainLog - 1; 72 U32 const btMask = (1 << btLog) - 1; 73 size_t commonLengthSmaller=0, commonLengthLarger=0; 74 const BYTE* const base = ms->window.base; 75 const BYTE* const dictBase = ms->window.dictBase; 76 const U32 dictLimit = ms->window.dictLimit; 77 const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; 78 const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; 79 const BYTE* const dictEnd = dictBase + dictLimit; 80 const BYTE* const prefixStart = base + dictLimit; 81 const BYTE* match; 82 U32* smallerPtr = bt + 2*(current&btMask); 83 U32* largerPtr = smallerPtr + 1; 84 U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ 85 U32 dummy32; /* to be nullified at the end */ 86 U32 const windowValid = ms->window.lowLimit; 87 U32 const maxDistance = 1U << cParams->windowLog; 88 U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; 89 90 91 DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", 92 current, dictLimit, windowLow); 93 assert(current >= btLow); 94 assert(ip < iend); /* condition for ZSTD_count */ 95 96 while (nbCompares-- && (matchIndex > windowLow)) { 97 U32* const nextPtr = bt + 2*(matchIndex & btMask); 98 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 99 assert(matchIndex < current); 100 /* note : all candidates are now supposed sorted, 101 * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK 102 * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ 103 104 if ( (dictMode != ZSTD_extDict) 105 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ 106 || (current < dictLimit) /* both in extDict */) { 107 const BYTE* const mBase = ( (dictMode != ZSTD_extDict) 108 || (matchIndex+matchLength >= dictLimit)) ? 109 base : dictBase; 110 assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ 111 || (current < dictLimit) ); 112 match = mBase + matchIndex; 113 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); 114 } else { 115 match = dictBase + matchIndex; 116 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 117 if (matchIndex+matchLength >= dictLimit) 118 match = base + matchIndex; /* preparation for next read of match[matchLength] */ 119 } 120 121 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", 122 current, matchIndex, (U32)matchLength); 123 124 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ 125 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ 126 } 127 128 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ 129 /* match is smaller than current */ 130 *smallerPtr = matchIndex; /* update smaller idx */ 131 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 132 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ 133 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", 134 matchIndex, btLow, nextPtr[1]); 135 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ 136 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ 137 } else { 138 /* match is larger than current */ 139 *largerPtr = matchIndex; 140 commonLengthLarger = matchLength; 141 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ 142 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", 143 matchIndex, btLow, nextPtr[0]); 144 largerPtr = nextPtr; 145 matchIndex = nextPtr[0]; 146 } } 147 148 *smallerPtr = *largerPtr = 0; 149 } 150 151 152 static size_t 153 ZSTD_DUBT_findBetterDictMatch ( 154 ZSTD_matchState_t* ms, 155 const BYTE* const ip, const BYTE* const iend, 156 size_t* offsetPtr, 157 size_t bestLength, 158 U32 nbCompares, 159 U32 const mls, 160 const ZSTD_dictMode_e dictMode) 161 { 162 const ZSTD_matchState_t * const dms = ms->dictMatchState; 163 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; 164 const U32 * const dictHashTable = dms->hashTable; 165 U32 const hashLog = dmsCParams->hashLog; 166 size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 167 U32 dictMatchIndex = dictHashTable[h]; 168 169 const BYTE* const base = ms->window.base; 170 const BYTE* const prefixStart = base + ms->window.dictLimit; 171 U32 const current = (U32)(ip-base); 172 const BYTE* const dictBase = dms->window.base; 173 const BYTE* const dictEnd = dms->window.nextSrc; 174 U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); 175 U32 const dictLowLimit = dms->window.lowLimit; 176 U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; 177 178 U32* const dictBt = dms->chainTable; 179 U32 const btLog = dmsCParams->chainLog - 1; 180 U32 const btMask = (1 << btLog) - 1; 181 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; 182 183 size_t commonLengthSmaller=0, commonLengthLarger=0; 184 185 (void)dictMode; 186 assert(dictMode == ZSTD_dictMatchState); 187 188 while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { 189 U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); 190 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 191 const BYTE* match = dictBase + dictMatchIndex; 192 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 193 if (dictMatchIndex+matchLength >= dictHighLimit) 194 match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ 195 196 if (matchLength > bestLength) { 197 U32 matchIndex = dictMatchIndex + dictIndexDelta; 198 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { 199 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", 200 current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex); 201 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; 202 } 203 if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ 204 break; /* drop, to guarantee consistency (miss a little bit of compression) */ 205 } 206 } 207 208 if (match[matchLength] < ip[matchLength]) { 209 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ 210 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 211 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 212 } else { 213 /* match is larger than current */ 214 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ 215 commonLengthLarger = matchLength; 216 dictMatchIndex = nextPtr[0]; 217 } 218 } 219 220 if (bestLength >= MINMATCH) { 221 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; 222 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", 223 current, (U32)bestLength, (U32)*offsetPtr, mIndex); 224 } 225 return bestLength; 226 227 } 228 229 230 static size_t 231 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, 232 const BYTE* const ip, const BYTE* const iend, 233 size_t* offsetPtr, 234 U32 const mls, 235 const ZSTD_dictMode_e dictMode) 236 { 237 const ZSTD_compressionParameters* const cParams = &ms->cParams; 238 U32* const hashTable = ms->hashTable; 239 U32 const hashLog = cParams->hashLog; 240 size_t const h = ZSTD_hashPtr(ip, hashLog, mls); 241 U32 matchIndex = hashTable[h]; 242 243 const BYTE* const base = ms->window.base; 244 U32 const current = (U32)(ip-base); 245 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); 246 247 U32* const bt = ms->chainTable; 248 U32 const btLog = cParams->chainLog - 1; 249 U32 const btMask = (1 << btLog) - 1; 250 U32 const btLow = (btMask >= current) ? 0 : current - btMask; 251 U32 const unsortLimit = MAX(btLow, windowLow); 252 253 U32* nextCandidate = bt + 2*(matchIndex&btMask); 254 U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; 255 U32 nbCompares = 1U << cParams->searchLog; 256 U32 nbCandidates = nbCompares; 257 U32 previousCandidate = 0; 258 259 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); 260 assert(ip <= iend-8); /* required for h calculation */ 261 262 /* reach end of unsorted candidates list */ 263 while ( (matchIndex > unsortLimit) 264 && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) 265 && (nbCandidates > 1) ) { 266 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", 267 matchIndex); 268 *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ 269 previousCandidate = matchIndex; 270 matchIndex = *nextCandidate; 271 nextCandidate = bt + 2*(matchIndex&btMask); 272 unsortedMark = bt + 2*(matchIndex&btMask) + 1; 273 nbCandidates --; 274 } 275 276 /* nullify last candidate if it's still unsorted 277 * simplification, detrimental to compression ratio, beneficial for speed */ 278 if ( (matchIndex > unsortLimit) 279 && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { 280 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", 281 matchIndex); 282 *nextCandidate = *unsortedMark = 0; 283 } 284 285 /* batch sort stacked candidates */ 286 matchIndex = previousCandidate; 287 while (matchIndex) { /* will end on matchIndex == 0 */ 288 U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; 289 U32 const nextCandidateIdx = *nextCandidateIdxPtr; 290 ZSTD_insertDUBT1(ms, matchIndex, iend, 291 nbCandidates, unsortLimit, dictMode); 292 matchIndex = nextCandidateIdx; 293 nbCandidates++; 294 } 295 296 /* find longest match */ 297 { size_t commonLengthSmaller = 0, commonLengthLarger = 0; 298 const BYTE* const dictBase = ms->window.dictBase; 299 const U32 dictLimit = ms->window.dictLimit; 300 const BYTE* const dictEnd = dictBase + dictLimit; 301 const BYTE* const prefixStart = base + dictLimit; 302 U32* smallerPtr = bt + 2*(current&btMask); 303 U32* largerPtr = bt + 2*(current&btMask) + 1; 304 U32 matchEndIdx = current + 8 + 1; 305 U32 dummy32; /* to be nullified at the end */ 306 size_t bestLength = 0; 307 308 matchIndex = hashTable[h]; 309 hashTable[h] = current; /* Update Hash Table */ 310 311 while (nbCompares-- && (matchIndex > windowLow)) { 312 U32* const nextPtr = bt + 2*(matchIndex & btMask); 313 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ 314 const BYTE* match; 315 316 if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { 317 match = base + matchIndex; 318 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); 319 } else { 320 match = dictBase + matchIndex; 321 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); 322 if (matchIndex+matchLength >= dictLimit) 323 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ 324 } 325 326 if (matchLength > bestLength) { 327 if (matchLength > matchEndIdx - matchIndex) 328 matchEndIdx = matchIndex + (U32)matchLength; 329 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) 330 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; 331 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ 332 if (dictMode == ZSTD_dictMatchState) { 333 nbCompares = 0; /* in addition to avoiding checking any 334 * further in this loop, make sure we 335 * skip checking in the dictionary. */ 336 } 337 break; /* drop, to guarantee consistency (miss a little bit of compression) */ 338 } 339 } 340 341 if (match[matchLength] < ip[matchLength]) { 342 /* match is smaller than current */ 343 *smallerPtr = matchIndex; /* update smaller idx */ 344 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ 345 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 346 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ 347 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ 348 } else { 349 /* match is larger than current */ 350 *largerPtr = matchIndex; 351 commonLengthLarger = matchLength; 352 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ 353 largerPtr = nextPtr; 354 matchIndex = nextPtr[0]; 355 } } 356 357 *smallerPtr = *largerPtr = 0; 358 359 if (dictMode == ZSTD_dictMatchState && nbCompares) { 360 bestLength = ZSTD_DUBT_findBetterDictMatch( 361 ms, ip, iend, 362 offsetPtr, bestLength, nbCompares, 363 mls, dictMode); 364 } 365 366 assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ 367 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ 368 if (bestLength >= MINMATCH) { 369 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; 370 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", 371 current, (U32)bestLength, (U32)*offsetPtr, mIndex); 372 } 373 return bestLength; 374 } 375 } 376 377 378 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ 379 FORCE_INLINE_TEMPLATE size_t 380 ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, 381 const BYTE* const ip, const BYTE* const iLimit, 382 size_t* offsetPtr, 383 const U32 mls /* template */, 384 const ZSTD_dictMode_e dictMode) 385 { 386 DEBUGLOG(7, "ZSTD_BtFindBestMatch"); 387 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ 388 ZSTD_updateDUBT(ms, ip, iLimit, mls); 389 return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); 390 } 391 392 393 static size_t 394 ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, 395 const BYTE* ip, const BYTE* const iLimit, 396 size_t* offsetPtr) 397 { 398 switch(ms->cParams.minMatch) 399 { 400 default : /* includes case 3 */ 401 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); 402 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); 403 case 7 : 404 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); 405 } 406 } 407 408 409 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( 410 ZSTD_matchState_t* ms, 411 const BYTE* ip, const BYTE* const iLimit, 412 size_t* offsetPtr) 413 { 414 switch(ms->cParams.minMatch) 415 { 416 default : /* includes case 3 */ 417 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); 418 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); 419 case 7 : 420 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); 421 } 422 } 423 424 425 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( 426 ZSTD_matchState_t* ms, 427 const BYTE* ip, const BYTE* const iLimit, 428 size_t* offsetPtr) 429 { 430 switch(ms->cParams.minMatch) 431 { 432 default : /* includes case 3 */ 433 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); 434 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); 435 case 7 : 436 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); 437 } 438 } 439 440 441 442 /* ********************************* 443 * Hash Chain 444 ***********************************/ 445 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] 446 447 /* Update chains up to ip (excluded) 448 Assumption : always within prefix (i.e. not within extDict) */ 449 static U32 ZSTD_insertAndFindFirstIndex_internal( 450 ZSTD_matchState_t* ms, 451 const ZSTD_compressionParameters* const cParams, 452 const BYTE* ip, U32 const mls) 453 { 454 U32* const hashTable = ms->hashTable; 455 const U32 hashLog = cParams->hashLog; 456 U32* const chainTable = ms->chainTable; 457 const U32 chainMask = (1 << cParams->chainLog) - 1; 458 const BYTE* const base = ms->window.base; 459 const U32 target = (U32)(ip - base); 460 U32 idx = ms->nextToUpdate; 461 462 while(idx < target) { /* catch up */ 463 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); 464 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; 465 hashTable[h] = idx; 466 idx++; 467 } 468 469 ms->nextToUpdate = target; 470 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; 471 } 472 473 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { 474 const ZSTD_compressionParameters* const cParams = &ms->cParams; 475 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); 476 } 477 478 479 /* inlining is important to hardwire a hot branch (template emulation) */ 480 FORCE_INLINE_TEMPLATE 481 size_t ZSTD_HcFindBestMatch_generic ( 482 ZSTD_matchState_t* ms, 483 const BYTE* const ip, const BYTE* const iLimit, 484 size_t* offsetPtr, 485 const U32 mls, const ZSTD_dictMode_e dictMode) 486 { 487 const ZSTD_compressionParameters* const cParams = &ms->cParams; 488 U32* const chainTable = ms->chainTable; 489 const U32 chainSize = (1 << cParams->chainLog); 490 const U32 chainMask = chainSize-1; 491 const BYTE* const base = ms->window.base; 492 const BYTE* const dictBase = ms->window.dictBase; 493 const U32 dictLimit = ms->window.dictLimit; 494 const BYTE* const prefixStart = base + dictLimit; 495 const BYTE* const dictEnd = dictBase + dictLimit; 496 const U32 current = (U32)(ip-base); 497 const U32 maxDistance = 1U << cParams->windowLog; 498 const U32 lowestValid = ms->window.lowLimit; 499 const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; 500 const U32 isDictionary = (ms->loadedDictEnd != 0); 501 const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; 502 const U32 minChain = current > chainSize ? current - chainSize : 0; 503 U32 nbAttempts = 1U << cParams->searchLog; 504 size_t ml=4-1; 505 506 /* HC4 match finder */ 507 U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); 508 509 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { 510 size_t currentMl=0; 511 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { 512 const BYTE* const match = base + matchIndex; 513 assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ 514 if (match[ml] == ip[ml]) /* potentially better */ 515 currentMl = ZSTD_count(ip, match, iLimit); 516 } else { 517 const BYTE* const match = dictBase + matchIndex; 518 assert(match+4 <= dictEnd); 519 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ 520 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; 521 } 522 523 /* save best solution */ 524 if (currentMl > ml) { 525 ml = currentMl; 526 *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; 527 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ 528 } 529 530 if (matchIndex <= minChain) break; 531 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); 532 } 533 534 if (dictMode == ZSTD_dictMatchState) { 535 const ZSTD_matchState_t* const dms = ms->dictMatchState; 536 const U32* const dmsChainTable = dms->chainTable; 537 const U32 dmsChainSize = (1 << dms->cParams.chainLog); 538 const U32 dmsChainMask = dmsChainSize - 1; 539 const U32 dmsLowestIndex = dms->window.dictLimit; 540 const BYTE* const dmsBase = dms->window.base; 541 const BYTE* const dmsEnd = dms->window.nextSrc; 542 const U32 dmsSize = (U32)(dmsEnd - dmsBase); 543 const U32 dmsIndexDelta = dictLimit - dmsSize; 544 const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; 545 546 matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; 547 548 for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { 549 size_t currentMl=0; 550 const BYTE* const match = dmsBase + matchIndex; 551 assert(match+4 <= dmsEnd); 552 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ 553 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; 554 555 /* save best solution */ 556 if (currentMl > ml) { 557 ml = currentMl; 558 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; 559 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ 560 } 561 562 if (matchIndex <= dmsMinChain) break; 563 matchIndex = dmsChainTable[matchIndex & dmsChainMask]; 564 } 565 } 566 567 return ml; 568 } 569 570 571 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( 572 ZSTD_matchState_t* ms, 573 const BYTE* ip, const BYTE* const iLimit, 574 size_t* offsetPtr) 575 { 576 switch(ms->cParams.minMatch) 577 { 578 default : /* includes case 3 */ 579 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); 580 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); 581 case 7 : 582 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); 583 } 584 } 585 586 587 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( 588 ZSTD_matchState_t* ms, 589 const BYTE* ip, const BYTE* const iLimit, 590 size_t* offsetPtr) 591 { 592 switch(ms->cParams.minMatch) 593 { 594 default : /* includes case 3 */ 595 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); 596 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); 597 case 7 : 598 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); 599 } 600 } 601 602 603 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( 604 ZSTD_matchState_t* ms, 605 const BYTE* ip, const BYTE* const iLimit, 606 size_t* offsetPtr) 607 { 608 switch(ms->cParams.minMatch) 609 { 610 default : /* includes case 3 */ 611 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); 612 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); 613 case 7 : 614 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); 615 } 616 } 617 618 619 /* ******************************* 620 * Common parser - lazy strategy 621 *********************************/ 622 typedef enum { search_hashChain, search_binaryTree } searchMethod_e; 623 624 size_t 625 ZSTD_compressBlock_lazy_generic( 626 ZSTD_matchState_t* ms, seqStore_t* seqStore, 627 U32 rep[ZSTD_REP_NUM], 628 const void* src, size_t srcSize, 629 const searchMethod_e searchMethod, const U32 depth, 630 ZSTD_dictMode_e const dictMode) 631 { 632 const BYTE* const istart = (const BYTE*)src; 633 const BYTE* ip = istart; 634 const BYTE* anchor = istart; 635 const BYTE* const iend = istart + srcSize; 636 const BYTE* const ilimit = iend - 8; 637 const BYTE* const base = ms->window.base; 638 const U32 prefixLowestIndex = ms->window.dictLimit; 639 const BYTE* const prefixLowest = base + prefixLowestIndex; 640 641 typedef size_t (*searchMax_f)( 642 ZSTD_matchState_t* ms, 643 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); 644 searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? 645 (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS 646 : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : 647 (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS 648 : ZSTD_HcFindBestMatch_selectMLS); 649 U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; 650 651 const ZSTD_matchState_t* const dms = ms->dictMatchState; 652 const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? 653 dms->window.dictLimit : 0; 654 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? 655 dms->window.base : NULL; 656 const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? 657 dictBase + dictLowestIndex : NULL; 658 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? 659 dms->window.nextSrc : NULL; 660 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? 661 prefixLowestIndex - (U32)(dictEnd - dictBase) : 662 0; 663 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); 664 665 DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); 666 667 /* init */ 668 ip += (dictAndPrefixLength == 0); 669 if (dictMode == ZSTD_noDict) { 670 U32 const current = (U32)(ip - base); 671 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog); 672 U32 const maxRep = current - windowLow; 673 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; 674 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; 675 } 676 if (dictMode == ZSTD_dictMatchState) { 677 /* dictMatchState repCode checks don't currently handle repCode == 0 678 * disabling. */ 679 assert(offset_1 <= dictAndPrefixLength); 680 assert(offset_2 <= dictAndPrefixLength); 681 } 682 683 /* Match Loop */ 684 #if defined(__GNUC__) && defined(__x86_64__) 685 /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the 686 * code alignment is perturbed. To fix the instability align the loop on 32-bytes. 687 */ 688 __asm__(".p2align 5"); 689 #endif 690 while (ip < ilimit) { 691 size_t matchLength=0; 692 size_t offset=0; 693 const BYTE* start=ip+1; 694 695 /* check repCode */ 696 if (dictMode == ZSTD_dictMatchState) { 697 const U32 repIndex = (U32)(ip - base) + 1 - offset_1; 698 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState 699 && repIndex < prefixLowestIndex) ? 700 dictBase + (repIndex - dictIndexDelta) : 701 base + repIndex; 702 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 703 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 704 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; 705 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 706 if (depth==0) goto _storeSequence; 707 } 708 } 709 if ( dictMode == ZSTD_noDict 710 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { 711 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; 712 if (depth==0) goto _storeSequence; 713 } 714 715 /* first search (depth 0) */ 716 { size_t offsetFound = 999999999; 717 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); 718 if (ml2 > matchLength) 719 matchLength = ml2, start = ip, offset=offsetFound; 720 } 721 722 if (matchLength < 4) { 723 ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ 724 continue; 725 } 726 727 /* let's try to find a better solution */ 728 if (depth>=1) 729 while (ip<ilimit) { 730 ip ++; 731 if ( (dictMode == ZSTD_noDict) 732 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 733 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; 734 int const gain2 = (int)(mlRep * 3); 735 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); 736 if ((mlRep >= 4) && (gain2 > gain1)) 737 matchLength = mlRep, offset = 0, start = ip; 738 } 739 if (dictMode == ZSTD_dictMatchState) { 740 const U32 repIndex = (U32)(ip - base) - offset_1; 741 const BYTE* repMatch = repIndex < prefixLowestIndex ? 742 dictBase + (repIndex - dictIndexDelta) : 743 base + repIndex; 744 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 745 && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 746 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; 747 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 748 int const gain2 = (int)(mlRep * 3); 749 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); 750 if ((mlRep >= 4) && (gain2 > gain1)) 751 matchLength = mlRep, offset = 0, start = ip; 752 } 753 } 754 { size_t offset2=999999999; 755 size_t const ml2 = searchMax(ms, ip, iend, &offset2); 756 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 757 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); 758 if ((ml2 >= 4) && (gain2 > gain1)) { 759 matchLength = ml2, offset = offset2, start = ip; 760 continue; /* search a better one */ 761 } } 762 763 /* let's find an even better one */ 764 if ((depth==2) && (ip<ilimit)) { 765 ip ++; 766 if ( (dictMode == ZSTD_noDict) 767 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 768 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; 769 int const gain2 = (int)(mlRep * 4); 770 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); 771 if ((mlRep >= 4) && (gain2 > gain1)) 772 matchLength = mlRep, offset = 0, start = ip; 773 } 774 if (dictMode == ZSTD_dictMatchState) { 775 const U32 repIndex = (U32)(ip - base) - offset_1; 776 const BYTE* repMatch = repIndex < prefixLowestIndex ? 777 dictBase + (repIndex - dictIndexDelta) : 778 base + repIndex; 779 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 780 && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 781 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; 782 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 783 int const gain2 = (int)(mlRep * 4); 784 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); 785 if ((mlRep >= 4) && (gain2 > gain1)) 786 matchLength = mlRep, offset = 0, start = ip; 787 } 788 } 789 { size_t offset2=999999999; 790 size_t const ml2 = searchMax(ms, ip, iend, &offset2); 791 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 792 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); 793 if ((ml2 >= 4) && (gain2 > gain1)) { 794 matchLength = ml2, offset = offset2, start = ip; 795 continue; 796 } } } 797 break; /* nothing found : store previous solution */ 798 } 799 800 /* NOTE: 801 * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. 802 * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which 803 * overflows the pointer, which is undefined behavior. 804 */ 805 /* catch up */ 806 if (offset) { 807 if (dictMode == ZSTD_noDict) { 808 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) 809 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ 810 { start--; matchLength++; } 811 } 812 if (dictMode == ZSTD_dictMatchState) { 813 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); 814 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; 815 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; 816 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ 817 } 818 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); 819 } 820 /* store sequence */ 821 _storeSequence: 822 { size_t const litLength = start - anchor; 823 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); 824 anchor = ip = start + matchLength; 825 } 826 827 /* check immediate repcode */ 828 if (dictMode == ZSTD_dictMatchState) { 829 while (ip <= ilimit) { 830 U32 const current2 = (U32)(ip-base); 831 U32 const repIndex = current2 - offset_2; 832 const BYTE* repMatch = dictMode == ZSTD_dictMatchState 833 && repIndex < prefixLowestIndex ? 834 dictBase - dictIndexDelta + repIndex : 835 base + repIndex; 836 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) 837 && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 838 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; 839 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; 840 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ 841 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); 842 ip += matchLength; 843 anchor = ip; 844 continue; 845 } 846 break; 847 } 848 } 849 850 if (dictMode == ZSTD_noDict) { 851 while ( ((ip <= ilimit) & (offset_2>0)) 852 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { 853 /* store sequence */ 854 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; 855 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ 856 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); 857 ip += matchLength; 858 anchor = ip; 859 continue; /* faster when present ... (?) */ 860 } } } 861 862 /* Save reps for next block */ 863 rep[0] = offset_1 ? offset_1 : savedOffset; 864 rep[1] = offset_2 ? offset_2 : savedOffset; 865 866 /* Return the last literals size */ 867 return (size_t)(iend - anchor); 868 } 869 870 871 size_t ZSTD_compressBlock_btlazy2( 872 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 873 void const* src, size_t srcSize) 874 { 875 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); 876 } 877 878 size_t ZSTD_compressBlock_lazy2( 879 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 880 void const* src, size_t srcSize) 881 { 882 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); 883 } 884 885 size_t ZSTD_compressBlock_lazy( 886 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 887 void const* src, size_t srcSize) 888 { 889 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); 890 } 891 892 size_t ZSTD_compressBlock_greedy( 893 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 894 void const* src, size_t srcSize) 895 { 896 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); 897 } 898 899 size_t ZSTD_compressBlock_btlazy2_dictMatchState( 900 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 901 void const* src, size_t srcSize) 902 { 903 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); 904 } 905 906 size_t ZSTD_compressBlock_lazy2_dictMatchState( 907 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 908 void const* src, size_t srcSize) 909 { 910 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); 911 } 912 913 size_t ZSTD_compressBlock_lazy_dictMatchState( 914 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 915 void const* src, size_t srcSize) 916 { 917 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); 918 } 919 920 size_t ZSTD_compressBlock_greedy_dictMatchState( 921 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 922 void const* src, size_t srcSize) 923 { 924 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); 925 } 926 927 928 FORCE_INLINE_TEMPLATE 929 size_t ZSTD_compressBlock_lazy_extDict_generic( 930 ZSTD_matchState_t* ms, seqStore_t* seqStore, 931 U32 rep[ZSTD_REP_NUM], 932 const void* src, size_t srcSize, 933 const searchMethod_e searchMethod, const U32 depth) 934 { 935 const BYTE* const istart = (const BYTE*)src; 936 const BYTE* ip = istart; 937 const BYTE* anchor = istart; 938 const BYTE* const iend = istart + srcSize; 939 const BYTE* const ilimit = iend - 8; 940 const BYTE* const base = ms->window.base; 941 const U32 dictLimit = ms->window.dictLimit; 942 const BYTE* const prefixStart = base + dictLimit; 943 const BYTE* const dictBase = ms->window.dictBase; 944 const BYTE* const dictEnd = dictBase + dictLimit; 945 const BYTE* const dictStart = dictBase + ms->window.lowLimit; 946 const U32 windowLog = ms->cParams.windowLog; 947 948 typedef size_t (*searchMax_f)( 949 ZSTD_matchState_t* ms, 950 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); 951 searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; 952 953 U32 offset_1 = rep[0], offset_2 = rep[1]; 954 955 DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); 956 957 /* init */ 958 ip += (ip == prefixStart); 959 960 /* Match Loop */ 961 #if defined(__GNUC__) && defined(__x86_64__) 962 /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the 963 * code alignment is perturbed. To fix the instability align the loop on 32-bytes. 964 */ 965 __asm__(".p2align 5"); 966 #endif 967 while (ip < ilimit) { 968 size_t matchLength=0; 969 size_t offset=0; 970 const BYTE* start=ip+1; 971 U32 current = (U32)(ip-base); 972 973 /* check repCode */ 974 { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog); 975 const U32 repIndex = (U32)(current+1 - offset_1); 976 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 977 const BYTE* const repMatch = repBase + repIndex; 978 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ 979 if (MEM_read32(ip+1) == MEM_read32(repMatch)) { 980 /* repcode detected we should take it */ 981 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 982 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; 983 if (depth==0) goto _storeSequence; 984 } } 985 986 /* first search (depth 0) */ 987 { size_t offsetFound = 999999999; 988 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); 989 if (ml2 > matchLength) 990 matchLength = ml2, start = ip, offset=offsetFound; 991 } 992 993 if (matchLength < 4) { 994 ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ 995 continue; 996 } 997 998 /* let's try to find a better solution */ 999 if (depth>=1) 1000 while (ip<ilimit) { 1001 ip ++; 1002 current++; 1003 /* check repCode */ 1004 if (offset) { 1005 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); 1006 const U32 repIndex = (U32)(current - offset_1); 1007 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 1008 const BYTE* const repMatch = repBase + repIndex; 1009 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ 1010 if (MEM_read32(ip) == MEM_read32(repMatch)) { 1011 /* repcode detected */ 1012 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 1013 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 1014 int const gain2 = (int)(repLength * 3); 1015 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); 1016 if ((repLength >= 4) && (gain2 > gain1)) 1017 matchLength = repLength, offset = 0, start = ip; 1018 } } 1019 1020 /* search match, depth 1 */ 1021 { size_t offset2=999999999; 1022 size_t const ml2 = searchMax(ms, ip, iend, &offset2); 1023 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 1024 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); 1025 if ((ml2 >= 4) && (gain2 > gain1)) { 1026 matchLength = ml2, offset = offset2, start = ip; 1027 continue; /* search a better one */ 1028 } } 1029 1030 /* let's find an even better one */ 1031 if ((depth==2) && (ip<ilimit)) { 1032 ip ++; 1033 current++; 1034 /* check repCode */ 1035 if (offset) { 1036 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); 1037 const U32 repIndex = (U32)(current - offset_1); 1038 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 1039 const BYTE* const repMatch = repBase + repIndex; 1040 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ 1041 if (MEM_read32(ip) == MEM_read32(repMatch)) { 1042 /* repcode detected */ 1043 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 1044 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 1045 int const gain2 = (int)(repLength * 4); 1046 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); 1047 if ((repLength >= 4) && (gain2 > gain1)) 1048 matchLength = repLength, offset = 0, start = ip; 1049 } } 1050 1051 /* search match, depth 2 */ 1052 { size_t offset2=999999999; 1053 size_t const ml2 = searchMax(ms, ip, iend, &offset2); 1054 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ 1055 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); 1056 if ((ml2 >= 4) && (gain2 > gain1)) { 1057 matchLength = ml2, offset = offset2, start = ip; 1058 continue; 1059 } } } 1060 break; /* nothing found : store previous solution */ 1061 } 1062 1063 /* catch up */ 1064 if (offset) { 1065 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); 1066 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; 1067 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; 1068 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ 1069 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); 1070 } 1071 1072 /* store sequence */ 1073 _storeSequence: 1074 { size_t const litLength = start - anchor; 1075 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); 1076 anchor = ip = start + matchLength; 1077 } 1078 1079 /* check immediate repcode */ 1080 while (ip <= ilimit) { 1081 const U32 repCurrent = (U32)(ip-base); 1082 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); 1083 const U32 repIndex = repCurrent - offset_2; 1084 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 1085 const BYTE* const repMatch = repBase + repIndex; 1086 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ 1087 if (MEM_read32(ip) == MEM_read32(repMatch)) { 1088 /* repcode detected we should take it */ 1089 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 1090 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 1091 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ 1092 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); 1093 ip += matchLength; 1094 anchor = ip; 1095 continue; /* faster when present ... (?) */ 1096 } 1097 break; 1098 } } 1099 1100 /* Save reps for next block */ 1101 rep[0] = offset_1; 1102 rep[1] = offset_2; 1103 1104 /* Return the last literals size */ 1105 return (size_t)(iend - anchor); 1106 } 1107 1108 1109 size_t ZSTD_compressBlock_greedy_extDict( 1110 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1111 void const* src, size_t srcSize) 1112 { 1113 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); 1114 } 1115 1116 size_t ZSTD_compressBlock_lazy_extDict( 1117 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1118 void const* src, size_t srcSize) 1119 1120 { 1121 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); 1122 } 1123 1124 size_t ZSTD_compressBlock_lazy2_extDict( 1125 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1126 void const* src, size_t srcSize) 1127 1128 { 1129 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); 1130 } 1131 1132 size_t ZSTD_compressBlock_btlazy2_extDict( 1133 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1134 void const* src, size_t srcSize) 1135 1136 { 1137 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); 1138 } 1139