/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_ldm.h"

#include "debug.h"
#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */

#define LDM_BUCKET_SIZE_LOG 3
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10

void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                               ZSTD_compressionParameters const* cParams)
{
    params->windowLog = cParams->windowLog;
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    if (cParams->strategy >= ZSTD_btopt) {
        /* Get out of the way of the optimal parser */
        U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
        assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
        assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
        params->minMatchLength = minMatch;
    }
    if (params->hashLog == 0) {
        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    }
    if (params->hashRateLog == 0) {
        params->hashRateLog = params->windowLog < params->hashLog
                                   ? 0
                                   : params->windowLog - params->hashLog;
    }
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}

size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
    size_t const ldmHSize = ((size_t)1) << params.hashLog;
    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
    return params.enableLdm ? totalSize : 0;
}

size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
{
    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}

/** ZSTD_ldm_getSmallHash() :
 *  numBits should be <= 32
 *  If numBits==0, returns 0.
 *  @return : the most significant numBits of value. */
static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
{
    assert(numBits <= 32);
    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
}

/** ZSTD_ldm_getChecksum() :
 *  numBitsToDiscard should be <= 32
 *  @return : the next most significant 32 bits after numBitsToDiscard */
static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
{
    assert(numBitsToDiscard <= 32);
    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
}
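
/* Illustrative note (assumption, not part of the original source): with
 * hBits == 20 and hashRateLog == 7 (values chosen for illustration only),
 * ZSTD_ldm_getSmallHash(), ZSTD_ldm_getChecksum() above and
 * ZSTD_ldm_getTag() below partition a 64-bit rolling hash H as
 *     ZSTD_ldm_getSmallHash(H, 20)  -> bits 63..44 (hash table index)
 *     ZSTD_ldm_getChecksum(H, 20)   -> bits 43..12 (32-bit checksum)
 *     ZSTD_ldm_getTag(H, 20, 7)     -> bits 11..5  (insertion tag)
 */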

/** ZSTD_ldm_getTag() :
 *  Given the hash, returns the most significant numTagBits bits
 *  after (32 + hbits) bits.
 *
 *  If there are not enough bits remaining, return the last
 *  numTagBits bits. */
static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
{
    assert(numTagBits < 32 && hbits <= 32);
    if (32 - hbits < numTagBits) {
        return hash & (((U32)1 << numTagBits) - 1);
    } else {
        return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
    }
}

/** ZSTD_ldm_getBucket() :
 *  Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
{
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
}

/** ZSTD_ldm_insertEntry() :
 *  Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                 size_t const hash, const ldmEntry_t entry,
                                 ldmParams_t const ldmParams)
{
    BYTE* const bucketOffsets = ldmState->bucketOffsets;
    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
    bucketOffsets[hash]++;
    bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
}

/** ZSTD_ldm_makeEntryAndInsertByTag() :
 *
 *  Gets the small hash, checksum, and tag from the rollingHash.
 *
 *  If the tag matches (1 << ldmParams.hashRateLog)-1, then
 *  creates an ldmEntry from the offset, and inserts it into the hash table.
 *
 *  hBits is the length of the small hash, which is the most significant hBits
 *  of rollingHash. The checksum is the next 32 most significant bits, followed
 *  by ldmParams.hashRateLog bits that make up the tag. */
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
                                             U64 const rollingHash,
                                             U32 const hBits,
                                             U32 const offset,
                                             ldmParams_t const ldmParams)
{
    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
    U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
    if (tag == tagMask) {
        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
        ldmEntry_t entry;
        entry.offset = offset;
        entry.checksum = checksum;
        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
    }
}
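
/* Illustrative note (assumption, not part of the original source): because an
 * entry is inserted only when the tag equals tagMask, on average one position
 * in every 2^hashRateLog is added to the table, which keeps insertion cost and
 * table pressure low. Each bucket holds 2^bucketSizeLog entries and
 * ZSTD_ldm_insertEntry() cycles bucketOffsets[hash] through them, so a bucket
 * behaves like a small FIFO ring: with bucketSizeLog == 3, the ninth insertion
 * into a bucket overwrites the oldest entry. */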

/** ZSTD_ldm_countBackwardsMatch() :
 *  Returns the number of bytes that match backwards before pIn and pMatch.
 *
 *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
            const BYTE* pIn, const BYTE* pAnchor,
            const BYTE* pMatch, const BYTE* pBase)
{
    size_t matchLength = 0;
    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
        pIn--;
        pMatch--;
        matchLength++;
    }
    return matchLength;
}

/** ZSTD_ldm_fillFastTables() :
 *
 *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
 *  This is similar to ZSTD_loadDictionaryContent.
 *
 *  The tables for the other strategies are filled within their
 *  block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
                                      void const* end)
{
    const BYTE* const iend = (const BYTE*)end;

    switch(ms->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        break;
    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    return 0;
}

/** ZSTD_ldm_fillLdmHashTable() :
 *
 *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
 *  lastHash is the rolling hash that corresponds to lastHashed.
 *
 *  Returns the rolling hash corresponding to position iend-1. */
static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
                                     U64 lastHash, const BYTE* lastHashed,
                                     const BYTE* iend, const BYTE* base,
                                     U32 hBits, ldmParams_t const ldmParams)
{
    U64 rollingHash = lastHash;
    const BYTE* cur = lastHashed + 1;

    while (cur < iend) {
        rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
                                              cur[ldmParams.minMatchLength-1],
                                              state->hashPower);
        ZSTD_ldm_makeEntryAndInsertByTag(state,
                                         rollingHash, hBits,
                                         (U32)(cur - base), ldmParams);
        ++cur;
    }
    return rollingHash;
}

void ZSTD_ldm_fillHashTable(
            ldmState_t* state, const BYTE* ip,
            const BYTE* iend, ldmParams_t const* params)
{
    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
    if ((size_t)(iend - ip) >= params->minMatchLength) {
        U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
        ZSTD_ldm_fillLdmHashTable(
            state, startingHash, ip, iend - params->minMatchLength, state->window.base,
            params->hashLog - params->bucketSizeLog,
            *params);
    }
}


/** ZSTD_ldm_limitTableUpdate() :
 *
 *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
 *  if it is far away
 *  (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
    U32 const current = (U32)(anchor - ms->window.base);
    if (current > ms->nextToUpdate + 1024) {
        ms->nextToUpdate =
            current - MIN(512, current - ms->nextToUpdate - 1024);
    }
}
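
/* Illustrative note (assumption, not part of the original source): if a long
 * LDM match leaves anchor 5000 bytes past ms->nextToUpdate, the check above
 * (5000 > 1024) fires and nextToUpdate is pulled up to current - 512, so the
 * block compressor re-indexes only the last 512 bytes instead of the whole
 * skipped region. */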

static size_t ZSTD_ldm_generateSequences_internal(
        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    /* LDM parameters */
    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
    U32 const minMatchLength = params->minMatchLength;
    U64 const hashPower = ldmState->hashPower;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    U32 const ldmBucketSize = 1U << params->bucketSizeLog;
    U32 const hashRateLog = params->hashRateLog;
    U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
    /* Prefix and extDict parameters */
    U32 const dictLimit = ldmState->window.dictLimit;
    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
    BYTE const* const lowPrefixPtr = base + dictLimit;
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
    /* Input positions */
    BYTE const* anchor = istart;
    BYTE const* ip = istart;
    /* Rolling hash */
    BYTE const* lastHashed = NULL;
    U64 rollingHash = 0;

    while (ip <= ilimit) {
        size_t mLength;
        U32 const current = (U32)(ip - base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        if (ip != istart) {
            rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
                                                  lastHashed[minMatchLength],
                                                  hashPower);
        } else {
            rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
        }
        lastHashed = ip;

        /* Do not insert and do not look for a match */
        if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
            ip++;
            continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   *params);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
                if (extDict) {
                    BYTE const* const curMatchBase =
                        cur->offset < dictLimit ? dictBase : base;
                    BYTE const* const pMatch = curMatchBase + cur->offset;
                    BYTE const* const matchEnd =
                        cur->offset < dictLimit ? dictEnd : iend;
                    BYTE const* const lowMatchPtr =
                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;

                    curForwardMatchLength = ZSTD_count_2segments(
                                                ip, pMatch, iend,
                                                matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowMatchPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                } else { /* !extDict */
                    BYTE const* const pMatch = base + cur->offset;
                    curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
                                                     lowPrefixPtr);
                    curTotalMatchLength = curForwardMatchLength +
                                          curBackwardMatchLength;
                }

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                             hBits, current,
                                             *params);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;

        {
            /* Store the sequence:
             * ip = current - backwardMatchLength
             * The match is at (bestEntry->offset - backwardMatchLength)
             */
            U32 const matchIndex = bestEntry->offset;
            U32 const offset = current - matchIndex;
            rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;

            /* Out of sequence storage */
            if (rawSeqStore->size == rawSeqStore->capacity)
                return ERROR(dstSize_tooSmall);
            seq->litLength = (U32)(ip - anchor);
            seq->matchLength = (U32)mLength;
            seq->offset = offset;
            rawSeqStore->size++;
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         *params);

        assert(ip + backwardMatchLength == lastHashed);

        /* Fill the hash table from lastHashed+1 to ip+mLength */
        /* Heuristic: don't need to fill the entire table at end of block */
        if (ip + mLength <= ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits, *params);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
    }
    return iend - anchor;
}
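
/* Illustrative note (assumption, not part of the original source): each rawSeq
 * produced above means "emit litLength literals, then copy matchLength bytes
 * from `offset` bytes back". For example, with anchor at index 1000 and a
 * match found at ip == index 1100 against an entry at index 400 (and no
 * backward extension), the stored sequence is
 * { litLength = 100, matchLength = mLength, offset = 700 }. The return value
 * (iend - anchor) is the count of trailing bytes not covered by any sequence;
 * the caller carries them over as leftover literals. */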

/*! ZSTD_ldm_reduceTable() :
 *  reduce table indexes by `reducerValue` */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
                                 U32 const reducerValue)
{
    U32 u;
    for (u = 0; u < size; u++) {
        if (table[u].offset < reducerValue) table[u].offset = 0;
        else table[u].offset -= reducerValue;
    }
}

size_t ZSTD_ldm_generateSequences(
        ldmState_t* ldmState, rawSeqStore_t* sequences,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    U32 const maxDist = 1U << params->windowLog;
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    size_t const kMaxChunkSize = 1 << 20;
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
    size_t chunk;
    size_t leftoverSize = 0;

    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
    /* Check that ZSTD_window_update() has been called for this chunk prior
     * to passing it to this function.
     */
    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    /* The input could be very large (in zstdmt), so it must be broken up into
     * chunks to enforce the maximum distance and handle overflow correction.
     */
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);
    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
        size_t const remaining = (size_t)(iend - chunkStart);
        BYTE const *const chunkEnd =
            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
        size_t const chunkSize = chunkEnd - chunkStart;
        size_t newLeftoverSize;
        size_t const prevSize = sequences->size;

        assert(chunkStart < iend);
        /* 1. Perform overflow correction if necessary. */
        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
            /* invalidate dictionaries on overflow correction */
            ldmState->loadedDictEnd = 0;
        }
        /* 2. We enforce the maximum offset allowed.
         *
         * kMaxChunkSize should be small enough that we don't lose too much of
         * the window through early invalidation.
         * TODO: * Test the chunk size.
         *       * Try invalidation after the sequence generation and test the
         *         offset against maxDist directly.
         *
         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
         * that any offset used is valid at the END of the sequence, since it may
         * be split into two sequences. This condition holds when using
         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
         * against maxDist directly, we'll have to carefully handle that case.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
        if (ZSTD_isError(newLeftoverSize))
            return newLeftoverSize;
        /* 4. We add the leftover literals from previous iterations to the first
         * newly generated sequence, or add the `newLeftoverSize` if none are
         * generated.
         */
        /* Prepend the leftover literals from the last call */
        if (prevSize < sequences->size) {
            sequences->seq[prevSize].litLength += (U32)leftoverSize;
            leftoverSize = newLeftoverSize;
        } else {
            assert(newLeftoverSize == chunkSize);
            leftoverSize += chunkSize;
        }
    }
    return 0;
}
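
/* Illustrative note (assumption, not part of the original source): for
 * ZSTD_ldm_skipSequences() below, skipping 150 bytes past a stored sequence
 * { litLength = 100, matchLength = 200 } first consumes the 100 literals and
 * then shortens the match, leaving { litLength = 0, matchLength = 150 };
 * only if the remaining match dropped below minMatch would the sequence be
 * dropped and its remaining length folded into the next sequence's literals. */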

void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
        if (srcSize <= seq->litLength) {
            /* Skip past srcSize literals */
            seq->litLength -= (U32)srcSize;
            return;
        }
        srcSize -= seq->litLength;
        seq->litLength = 0;
        if (srcSize < seq->matchLength) {
            /* Skip past the first srcSize of the match */
            seq->matchLength -= (U32)srcSize;
            if (seq->matchLength < minMatch) {
                /* The match is too short, omit it */
                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                    seq[1].litLength += seq[0].matchLength;
                }
                rawSeqStore->pos++;
            }
            return;
        }
        srcSize -= seq->matchLength;
        seq->matchLength = 0;
        rawSeqStore->pos++;
    }
}

/**
 * If the sequence length is longer than remaining then the sequence is split
 * between this block and the next.
 *
 * Returns the current sequence to handle, or if the rest of the block should
 * be literals, it returns a sequence with offset == 0.
 */
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
                                 U32 const remaining, U32 const minMatch)
{
    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
    assert(sequence.offset > 0);
    /* Likely: No partial sequence */
    if (remaining >= sequence.litLength + sequence.matchLength) {
        rawSeqStore->pos++;
        return sequence;
    }
    /* Cut the sequence short (offset == 0 ==> rest is literals). */
    if (remaining <= sequence.litLength) {
        sequence.offset = 0;
    } else if (remaining < sequence.litLength + sequence.matchLength) {
        sequence.matchLength = remaining - sequence.litLength;
        if (sequence.matchLength < minMatch) {
            sequence.offset = 0;
        }
    }
    /* Skip past `remaining` bytes for the future sequences. */
    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    return sequence;
}
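
/* Illustrative note (assumption, not part of the original source): the block
 * compression below alternates between the two match finders. For each LDM
 * sequence it runs the regular block compressor over that sequence's literals
 * only, then emits the long-distance match itself via ZSTD_storeSeq() and
 * rotates its offset into rep[0], so short-range matches are still found
 * inside the literal gaps between long-range matches. */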

size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    void const* src, size_t srcSize)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    unsigned const minMatch = cParams->minMatch;
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    /* Input positions */
    BYTE const* ip = istart;

    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
    assert(rawSeqStore->pos <= rawSeqStore->size);
    assert(rawSeqStore->size <= rawSeqStore->capacity);
    /* Loop through each sequence and apply the block compressor to the lits */
    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
        /* maybeSplitSequence updates rawSeqStore->pos */
        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                   (U32)(iend - ip), minMatch);
        int i;
        /* End signal */
        if (sequence.offset == 0)
            break;

        assert(ip + sequence.litLength + sequence.matchLength <= iend);

        /* Fill tables for block compressor */
        ZSTD_ldm_limitTableUpdate(ms, ip);
        ZSTD_ldm_fillFastTables(ms, ip);
        /* Run the block compressor */
        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
        {
            size_t const newLitLength =
                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
            ip += sequence.litLength;
            /* Update the repcodes */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                          sequence.offset + ZSTD_REP_MOVE,
                          sequence.matchLength - MINMATCH);
            ip += sequence.matchLength;
        }
    }
    /* Fill the tables for the block compressor */
    ZSTD_ldm_limitTableUpdate(ms, ip);
    ZSTD_ldm_fillFastTables(ms, ip);
    /* Compress the last literals */
    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
}