xref: /freebsd/sys/contrib/zstd/lib/compress/zstd_ldm.c (revision 0c16b537)
1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  */
9 
10 #include "zstd_ldm.h"
11 
12 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
13 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
14 
15 #define LDM_BUCKET_SIZE_LOG 3
16 #define LDM_MIN_MATCH_LENGTH 64
17 #define LDM_HASH_RLOG 7
18 #define LDM_HASH_CHAR_OFFSET 10
19 
20 size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
21 {
22     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
23     params->enableLdm = enableLdm>0;
24     params->hashLog = 0;
25     params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
26     params->minMatchLength = LDM_MIN_MATCH_LENGTH;
27     params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
28     return 0;
29 }
30 
31 void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
32 {
33     if (params->hashLog == 0) {
34         params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
35         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
36     }
37     if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
38         params->hashEveryLog =
39                 windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
40     }
41     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
42 }
43 
44 size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
45     size_t const ldmHSize = ((size_t)1) << hashLog;
46     size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
47     size_t const ldmBucketSize =
48         ((size_t)1) << (hashLog - ldmBucketSizeLog);
49     return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
50 }
51 
52 /** ZSTD_ldm_getSmallHash() :
53  *  numBits should be <= 32
54  *  If numBits==0, returns 0.
55  *  @return : the most significant numBits of value. */
56 static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
57 {
58     assert(numBits <= 32);
59     return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
60 }
61 
62 /** ZSTD_ldm_getChecksum() :
63  *  numBitsToDiscard should be <= 32
64  *  @return : the next most significant 32 bits after numBitsToDiscard */
65 static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
66 {
67     assert(numBitsToDiscard <= 32);
68     return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
69 }
70 
71 /** ZSTD_ldm_getTag() ;
72  *  Given the hash, returns the most significant numTagBits bits
73  *  after (32 + hbits) bits.
74  *
75  *  If there are not enough bits remaining, return the last
76  *  numTagBits bits. */
77 static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
78 {
79     assert(numTagBits < 32 && hbits <= 32);
80     if (32 - hbits < numTagBits) {
81         return hash & (((U32)1 << numTagBits) - 1);
82     } else {
83         return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
84     }
85 }
86 
87 /** ZSTD_ldm_getBucket() :
88  *  Returns a pointer to the start of the bucket associated with hash. */
89 static ldmEntry_t* ZSTD_ldm_getBucket(
90         ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
91 {
92     return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
93 }
94 
95 /** ZSTD_ldm_insertEntry() :
96  *  Insert the entry with corresponding hash into the hash table */
97 static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
98                                  size_t const hash, const ldmEntry_t entry,
99                                  ldmParams_t const ldmParams)
100 {
101     BYTE* const bucketOffsets = ldmState->bucketOffsets;
102     *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
103     bucketOffsets[hash]++;
104     bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
105 }
106 
107 /** ZSTD_ldm_makeEntryAndInsertByTag() :
108  *
109  *  Gets the small hash, checksum, and tag from the rollingHash.
110  *
111  *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
112  *  creates an ldmEntry from the offset, and inserts it into the hash table.
113  *
114  *  hBits is the length of the small hash, which is the most significant hBits
115  *  of rollingHash. The checksum is the next 32 most significant bits, followed
116  *  by ldmParams.hashEveryLog bits that make up the tag. */
117 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
118                                              U64 const rollingHash,
119                                              U32 const hBits,
120                                              U32 const offset,
121                                              ldmParams_t const ldmParams)
122 {
123     U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
124     U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
125     if (tag == tagMask) {
126         U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
127         U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
128         ldmEntry_t entry;
129         entry.offset = offset;
130         entry.checksum = checksum;
131         ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
132     }
133 }
134 
135 /** ZSTD_ldm_getRollingHash() :
136  *  Get a 64-bit hash using the first len bytes from buf.
137  *
138  *  Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
139  *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
140  *
141  *  where the constant a is defined to be prime8bytes.
142  *
143  *  The implementation adds an offset to each byte, so
144  *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
145 static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
146 {
147     U64 ret = 0;
148     U32 i;
149     for (i = 0; i < len; i++) {
150         ret *= prime8bytes;
151         ret += buf[i] + LDM_HASH_CHAR_OFFSET;
152     }
153     return ret;
154 }
155 
156 /** ZSTD_ldm_ipow() :
157  *  Return base^exp. */
158 static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
159 {
160     U64 ret = 1;
161     while (exp) {
162         if (exp & 1) { ret *= base; }
163         exp >>= 1;
164         base *= base;
165     }
166     return ret;
167 }
168 
169 U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
170     assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
171     return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
172 }
173 
174 /** ZSTD_ldm_updateHash() :
175  *  Updates hash by removing toRemove and adding toAdd. */
176 static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
177 {
178     hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
179     hash *= prime8bytes;
180     hash += toAdd + LDM_HASH_CHAR_OFFSET;
181     return hash;
182 }
183 
184 /** ZSTD_ldm_countBackwardsMatch() :
185  *  Returns the number of bytes that match backwards before pIn and pMatch.
186  *
187  *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
188 static size_t ZSTD_ldm_countBackwardsMatch(
189             const BYTE* pIn, const BYTE* pAnchor,
190             const BYTE* pMatch, const BYTE* pBase)
191 {
192     size_t matchLength = 0;
193     while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
194         pIn--;
195         pMatch--;
196         matchLength++;
197     }
198     return matchLength;
199 }
200 
201 /** ZSTD_ldm_fillFastTables() :
202  *
203  *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
204  *  This is similar to ZSTD_loadDictionaryContent.
205  *
206  *  The tables for the other strategies are filled within their
207  *  block compressors. */
208 static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
209 {
210     const BYTE* const iend = (const BYTE*)end;
211     const U32 mls = zc->appliedParams.cParams.searchLength;
212 
213     switch(zc->appliedParams.cParams.strategy)
214     {
215     case ZSTD_fast:
216         ZSTD_fillHashTable(zc, iend, mls);
217         zc->nextToUpdate = (U32)(iend - zc->base);
218         break;
219 
220     case ZSTD_dfast:
221         ZSTD_fillDoubleHashTable(zc, iend, mls);
222         zc->nextToUpdate = (U32)(iend - zc->base);
223         break;
224 
225     case ZSTD_greedy:
226     case ZSTD_lazy:
227     case ZSTD_lazy2:
228     case ZSTD_btlazy2:
229     case ZSTD_btopt:
230     case ZSTD_btultra:
231         break;
232     default:
233         assert(0);  /* not possible : not a valid strategy id */
234     }
235 
236     return 0;
237 }
238 
239 /** ZSTD_ldm_fillLdmHashTable() :
240  *
241  *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
242  *  lastHash is the rolling hash that corresponds to lastHashed.
243  *
244  *  Returns the rolling hash corresponding to position iend-1. */
245 static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
246                                      U64 lastHash, const BYTE* lastHashed,
247                                      const BYTE* iend, const BYTE* base,
248                                      U32 hBits, ldmParams_t const ldmParams)
249 {
250     U64 rollingHash = lastHash;
251     const BYTE* cur = lastHashed + 1;
252 
253     while (cur < iend) {
254         rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
255                                           cur[ldmParams.minMatchLength-1],
256                                           state->hashPower);
257         ZSTD_ldm_makeEntryAndInsertByTag(state,
258                                          rollingHash, hBits,
259                                          (U32)(cur - base), ldmParams);
260         ++cur;
261     }
262     return rollingHash;
263 }
264 
265 
266 /** ZSTD_ldm_limitTableUpdate() :
267  *
268  *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
269  *  if it is far way
270  *  (after a long match, only update tables a limited amount). */
271 static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
272 {
273     U32 const current = (U32)(anchor - cctx->base);
274     if (current > cctx->nextToUpdate + 1024) {
275         cctx->nextToUpdate =
276             current - MIN(512, current - cctx->nextToUpdate - 1024);
277     }
278 }
279 
/* Signature shared by all block compressors.  The callers in this file
 * treat the return value as the size of the trailing literals left
 * uncompressed by the call. */
typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
/* defined in zstd_compress.c */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
283 
/** ZSTD_compressBlock_ldm_generic() :
 *  Long-distance-matching block compressor for the single-segment case
 *  (no external dictionary : all candidate offsets are > dictLimit).
 *
 *  Scans src with a rolling hash; at each tag-selected position it looks
 *  for a long match in the ldm hash table.  The literals between matches
 *  are handed to the regular block compressor for the current strategy,
 *  then the match itself is stored as a sequence.  Rep codes are staged
 *  in seqStorePtr->repToConfirm and only copied into seqStorePtr->rep
 *  around the inner block-compressor calls; the original rep values are
 *  restored before returning.
 *  @return : the size of the last literals (the tail not covered by any
 *            sequence), as produced by the final block-compressor call. */
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
                                      const void* src, size_t srcSize)
{
    ldmState_t* const ldmState = &(cctx->ldmState);
    const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
    const U64 hashPower = ldmState->hashPower;
    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    seqStore_t* const seqStorePtr = &(cctx->seqStore);
    const BYTE* const base = cctx->base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   lowestIndex = cctx->dictLimit;
    const BYTE* const lowest = base + lowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);

    const ZSTD_blockCompressor blockCompressor =
        ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
    U32* const repToConfirm = seqStorePtr->repToConfirm;
    U32 savedRep[ZSTD_REP_NUM];
    U64 rollingHash = 0;
    const BYTE* lastHashed = NULL;   /* last position whose rolling hash is valid */
    size_t i, lastLiterals;

    /* Save seqStorePtr->rep and copy repToConfirm */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        U32 const current = (U32)(ip - base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        /* Compute the rolling hash at ip : incremental update when possible,
         * full recomputation only at the start of the block. */
        if (ip != istart) {
            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
                                              lastHashed[ldmParams.minMatchLength],
                                              hashPower);
        } else {
            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
        }
        lastHashed = ip;

        /* Do not insert and do not look for a match */
        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
                ldmTagMask) {
           ip++;
           continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   ldmParams);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                const BYTE* const pMatch = cur->offset + base;
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                /* checksum filters hash collisions; offsets at or below
                 * lowestIndex are outside the valid window */
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }

                curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
                if (curForwardMatchLength < ldmParams.minMatchLength) {
                    continue;
                }
                /* extend the match backwards over unemitted literals */
                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
                                             ip, anchor, pMatch, lowest);
                curTotalMatchLength = curForwardMatchLength +
                                      curBackwardMatchLength;

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
                                             hBits, current,
                                             ldmParams);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;   /* match starts before the hashed position */

        /* Call the block compressor on the remaining literals */
        {
            U32 const matchIndex = bestEntry->offset;
            const BYTE* const match = base + matchIndex - backwardMatchLength;
            U32 const offset = (U32)(ip - match);

            /* Overwrite rep codes */
            for (i = 0; i < ZSTD_REP_NUM; i++)
                seqStorePtr->rep[i] = repToConfirm[i];

            /* Fill tables for block compressor */
            ZSTD_ldm_limitTableUpdate(cctx, anchor);
            ZSTD_ldm_fillFastTables(cctx, anchor);

            /* Call block compressor and get remaining literals */
            lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
            cctx->nextToUpdate = (U32)(ip - base);

            /* Update repToConfirm with the new offset */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                repToConfirm[i] = repToConfirm[i-1];
            repToConfirm[0] = offset;

            /* Store the sequence with the leftover literals */
            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         ldmParams);

        assert(ip + backwardMatchLength == lastHashed);

        /* Fill the hash table from lastHashed+1 to ip+mLength*/
        /* Heuristic: don't need to fill the entire table at end of block */
        if (ip + mLength < ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits, ldmParams);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;
        /* Check immediate repcode */
        while ( (ip < ilimit)
             && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
             && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {

            size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
                                              iend) + 4;
            /* Swap repToConfirm[1] <=> repToConfirm[0] */
            {
                U32 const tmpOff = repToConfirm[1];
                repToConfirm[1] = repToConfirm[0];
                repToConfirm[0] = tmpOff;
            }

            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);

            /* Fill the  hash table from lastHashed+1 to ip+rLength*/
            if (ip + rLength < ilimit) {
                rollingHash = ZSTD_ldm_fillLdmHashTable(
                                ldmState, rollingHash, lastHashed,
                                ip + rLength, base, hBits, ldmParams);
                lastHashed = ip + rLength - 1;
            }
            ip += rLength;
            anchor = ip;
        }
    }

    /* Overwrite rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = repToConfirm[i];

    ZSTD_ldm_limitTableUpdate(cctx, anchor);
    ZSTD_ldm_fillFastTables(cctx, anchor);

    /* Compress the final run of literals with the nested block compressor. */
    lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
    cctx->nextToUpdate = (U32)(iend - base);

    /* Restore seqStorePtr->rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = savedRep[i];

    /* Return the last literals size */
    return lastLiterals;
}
477 
/** ZSTD_compressBlock_ldm() :
 *  Single-segment (no external dictionary) ldm entry point.
 *  @return : the size of the last literals, from the generic worker. */
size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
                              const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
}
483 
/** ZSTD_compressBlock_ldm_extDict_generic() :
 *  Long-distance-matching block compressor for the external-dictionary
 *  case : a candidate offset below dictLimit lives in the dictBase
 *  segment, at or above it in the base (prefix) segment, so every match
 *  read picks its base / end / lower-bound pointers per segment.
 *  Otherwise mirrors ZSTD_compressBlock_ldm_generic : rolling-hash scan,
 *  literals handed to the extDict block compressor, rep codes staged in
 *  repToConfirm and restored before returning.
 *  @return : the size of the last literals, as produced by the final
 *            block-compressor call. */
static size_t ZSTD_compressBlock_ldm_extDict_generic(
                                 ZSTD_CCtx* ctx,
                                 const void* src, size_t srcSize)
{
    ldmState_t* const ldmState = &(ctx->ldmState);
    const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
    const U64 hashPower = ldmState->hashPower;
    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
    const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
    const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
    seqStore_t* const seqStorePtr = &(ctx->seqStore);
    const BYTE* const base = ctx->base;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   lowestIndex = ctx->lowLimit;
    const BYTE* const dictStart = dictBase + lowestIndex;
    const U32   dictLimit = ctx->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);

    const ZSTD_blockCompressor blockCompressor =
        ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
    U32* const repToConfirm = seqStorePtr->repToConfirm;
    U32 savedRep[ZSTD_REP_NUM];
    U64 rollingHash = 0;
    const BYTE* lastHashed = NULL;   /* last position whose rolling hash is valid */
    size_t i, lastLiterals;

    /* Save seqStorePtr->rep and copy repToConfirm */
    for (i = 0; i < ZSTD_REP_NUM; i++) {
        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
    }

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        size_t mLength;
        const U32 current = (U32)(ip-base);
        size_t forwardMatchLength = 0, backwardMatchLength = 0;
        ldmEntry_t* bestEntry = NULL;
        /* Compute the rolling hash at ip : incremental update when possible,
         * full recomputation only at the start of the block. */
        if (ip != istart) {
          rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
                                       lastHashed[ldmParams.minMatchLength],
                                       hashPower);
        } else {
            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
        }
        lastHashed = ip;

        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
                ldmTagMask) {
            /* Don't insert and don't look for a match */
           ip++;
           continue;
        }

        /* Get the best entry and compute the match lengths */
        {
            ldmEntry_t* const bucket =
                ZSTD_ldm_getBucket(ldmState,
                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
                                   ldmParams);
            ldmEntry_t* cur;
            size_t bestMatchLength = 0;
            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);

            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
                /* select per-segment pointers : dict segment below
                 * dictLimit, prefix segment at or above it */
                const BYTE* const curMatchBase =
                    cur->offset < dictLimit ? dictBase : base;
                const BYTE* const pMatch = curMatchBase + cur->offset;
                const BYTE* const matchEnd =
                    cur->offset < dictLimit ? dictEnd : iend;
                const BYTE* const lowMatchPtr =
                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;

                /* checksum filters hash collisions; offsets at or below
                 * lowestIndex are outside the valid window */
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }

                curForwardMatchLength = ZSTD_count_2segments(
                                            ip, pMatch, iend,
                                            matchEnd, lowPrefixPtr);
                if (curForwardMatchLength < ldmParams.minMatchLength) {
                    continue;
                }
                /* extend the match backwards over unemitted literals */
                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
                                             ip, anchor, pMatch, lowMatchPtr);
                curTotalMatchLength = curForwardMatchLength +
                                      curBackwardMatchLength;

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
        }

        /* No match found -- continue searching */
        if (bestEntry == NULL) {
            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                             (U32)(lastHashed - base),
                                             ldmParams);
            ip++;
            continue;
        }

        /* Match found */
        mLength = forwardMatchLength + backwardMatchLength;
        ip -= backwardMatchLength;   /* match starts before the hashed position */

        /* Call the block compressor on the remaining literals */
        {
            /* ip = current - backwardMatchLength
             * The match is at (bestEntry->offset - backwardMatchLength) */
            U32 const matchIndex = bestEntry->offset;
            U32 const offset = current - matchIndex;

            /* Overwrite rep codes */
            for (i = 0; i < ZSTD_REP_NUM; i++)
                seqStorePtr->rep[i] = repToConfirm[i];

            /* Fill the hash table for the block compressor */
            ZSTD_ldm_limitTableUpdate(ctx, anchor);
            ZSTD_ldm_fillFastTables(ctx, anchor);

            /* Call block compressor and get remaining literals  */
            lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
            ctx->nextToUpdate = (U32)(ip - base);

            /* Update repToConfirm with the new offset */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                repToConfirm[i] = repToConfirm[i-1];
            repToConfirm[0] = offset;

            /* Store the sequence with the leftover literals */
            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
        }

        /* Insert the current entry into the hash table */
        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
                                         (U32)(lastHashed - base),
                                         ldmParams);

        /* Fill the hash table from lastHashed+1 to ip+mLength */
        assert(ip + backwardMatchLength == lastHashed);
        if (ip + mLength < ilimit) {
            rollingHash = ZSTD_ldm_fillLdmHashTable(
                              ldmState, rollingHash, lastHashed,
                              ip + mLength, base, hBits,
                              ldmParams);
            lastHashed = ip + mLength - 1;
        }
        ip += mLength;
        anchor = ip;

        /* check immediate repcode */
        while (ip < ilimit) {
            U32 const current2 = (U32)(ip-base);
            U32 const repIndex2 = current2 - repToConfirm[1];
            const BYTE* repMatch2 = repIndex2 < dictLimit ?
                                    dictBase + repIndex2 : base + repIndex2;
            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
                        (repIndex2 > lowestIndex))  /* intentional overflow */
               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
                                            dictEnd : iend;
                size_t const repLength2 =
                        ZSTD_count_2segments(ip+4, repMatch2+4, iend,
                                             repEnd2, lowPrefixPtr) + 4;

                U32 tmpOffset = repToConfirm[1];
                repToConfirm[1] = repToConfirm[0];
                repToConfirm[0] = tmpOffset;

                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);

                /* Fill the  hash table from lastHashed+1 to ip+repLength2*/
                if (ip + repLength2 < ilimit) {
                    rollingHash = ZSTD_ldm_fillLdmHashTable(
                                      ldmState, rollingHash, lastHashed,
                                      ip + repLength2, base, hBits,
                                      ldmParams);
                    lastHashed = ip + repLength2 - 1;
                }
                ip += repLength2;
                anchor = ip;
                continue;
            }
            break;
        }
    }

    /* Overwrite rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = repToConfirm[i];

    ZSTD_ldm_limitTableUpdate(ctx, anchor);
    ZSTD_ldm_fillFastTables(ctx, anchor);

    /* Call the block compressor one last time on the last literals */
    lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
    ctx->nextToUpdate = (U32)(iend - base);

    /* Restore seqStorePtr->rep */
    for (i = 0; i < ZSTD_REP_NUM; i++)
        seqStorePtr->rep[i] = savedRep[i];

    /* Return the last literals size */
    return lastLiterals;
}
702 
/** ZSTD_compressBlock_ldm_extDict() :
 *  External-dictionary ldm entry point.
 *  @return : the size of the last literals, from the extDict worker. */
size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
}
708