xref: /freebsd/sys/contrib/zstd/lib/compress/zstd_opt.c (revision 19fcbaf1)
1 /*
2  * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #include "zstd_compress_internal.h"
12 #include "zstd_opt.h"
13 
14 
15 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
16 #define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
17 #define ZSTD_MAX_PRICE     (1<<30)
18 
19 
20 /*-*************************************
21 *  Price functions for optimal parser
22 ***************************************/
23 static void ZSTD_setLog2Prices(optState_t* optPtr)
24 {
25     optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
26     optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
27     optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
28     optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
29 }
30 
31 
32 static void ZSTD_rescaleFreqs(optState_t* const optPtr,
33                               const BYTE* const src, size_t const srcSize)
34 {
35     optPtr->staticPrices = 0;
36 
37     if (optPtr->litLengthSum == 0) {  /* first init */
38         unsigned u;
39         if (srcSize <= 1024) optPtr->staticPrices = 1;
40 
41         assert(optPtr->litFreq!=NULL);
42         for (u=0; u<=MaxLit; u++)
43             optPtr->litFreq[u] = 0;
44         for (u=0; u<srcSize; u++)
45             optPtr->litFreq[src[u]]++;
46         optPtr->litSum = 0;
47         for (u=0; u<=MaxLit; u++) {
48             optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
49             optPtr->litSum += optPtr->litFreq[u];
50         }
51 
52         for (u=0; u<=MaxLL; u++)
53             optPtr->litLengthFreq[u] = 1;
54         optPtr->litLengthSum = MaxLL+1;
55         for (u=0; u<=MaxML; u++)
56             optPtr->matchLengthFreq[u] = 1;
57         optPtr->matchLengthSum = MaxML+1;
58         for (u=0; u<=MaxOff; u++)
59             optPtr->offCodeFreq[u] = 1;
60         optPtr->offCodeSum = (MaxOff+1);
61 
62     } else {
63         unsigned u;
64 
65         optPtr->litSum = 0;
66         for (u=0; u<=MaxLit; u++) {
67             optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
68             optPtr->litSum += optPtr->litFreq[u];
69         }
70         optPtr->litLengthSum = 0;
71         for (u=0; u<=MaxLL; u++) {
72             optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
73             optPtr->litLengthSum += optPtr->litLengthFreq[u];
74         }
75         optPtr->matchLengthSum = 0;
76         for (u=0; u<=MaxML; u++) {
77             optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
78             optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
79         }
80         optPtr->offCodeSum = 0;
81         for (u=0; u<=MaxOff; u++) {
82             optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
83             optPtr->offCodeSum += optPtr->offCodeFreq[u];
84         }
85     }
86 
87     ZSTD_setLog2Prices(optPtr);
88 }
89 
90 
91 /* ZSTD_rawLiteralsCost() :
92  * cost of literals (only) in given segment (which length can be null)
93  * does not include cost of literalLength symbol */
94 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
95                                 const optState_t* const optPtr)
96 {
97     if (optPtr->staticPrices) return (litLength*6);  /* 6 bit per literal - no statistic used */
98     if (litLength == 0) return 0;
99 
100     /* literals */
101     {   U32 u;
102         U32 cost = litLength * optPtr->log2litSum;
103         for (u=0; u < litLength; u++)
104             cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
105         return cost;
106     }
107 }
108 
109 /* ZSTD_litLengthPrice() :
110  * cost of literalLength symbol */
111 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
112 {
113     if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
114 
115     /* literal Length */
116     {   U32 const llCode = ZSTD_LLcode(litLength);
117         U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
118         return price;
119     }
120 }
121 
122 /* ZSTD_litLengthPrice() :
123  * cost of the literal part of a sequence,
124  * including literals themselves, and literalLength symbol */
125 static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
126                                  const optState_t* const optPtr)
127 {
128     return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
129          + ZSTD_litLengthPrice(litLength, optPtr);
130 }
131 
132 /* ZSTD_litLengthContribution() :
133  * @return ( cost(litlength) - cost(0) )
134  * this value can then be added to rawLiteralsCost()
135  * to provide a cost which is directly comparable to a match ending at same position */
136 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
137 {
138     if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
139 
140     /* literal Length */
141     {   U32 const llCode = ZSTD_LLcode(litLength);
142         int const contribution = LL_bits[llCode]
143                         + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
144                         - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
145 #if 1
146         return contribution;
147 #else
148         return MAX(0, contribution); /* sometimes better, sometimes not ... */
149 #endif
150     }
151 }
152 
153 /* ZSTD_literalsContribution() :
154  * creates a fake cost for the literals part of a sequence
155  * which can be compared to the ending cost of a match
156  * should a new match start at this position */
157 static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
158                                      const optState_t* const optPtr)
159 {
160     int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
161                            + ZSTD_litLengthContribution(litLength, optPtr);
162     return contribution;
163 }
164 
165 /* ZSTD_getMatchPrice() :
166  * Provides the cost of the match part (offset + matchLength) of a sequence
167  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
168  * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
169 FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
170                                     U32 const offset, U32 const matchLength,
171                                     const optState_t* const optPtr,
172                                     int const optLevel)
173 {
174     U32 price;
175     U32 const offCode = ZSTD_highbit32(offset+1);
176     U32 const mlBase = matchLength - MINMATCH;
177     assert(matchLength >= MINMATCH);
178 
179     if (optPtr->staticPrices)  /* fixed scheme, do not use statistics */
180         return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
181 
182     price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
183     if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
184 
185     /* match Length */
186     {   U32 const mlCode = ZSTD_MLcode(mlBase);
187         price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
188     }
189 
190     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
191     return price;
192 }
193 
194 static void ZSTD_updateStats(optState_t* const optPtr,
195                              U32 litLength, const BYTE* literals,
196                              U32 offsetCode, U32 matchLength)
197 {
198     /* literals */
199     {   U32 u;
200         for (u=0; u < litLength; u++)
201             optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
202         optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
203     }
204 
205     /* literal Length */
206     {   U32 const llCode = ZSTD_LLcode(litLength);
207         optPtr->litLengthFreq[llCode]++;
208         optPtr->litLengthSum++;
209     }
210 
211     /* match offset code (0-2=>repCode; 3+=>offset+2) */
212     {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
213         assert(offCode <= MaxOff);
214         optPtr->offCodeFreq[offCode]++;
215         optPtr->offCodeSum++;
216     }
217 
218     /* match Length */
219     {   U32 const mlBase = matchLength - MINMATCH;
220         U32 const mlCode = ZSTD_MLcode(mlBase);
221         optPtr->matchLengthFreq[mlCode]++;
222         optPtr->matchLengthSum++;
223     }
224 }
225 
226 
227 /* ZSTD_readMINMATCH() :
228  * function safe only for comparisons
229  * assumption : memPtr must be at least 4 bytes before end of buffer */
230 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
231 {
232     switch (length)
233     {
234     default :
235     case 4 : return MEM_read32(memPtr);
236     case 3 : if (MEM_isLittleEndian())
237                 return MEM_read32(memPtr)<<8;
238              else
239                 return MEM_read32(memPtr)>>8;
240     }
241 }
242 
243 
244 /* Update hashTable3 up to ip (excluded)
245    Assumption : always within prefix (i.e. not within extDict) */
246 static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
247 {
248     U32* const hashTable3 = ms->hashTable3;
249     U32 const hashLog3 = ms->hashLog3;
250     const BYTE* const base = ms->window.base;
251     U32 idx = ms->nextToUpdate3;
252     U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
253     size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
254     assert(hashLog3 > 0);
255 
256     while(idx < target) {
257         hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
258         idx++;
259     }
260 
261     return hashTable3[hash3];
262 }
263 
264 
265 /*-*************************************
266 *  Binary Tree search
267 ***************************************/
268 /** ZSTD_insertBt1() : add one or multiple positions to tree.
269  *  ip : assumed <= iend-8 .
270  * @return : nb of positions added */
271 static U32 ZSTD_insertBt1(
272                 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
273                 const BYTE* const ip, const BYTE* const iend,
274                 U32 const mls, U32 const extDict)
275 {
276     U32*   const hashTable = ms->hashTable;
277     U32    const hashLog = cParams->hashLog;
278     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
279     U32*   const bt = ms->chainTable;
280     U32    const btLog  = cParams->chainLog - 1;
281     U32    const btMask = (1 << btLog) - 1;
282     U32 matchIndex = hashTable[h];
283     size_t commonLengthSmaller=0, commonLengthLarger=0;
284     const BYTE* const base = ms->window.base;
285     const BYTE* const dictBase = ms->window.dictBase;
286     const U32 dictLimit = ms->window.dictLimit;
287     const BYTE* const dictEnd = dictBase + dictLimit;
288     const BYTE* const prefixStart = base + dictLimit;
289     const BYTE* match;
290     const U32 current = (U32)(ip-base);
291     const U32 btLow = btMask >= current ? 0 : current - btMask;
292     U32* smallerPtr = bt + 2*(current&btMask);
293     U32* largerPtr  = smallerPtr + 1;
294     U32 dummy32;   /* to be nullified at the end */
295     U32 const windowLow = ms->window.lowLimit;
296     U32 matchEndIdx = current+8+1;
297     size_t bestLength = 8;
298     U32 nbCompares = 1U << cParams->searchLog;
299 #ifdef ZSTD_C_PREDICT
300     U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
301     U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
302     predictedSmall += (predictedSmall>0);
303     predictedLarge += (predictedLarge>0);
304 #endif /* ZSTD_C_PREDICT */
305 
306     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
307 
308     assert(ip <= iend-8);   /* required for h calculation */
309     hashTable[h] = current;   /* Update Hash Table */
310 
311     while (nbCompares-- && (matchIndex > windowLow)) {
312         U32* const nextPtr = bt + 2*(matchIndex & btMask);
313         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
314         assert(matchIndex < current);
315 
316 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
317         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
318         if (matchIndex == predictedSmall) {
319             /* no need to check length, result known */
320             *smallerPtr = matchIndex;
321             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
322             smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
323             matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
324             predictedSmall = predictPtr[1] + (predictPtr[1]>0);
325             continue;
326         }
327         if (matchIndex == predictedLarge) {
328             *largerPtr = matchIndex;
329             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
330             largerPtr = nextPtr;
331             matchIndex = nextPtr[0];
332             predictedLarge = predictPtr[0] + (predictPtr[0]>0);
333             continue;
334         }
335 #endif
336 
337         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
338             assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if extDict is incorrectly set to 0 */
339             match = base + matchIndex;
340             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
341         } else {
342             match = dictBase + matchIndex;
343             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
344             if (matchIndex+matchLength >= dictLimit)
345                 match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
346         }
347 
348         if (matchLength > bestLength) {
349             bestLength = matchLength;
350             if (matchLength > matchEndIdx - matchIndex)
351                 matchEndIdx = matchIndex + (U32)matchLength;
352         }
353 
354         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
355             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
356         }
357 
358         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
359             /* match is smaller than current */
360             *smallerPtr = matchIndex;             /* update smaller idx */
361             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
362             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
363             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
364             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
365         } else {
366             /* match is larger than current */
367             *largerPtr = matchIndex;
368             commonLengthLarger = matchLength;
369             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
370             largerPtr = nextPtr;
371             matchIndex = nextPtr[0];
372     }   }
373 
374     *smallerPtr = *largerPtr = 0;
375     if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
376     assert(matchEndIdx > current + 8);
377     return matchEndIdx - (current + 8);
378 }
379 
380 FORCE_INLINE_TEMPLATE
381 void ZSTD_updateTree_internal(
382                 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
383                 const BYTE* const ip, const BYTE* const iend,
384                 const U32 mls, const U32 extDict)
385 {
386     const BYTE* const base = ms->window.base;
387     U32 const target = (U32)(ip - base);
388     U32 idx = ms->nextToUpdate;
389     DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (extDict:%u)",
390                 idx, target, extDict);
391 
392     while(idx < target)
393         idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict);
394     ms->nextToUpdate = target;
395 }
396 
397 void ZSTD_updateTree(
398                 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
399                 const BYTE* ip, const BYTE* iend)
400 {
401     ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
402 }
403 
404 FORCE_INLINE_TEMPLATE
405 U32 ZSTD_insertBtAndGetAllMatches (
406                     ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
407                     const BYTE* const ip, const BYTE* const iLimit, int const extDict,
408                     U32 rep[ZSTD_REP_NUM], U32 const ll0,
409                     ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
410 {
411     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
412     const BYTE* const base = ms->window.base;
413     U32 const current = (U32)(ip-base);
414     U32 const hashLog = cParams->hashLog;
415     U32 const minMatch = (mls==3) ? 3 : 4;
416     U32* const hashTable = ms->hashTable;
417     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
418     U32 matchIndex  = hashTable[h];
419     U32* const bt   = ms->chainTable;
420     U32 const btLog = cParams->chainLog - 1;
421     U32 const btMask= (1U << btLog) - 1;
422     size_t commonLengthSmaller=0, commonLengthLarger=0;
423     const BYTE* const dictBase = ms->window.dictBase;
424     U32 const dictLimit = ms->window.dictLimit;
425     const BYTE* const dictEnd = dictBase + dictLimit;
426     const BYTE* const prefixStart = base + dictLimit;
427     U32 const btLow = btMask >= current ? 0 : current - btMask;
428     U32 const windowLow = ms->window.lowLimit;
429     U32* smallerPtr = bt + 2*(current&btMask);
430     U32* largerPtr  = bt + 2*(current&btMask) + 1;
431     U32 matchEndIdx = current+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
432     U32 dummy32;   /* to be nullified at the end */
433     U32 mnum = 0;
434     U32 nbCompares = 1U << cParams->searchLog;
435 
436     size_t bestLength = lengthToBeat-1;
437     DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
438 
439     /* check repCode */
440     {   U32 const lastR = ZSTD_REP_NUM + ll0;
441         U32 repCode;
442         for (repCode = ll0; repCode < lastR; repCode++) {
443             U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
444             U32 const repIndex = current - repOffset;
445             U32 repLen = 0;
446             assert(current >= dictLimit);
447             if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) {  /* equivalent to `current > repIndex >= dictLimit` */
448                 if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
449                     repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
450                 }
451             } else {  /* repIndex < dictLimit || repIndex >= current */
452                 const BYTE* const repMatch = dictBase + repIndex;
453                 assert(current >= windowLow);
454                 if ( extDict /* this case only valid in extDict mode */
455                   && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow)  /* equivalent to `current > repIndex >= windowLow` */
456                      & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
457                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
458                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
459             }   }
460             /* save longer solution */
461             if (repLen > bestLength) {
462                 DEBUGLOG(8, "found rep-match %u of length %u",
463                             repCode - ll0, (U32)repLen);
464                 bestLength = repLen;
465                 matches[mnum].off = repCode - ll0;
466                 matches[mnum].len = (U32)repLen;
467                 mnum++;
468                 if ( (repLen > sufficient_len)
469                    | (ip+repLen == iLimit) ) {  /* best possible */
470                     return mnum;
471     }   }   }   }
472 
473     /* HC3 match finder */
474     if ((mls == 3) /*static*/ && (bestLength < mls)) {
475         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
476         if ((matchIndex3 > windowLow)
477           & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
478             size_t mlen;
479             if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
480                 const BYTE* const match = base + matchIndex3;
481                 mlen = ZSTD_count(ip, match, iLimit);
482             } else {
483                 const BYTE* const match = dictBase + matchIndex3;
484                 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
485             }
486 
487             /* save best solution */
488             if (mlen >= mls /* == 3 > bestLength */) {
489                 DEBUGLOG(8, "found small match with hlog3, of length %u",
490                             (U32)mlen);
491                 bestLength = mlen;
492                 assert(current > matchIndex3);
493                 assert(mnum==0);  /* no prior solution */
494                 matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
495                 matches[0].len = (U32)mlen;
496                 mnum = 1;
497                 if ( (mlen > sufficient_len) |
498                      (ip+mlen == iLimit) ) {  /* best possible length */
499                     ms->nextToUpdate = current+1;  /* skip insertion */
500                     return 1;
501     }   }   }   }
502 
503     hashTable[h] = current;   /* Update Hash Table */
504 
505     while (nbCompares-- && (matchIndex > windowLow)) {
506         U32* const nextPtr = bt + 2*(matchIndex & btMask);
507         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
508         const BYTE* match;
509         assert(current > matchIndex);
510 
511         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
512             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
513             match = base + matchIndex;
514             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
515         } else {
516             match = dictBase + matchIndex;
517             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
518             if (matchIndex+matchLength >= dictLimit)
519                 match = base + matchIndex;   /* prepare for match[matchLength] */
520         }
521 
522         if (matchLength > bestLength) {
523             DEBUGLOG(8, "found match of length %u at distance %u",
524                     (U32)matchLength, current - matchIndex);
525             assert(matchEndIdx > matchIndex);
526             if (matchLength > matchEndIdx - matchIndex)
527                 matchEndIdx = matchIndex + (U32)matchLength;
528             bestLength = matchLength;
529             matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
530             matches[mnum].len = (U32)matchLength;
531             mnum++;
532             if (matchLength > ZSTD_OPT_NUM) break;
533             if (ip+matchLength == iLimit) {  /* equal : no way to know if inf or sup */
534                 break;   /* drop, to preserve bt consistency (miss a little bit of compression) */
535             }
536         }
537 
538         if (match[matchLength] < ip[matchLength]) {
539             /* match smaller than current */
540             *smallerPtr = matchIndex;             /* update smaller idx */
541             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
542             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
543             smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
544             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
545         } else {
546             *largerPtr = matchIndex;
547             commonLengthLarger = matchLength;
548             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
549             largerPtr = nextPtr;
550             matchIndex = nextPtr[0];
551     }   }
552 
553     *smallerPtr = *largerPtr = 0;
554 
555     assert(matchEndIdx > current+8);
556     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
557     return mnum;
558 }
559 
560 
561 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
562                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
563                         const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
564                         U32 rep[ZSTD_REP_NUM], U32 const ll0,
565                         ZSTD_match_t* matches, U32 const lengthToBeat)
566 {
567     U32 const matchLengthSearch = cParams->searchLength;
568     DEBUGLOG(7, "ZSTD_BtGetAllMatches");
569     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
570     ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
571     switch(matchLengthSearch)
572     {
573     case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3);
574     default :
575     case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4);
576     case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5);
577     case 7 :
578     case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6);
579     }
580 }
581 
582 
583 /*-*******************************
584 *  Optimal parser
585 *********************************/
586 typedef struct repcodes_s {
587     U32 rep[3];
588 } repcodes_t;
589 
590 repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
591 {
592     repcodes_t newReps;
593     if (offset >= ZSTD_REP_NUM) {  /* full offset */
594         newReps.rep[2] = rep[1];
595         newReps.rep[1] = rep[0];
596         newReps.rep[0] = offset - ZSTD_REP_MOVE;
597     } else {   /* repcode */
598         U32 const repCode = offset + ll0;
599         if (repCode > 0) {  /* note : if repCode==0, no change */
600             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
601             newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
602             newReps.rep[1] = rep[0];
603             newReps.rep[0] = currentOffset;
604         } else {   /* repCode == 0 */
605             memcpy(&newReps, rep, sizeof(newReps));
606         }
607     }
608     return newReps;
609 }
610 
611 
612 typedef struct {
613     const BYTE* anchor;
614     U32 litlen;
615     U32 rawLitCost;
616 } cachedLiteralPrice_t;
617 
618 static U32 ZSTD_rawLiteralsCost_cached(
619                             cachedLiteralPrice_t* const cachedLitPrice,
620                             const BYTE* const anchor, U32 const litlen,
621                             const optState_t* const optStatePtr)
622 {
623     U32 startCost;
624     U32 remainingLength;
625     const BYTE* startPosition;
626 
627     if (anchor == cachedLitPrice->anchor) {
628         startCost = cachedLitPrice->rawLitCost;
629         startPosition = anchor + cachedLitPrice->litlen;
630         assert(litlen >= cachedLitPrice->litlen);
631         remainingLength = litlen - cachedLitPrice->litlen;
632     } else {
633         startCost = 0;
634         startPosition = anchor;
635         remainingLength = litlen;
636     }
637 
638     {   U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
639         cachedLitPrice->anchor = anchor;
640         cachedLitPrice->litlen = litlen;
641         cachedLitPrice->rawLitCost = rawLitCost;
642         return rawLitCost;
643     }
644 }
645 
646 static U32 ZSTD_fullLiteralsCost_cached(
647                             cachedLiteralPrice_t* const cachedLitPrice,
648                             const BYTE* const anchor, U32 const litlen,
649                             const optState_t* const optStatePtr)
650 {
651     return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
652          + ZSTD_litLengthPrice(litlen, optStatePtr);
653 }
654 
655 static int ZSTD_literalsContribution_cached(
656                             cachedLiteralPrice_t* const cachedLitPrice,
657                             const BYTE* const anchor, U32 const litlen,
658                             const optState_t* const optStatePtr)
659 {
660     int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
661                            + ZSTD_litLengthContribution(litlen, optStatePtr);
662     return contribution;
663 }
664 
665 FORCE_INLINE_TEMPLATE
666 size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
667                                       U32 rep[ZSTD_REP_NUM],
668                                       ZSTD_compressionParameters const* cParams,
669                                       const void* src, size_t srcSize,
670                                       const int optLevel, const int extDict)
671 {
672     optState_t* const optStatePtr = &ms->opt;
673     const BYTE* const istart = (const BYTE*)src;
674     const BYTE* ip = istart;
675     const BYTE* anchor = istart;
676     const BYTE* const iend = istart + srcSize;
677     const BYTE* const ilimit = iend - 8;
678     const BYTE* const base = ms->window.base;
679     const BYTE* const prefixStart = base + ms->window.dictLimit;
680 
681     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
682     U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
683 
684     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
685     ZSTD_match_t* const matches = optStatePtr->matchTable;
686     cachedLiteralPrice_t cachedLitPrice;
687 
688     /* init */
689     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
690     ms->nextToUpdate3 = ms->nextToUpdate;
691     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
692     ip += (ip==prefixStart);
693     memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
694 
695     /* Match Loop */
696     while (ip < ilimit) {
697         U32 cur, last_pos = 0;
698         U32 best_mlen, best_off;
699 
700         /* find first match */
701         {   U32 const litlen = (U32)(ip - anchor);
702             U32 const ll0 = !litlen;
703             U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
704             if (!nbMatches) { ip++; continue; }
705 
706             /* initialize opt[0] */
707             { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
708             opt[0].mlen = 1;
709             opt[0].litlen = litlen;
710 
711             /* large match -> immediate encoding */
712             {   U32 const maxML = matches[nbMatches-1].len;
713                 DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
714                             nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
715 
716                 if (maxML > sufficient_len) {
717                     best_mlen = maxML;
718                     best_off = matches[nbMatches-1].off;
719                     DEBUGLOG(7, "large match (%u>%u), immediate encoding",
720                                 best_mlen, sufficient_len);
721                     cur = 0;
722                     last_pos = 1;
723                     goto _shortestPath;
724             }   }
725 
726             /* set prices for first matches starting position == 0 */
727             {   U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
728                 U32 pos;
729                 U32 matchNb;
730                 for (pos = 0; pos < minMatch; pos++) {
731                     opt[pos].mlen = 1;
732                     opt[pos].price = ZSTD_MAX_PRICE;
733                 }
734                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
735                     U32 const offset = matches[matchNb].off;
736                     U32 const end = matches[matchNb].len;
737                     repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
738                     for ( ; pos <= end ; pos++ ) {
739                         U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
740                         DEBUGLOG(7, "rPos:%u => set initial price : %u",
741                                     pos, matchPrice);
742                         opt[pos].mlen = pos;
743                         opt[pos].off = offset;
744                         opt[pos].litlen = litlen;
745                         opt[pos].price = matchPrice;
746                         memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
747                 }   }
748                 last_pos = pos-1;
749             }
750         }
751 
752         /* check further positions */
753         for (cur = 1; cur <= last_pos; cur++) {
754             const BYTE* const inr = ip + cur;
755             assert(cur < ZSTD_OPT_NUM);
756 
757             /* Fix current position with one literal if cheaper */
758             {   U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
759                 int price;  /* note : contribution can be negative */
760                 if (cur > litlen) {
761                     price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
762                 } else {
763                     price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
764                 }
765                 assert(price < 1000000000); /* overflow check */
766                 if (price <= opt[cur].price) {
767                     DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
768                                 cur, price, opt[cur].price);
769                     opt[cur].mlen = 1;
770                     opt[cur].off = 0;
771                     opt[cur].litlen = litlen;
772                     opt[cur].price = price;
773                     memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
774             }   }
775 
776             /* last match must start at a minimum distance of 8 from oend */
777             if (inr > ilimit) continue;
778 
779             if (cur == last_pos) break;
780 
781              if ( (optLevel==0) /*static*/
782                && (opt[cur+1].price <= opt[cur].price) )
783                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
784 
785             {   U32 const ll0 = (opt[cur].mlen != 1);
786                 U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
787                 U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
788                 U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
789                 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
790                 U32 matchNb;
791                 if (!nbMatches) continue;
792 
793                 {   U32 const maxML = matches[nbMatches-1].len;
794                     DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
795                                 cur, nbMatches, maxML);
796 
797                     if ( (maxML > sufficient_len)
798                        | (cur + maxML >= ZSTD_OPT_NUM) ) {
799                         best_mlen = maxML;
800                         best_off = matches[nbMatches-1].off;
801                         last_pos = cur + 1;
802                         goto _shortestPath;
803                     }
804                 }
805 
806                 /* set prices using matches found at position == cur */
807                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
808                     U32 const offset = matches[matchNb].off;
809                     repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
810                     U32 const lastML = matches[matchNb].len;
811                     U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
812                     U32 mlen;
813 
814                     DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
815                                 matchNb, matches[matchNb].off, lastML, litlen);
816 
817                     for (mlen = lastML; mlen >= startML; mlen--) {
818                         U32 const pos = cur + mlen;
819                         int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
820 
821                         if ((pos > last_pos) || (price < opt[pos].price)) {
822                             DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
823                                         pos, price, opt[pos].price);
824                             while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
825                             opt[pos].mlen = mlen;
826                             opt[pos].off = offset;
827                             opt[pos].litlen = litlen;
828                             opt[pos].price = price;
829                             memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
830                         } else {
831                             if (optLevel==0) break;  /* gets ~+10% speed for about -0.01 ratio loss */
832                         }
833             }   }   }
834         }  /* for (cur = 1; cur <= last_pos; cur++) */
835 
836         best_mlen = opt[last_pos].mlen;
837         best_off = opt[last_pos].off;
838         cur = last_pos - best_mlen;
839 
840 _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
841         assert(opt[0].mlen == 1);
842 
843         /* reverse traversal */
844         DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
845                     last_pos, cur);
846         {   U32 selectedMatchLength = best_mlen;
847             U32 selectedOffset = best_off;
848             U32 pos = cur;
849             while (1) {
850                 U32 const mlen = opt[pos].mlen;
851                 U32 const off = opt[pos].off;
852                 opt[pos].mlen = selectedMatchLength;
853                 opt[pos].off = selectedOffset;
854                 selectedMatchLength = mlen;
855                 selectedOffset = off;
856                 if (mlen > pos) break;
857                 pos -= mlen;
858         }   }
859 
860         /* save sequences */
861         {   U32 pos;
862             for (pos=0; pos < last_pos; ) {
863                 U32 const llen = (U32)(ip - anchor);
864                 U32 const mlen = opt[pos].mlen;
865                 U32 const offset = opt[pos].off;
866                 if (mlen == 1) { ip++; pos++; continue; }  /* literal position => move on */
867                 pos += mlen; ip += mlen;
868 
869                 /* repcodes update : like ZSTD_updateRep(), but update in place */
870                 if (offset >= ZSTD_REP_NUM) {  /* full offset */
871                     rep[2] = rep[1];
872                     rep[1] = rep[0];
873                     rep[0] = offset - ZSTD_REP_MOVE;
874                 } else {   /* repcode */
875                     U32 const repCode = offset + (llen==0);
876                     if (repCode) {  /* note : if repCode==0, no change */
877                         U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
878                         if (repCode >= 2) rep[2] = rep[1];
879                         rep[1] = rep[0];
880                         rep[0] = currentOffset;
881                     }
882                 }
883 
884                 ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
885                 ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
886                 anchor = ip;
887         }   }
888         ZSTD_setLog2Prices(optStatePtr);
889     }   /* while (ip < ilimit) */
890 
891     /* Return the last literals size */
892     return iend - anchor;
893 }
894 
895 
896 size_t ZSTD_compressBlock_btopt(
897         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
898         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
899 {
900     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
901     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
902 }
903 
904 size_t ZSTD_compressBlock_btultra(
905         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
906         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
907 {
908     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
909 }
910 
911 size_t ZSTD_compressBlock_btopt_extDict(
912         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
913         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
914 {
915     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
916 }
917 
918 size_t ZSTD_compressBlock_btultra_extDict(
919         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
920         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
921 {
922     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
923 }
924