/**
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 */



/*-*******************************************************
*  Compiler specifics
*********************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>                    /* For Visual 2005 */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#else
#  ifdef __GNUC__
#    define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#    define FORCE_INLINE static inline
#  endif
#endif


/*-*************************************
*  Dependencies
***************************************/
#include <string.h>         /* memset */
#include "mem.h"
#define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
#include "xxhash.h"         /* XXH_reset, update, digest */
#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include "zstd_internal.h"  /* includes zstd.h */


/*-*************************************
*  Constants
***************************************/
static const U32 g_searchStrength = 8;   /* control skip over incompressible data */
#define HASH_READ_SIZE 8
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;


/*-*************************************
*  Helper functions
***************************************/
size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
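
/* Illustrative usage sketch (not part of the library) : callers typically size
 * their destination buffer with ZSTD_compressBound() so that compression cannot
 * fail for lack of output space. Assumes the public ZSTD_compress() entry point
 * and standard malloc()/free(). */
#if 0
static void example_compressBound(const void* src, size_t srcSize)
{
    size_t const dstCapacity = ZSTD_compressBound(srcSize);   /* worst-case size */
    void* const dst = malloc(dstCapacity);
    if (dst != NULL) {
        size_t const cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, 1);
        if (!ZSTD_isError(cSize)) { /* use the cSize compressed bytes */ }
        free(dst);
    }
}
#endif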


/*-*************************************
*  Sequence storage
***************************************/
static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->lit = ssPtr->litStart;
    ssPtr->sequences = ssPtr->sequencesStart;
    ssPtr->longLengthID = 0;
}


/*-*************************************
*  Context memory management
***************************************/
struct ZSTD_CCtx_s
{
    const BYTE* nextSrc;    /* next block here to continue on current prefix */
    const BYTE* base;       /* All regular indexes relative to this position */
    const BYTE* dictBase;   /* extDict indexes relative to this position */
    U32   dictLimit;        /* below that point, need extDict */
    U32   lowLimit;         /* below that point, no more data */
    U32   nextToUpdate;     /* index from which to continue dictionary update */
    U32   nextToUpdate3;    /* index from which to continue dictionary update (3-byte table) */
    U32   hashLog3;         /* dispatch table : larger == faster, more memory */
    U32   loadedDictEnd;
    ZSTD_compressionStage_e stage;
    U32   rep[ZSTD_REP_NUM];
    U32   savedRep[ZSTD_REP_NUM];
    U32   dictID;
    ZSTD_parameters params;
    void* workSpace;
    size_t workSpaceSize;
    size_t blockSize;
    U64 frameContentSize;
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;

    seqStore_t seqStore;    /* sequences storage ptrs */
    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;
    HUF_CElt* hufTable;
    U32 flagStaticTables;
    FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
};

ZSTD_CCtx* ZSTD_createCCtx(void)
{
    return ZSTD_createCCtx_advanced(defaultCustomMem);
}

ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
    ZSTD_CCtx* cctx;

    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
    if (!customMem.customAlloc || !customMem.customFree) return NULL;

    cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
    if (!cctx) return NULL;
    memset(cctx, 0, sizeof(ZSTD_CCtx));
    memcpy(&(cctx->customMem), &customMem, sizeof(ZSTD_customMem));
    return cctx;
}

size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support free on NULL */
    ZSTD_free(cctx->workSpace, cctx->customMem);
    ZSTD_free(cctx, cctx->customMem);
    return 0;   /* reserved as a potential error code in the future */
}
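
/* Illustrative sketch : routing all context allocations through user-provided
 * callbacks via ZSTD_createCCtx_advanced(). The callback names are hypothetical;
 * ZSTD_customMem only requires a matching customAlloc/customFree pair (plus an
 * opaque pointer forwarded to both). */
#if 0
static void* myAlloc(void* opaque, size_t size)   { (void)opaque; return malloc(size); }
static void  myFree (void* opaque, void* address) { (void)opaque; free(address); }

static void example_customMem(void)
{
    ZSTD_customMem const cmem = { myAlloc, myFree, NULL };
    ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cmem);
    /* ... compress using cctx ... */
    ZSTD_freeCCtx(cctx);   /* safe even when cctx==NULL */
}
#endif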

size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
    return sizeof(*cctx) + cctx->workSpaceSize;
}

const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx)   /* hidden interface */
{
    return &(ctx->seqStore);
}


#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; }
#define CLAMPCHECK(val,min,max) { if ((val<min) || (val>max)) return ERROR(compressionParameter_unsupported); }

/** ZSTD_checkCParams() :
    ensure param values remain within authorized range.
    @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
    CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
    CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
    CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
    { U32 const searchLengthMin = (cParams.strategy == ZSTD_fast || cParams.strategy == ZSTD_greedy) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
      U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
      CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
    CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
    if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported);
    return 0;
}


/** ZSTD_checkCParams_advanced() :
    temporary work-around, while compressor compatibility remains limited for windowLog < 18 */
size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize)
{
    if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams);
    if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported);
    if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN; /* fake value - temporary work around */
    if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN;    /* fake value - temporary work around */
    if ((srcSize <= (1ULL << cParams.hashLog)) && ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN;   /* fake value - temporary work around */
    return ZSTD_checkCParams(cParams);
}


/** ZSTD_adjustCParams() :
    optimize cPar for a given input (`srcSize` and `dictSize`).
    mostly downsizing, to reduce memory consumption and initialization cost.
    Both `srcSize` and `dictSize` are optional (use 0 if unknown),
    but if both are 0, no optimization can be done.
    Note : cPar is considered validated at this stage. Use ZSTD_checkCParams() to ensure that. */
ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
{
    if (srcSize+dictSize == 0) return cPar;   /* no size information available : no adjustment */

    /* resize params, to use less memory when necessary */
    {   U32 const minSrcSize = (srcSize==0) ? 500 : 0;
        U64 const rSize = srcSize + dictSize + minSrcSize;
        if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
            U32 const srcLog = ZSTD_highbit32((U32)(rSize)-1) + 1;
            if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
    }   }
    if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
    {   U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) || (cPar.strategy == ZSTD_btopt);
        U32 const maxChainLog = cPar.windowLog+btPlus;
        if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; }   /* <= ZSTD_CHAINLOG_MAX */

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* required for frame header */
    if ((cPar.hashLog  < ZSTD_HASHLOG_MIN) && ((U32)cPar.strategy >= (U32)ZSTD_btlazy2)) cPar.hashLog = ZSTD_HASHLOG_MIN;  /* required to ensure collision resistance in bt */

    return cPar;
}
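
/* Illustrative sketch : for a small input of known size, ZSTD_adjustCParams()
 * shrinks windowLog (and consequently chainLog/hashLog) so the tables are not
 * oversized. Assumes ZSTD_getCParams() to fetch level-based defaults. */
#if 0
static ZSTD_compressionParameters example_adjust(size_t srcSize)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(5, srcSize, 0);
    /* e.g. a 4 KB input only needs windowLog ~12, whatever the level requested */
    return ZSTD_adjustCParams(cParams, srcSize, 0);
}
#endif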


size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
{
    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
    U32    const divider = (cParams.searchLength==3) ? 3 : 4;
    size_t const maxNbSeq = blockSize / divider;
    size_t const tokenSpace = blockSize + 11*maxNbSeq;

    size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
    size_t const hSize = ((size_t)1) << cParams.hashLog;
    U32    const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
    size_t const h3Size = ((size_t)1) << hashLog3;
    size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);

    size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
                          + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
    size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
                             + ((cParams.strategy == ZSTD_btopt) ? optSpace : 0);

    return sizeof(ZSTD_CCtx) + neededSpace;
}
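
/* Illustrative sketch : selecting the highest compression level that fits a
 * memory budget, using ZSTD_estimateCCtxSize(). ZSTD_getCParams() is assumed
 * available to translate a level into compression parameters. */
#if 0
static int example_levelForBudget(size_t budget)
{
    int level;
    for (level = 22; level > 1; level--) {
        if (ZSTD_estimateCCtxSize(ZSTD_getCParams(level, 0, 0)) <= budget) break;
    }
    return level;
}
#endif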

/*! ZSTD_resetCCtx_advanced() :
    note : 'params' is expected to be validated */
static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
                                       ZSTD_parameters params, U64 frameContentSize,
                                       U32 reset)
{   /* note : params considered validated here */
    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
    U32    const divider = (params.cParams.searchLength==3) ? 3 : 4;
    size_t const maxNbSeq = blockSize / divider;
    size_t const tokenSpace = blockSize + 11*maxNbSeq;
    size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
    size_t const hSize = ((size_t)1) << params.cParams.hashLog;
    U32    const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
    size_t const h3Size = ((size_t)1) << hashLog3;
    size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
    void* ptr;

    /* Check if workSpace is large enough, alloc a new one if needed */
    {   size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
                              + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
        size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
                              + ((params.cParams.strategy == ZSTD_btopt) ? optSpace : 0);
        if (zc->workSpaceSize < neededSpace) {
            ZSTD_free(zc->workSpace, zc->customMem);
            zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
            if (zc->workSpace == NULL) return ERROR(memory_allocation);
            zc->workSpaceSize = neededSpace;
    }   }

    if (reset) memset(zc->workSpace, 0, tableSpace);   /* reset only tables */
    XXH64_reset(&zc->xxhState, 0);
    zc->hashLog3 = hashLog3;
    zc->hashTable = (U32*)(zc->workSpace);
    zc->chainTable = zc->hashTable + hSize;
    zc->hashTable3 = zc->chainTable + chainSize;
    ptr = zc->hashTable3 + h3Size;
    zc->hufTable = (HUF_CElt*)ptr;
    zc->flagStaticTables = 0;
    ptr = ((U32*)ptr) + 256;  /* note : HUF_CElt* is an incomplete type, its size is simulated using U32 */

    zc->nextToUpdate = 1;
    zc->nextSrc = NULL;
    zc->base = NULL;
    zc->dictBase = NULL;
    zc->dictLimit = 0;
    zc->lowLimit = 0;
    zc->params = params;
    zc->blockSize = blockSize;
    zc->frameContentSize = frameContentSize;
    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }

    if (params.cParams.strategy == ZSTD_btopt) {
        zc->seqStore.litFreq = (U32*)ptr;
        zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
        zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
        zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
        ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
        zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
        ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
        zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
        ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
        zc->seqStore.litLengthSum = 0;
    }
    zc->seqStore.sequencesStart = (seqDef*)ptr;
    ptr = zc->seqStore.sequencesStart + maxNbSeq;
    zc->seqStore.llCode = (BYTE*) ptr;
    zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
    zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
    zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;

    zc->stage = ZSTDcs_init;
    zc->dictID = 0;
    zc->loadedDictEnd = 0;

    return 0;
}


/*! ZSTD_copyCCtx() :
*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
*   Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
*   @return : 0, or an error code */
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx)
{
    if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);

    memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, srcCCtx->frameContentSize, 0);
    dstCCtx->params.fParams.contentSizeFlag = 0;   /* content size may differ from the one set during srcCCtx init */

    /* copy tables */
    {   size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
        size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
        size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
        size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
        memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
    }

    /* copy dictionary offsets */
    dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
    dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
    dstCCtx->nextSrc      = srcCCtx->nextSrc;
    dstCCtx->base         = srcCCtx->base;
    dstCCtx->dictBase     = srcCCtx->dictBase;
    dstCCtx->dictLimit    = srcCCtx->dictLimit;
    dstCCtx->lowLimit     = srcCCtx->lowLimit;
    dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
    dstCCtx->dictID       = srcCCtx->dictID;

    /* copy entropy tables */
    dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
    if (srcCCtx->flagStaticTables) {
        memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
        memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
        memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
        memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
    }

    return 0;
}
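
/* Illustrative sketch : amortizing dictionary loading across many frames by
 * cloning a prepared context while it is still in ZSTDcs_init. Assumes the
 * advanced entry point ZSTD_compressBegin_usingDict() from the static API. */
#if 0
static size_t example_copyCCtx(ZSTD_CCtx* templateCCtx, ZSTD_CCtx* workCCtx,
                               const void* dict, size_t dictSize)
{
    size_t const initError = ZSTD_compressBegin_usingDict(templateCCtx, dict, dictSize, 3);
    if (ZSTD_isError(initError)) return initError;
    return ZSTD_copyCCtx(workCCtx, templateCCtx);   /* cheap table copy */
}
#endif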


/*! ZSTD_reduceTable() :
*   reduce table indexes by `reducerValue` */
static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
{
    U32 u;
    for (u=0 ; u < size ; u++) {
        if (table[u] < reducerValue) table[u] = 0;
        else table[u] -= reducerValue;
    }
}

/*! ZSTD_reduceIndex() :
*   rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
{
    { U32 const hSize = 1 << zc->params.cParams.hashLog;
      ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }

    { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
      ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }

    { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
      ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
}
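
/* Illustrative sketch : reduction preserves relative distances, because the
 * current position and every stored index are shifted by the same amount;
 * entries older than reducerValue simply fall out of the window. */
#if 0
static void example_reduceTable(void)
{
    U32 table[2] = { 1000, 90 };
    ZSTD_reduceTable(table, 2, 100);
    /* table[0] == 900 : distance to any future position is unchanged */
    /* table[1] == 0   : evicted, it was below the reducer */
}
#endif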


/*-*******************************************************
*  Block entropic compression
*********************************************************/

/* See zstd_compression_format.md for detailed format description */

size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
    memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
    return ZSTD_blockHeaderSize+srcSize;
}


static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE* const)dst;
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);

    if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);

    switch(flSize)
    {
        case 1: /* 2 - 1 - 5 */
            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
            break;
        case 2: /* 2 - 2 - 12 */
            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
            break;
        default:   /* note : should not be necessary, flSize is necessarily within {1,2,3} */
        case 3: /* 2 - 2 - 20 */
            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
            break;
    }

    memcpy(ostart + flSize, src, srcSize);
    return srcSize + flSize;
}

static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    BYTE* const ostart = (BYTE* const)dst;
    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);

    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */

    switch(flSize)
    {
        case 1: /* 2 - 1 - 5 */
            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
            break;
        case 2: /* 2 - 2 - 12 */
            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
            break;
        default:   /* note : should not be necessary, flSize is necessarily within {1,2,3} */
        case 3: /* 2 - 2 - 20 */
            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
            break;
    }

    ostart[flSize] = *(const BYTE*)src;
    return flSize+1;
}


static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }

static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
{
    size_t const minGain = ZSTD_minGain(srcSize);
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
    BYTE*  const ostart = (BYTE*)dst;
    U32 singleStream = srcSize < 256;
    symbolEncodingType_e hType = set_compressed;
    size_t cLitSize;


    /* small ? don't even attempt compression (speed opt) */
#   define LITERAL_NOENTROPY 63
    {   size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }

    if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
    if (zc->flagStaticTables && (lhSize==3)) {
        hType = set_repeat;
        singleStream = 1;
        cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
    } else {
        cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11)
                                : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11);
    }

    if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    if (cLitSize==1)
        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);

    /* Build header */
    switch(lhSize)
    {
    case 3: /* 2 - 2 - 10 - 10 */
        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
            MEM_writeLE24(ostart, lhc);
            break;
        }
    case 4: /* 2 - 2 - 14 - 14 */
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
            MEM_writeLE32(ostart, lhc);
            break;
        }
    default:   /* should not be necessary, lhSize is only {3,4,5} */
    case 5: /* 2 - 2 - 18 - 18 */
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
            MEM_writeLE32(ostart, lhc);
            ostart[4] = (BYTE)(cLitSize >> 10);
            break;
        }
    }
    return lhSize+cLitSize;
}
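
/* Worked example (illustrative) : with srcSize=1000, cLitSize=600, two streams
 * and hType==set_compressed, lhSize==3 packs the 2-2-10-10 bit fields as
 *   lhc = 2 + (1<<2) + (1000<<4) + (600<<14) = 0x963E86
 * which MEM_writeLE24() then stores over 3 little-endian bytes. */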

static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                   8,  9, 10, 11, 12, 13, 14, 15,
                                  16, 16, 17, 17, 18, 18, 19, 19,
                                  20, 20, 20, 20, 21, 21, 21, 21,
                                  22, 22, 22, 22, 22, 22, 22, 22,
                                  23, 23, 23, 23, 23, 23, 23, 23,
                                  24, 24, 24, 24, 24, 24, 24, 24,
                                  24, 24, 24, 24, 24, 24, 24, 24 };

static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                  32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                  38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                  41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };


void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
    BYTE const LL_deltaCode = 19;
    BYTE const ML_deltaCode = 36;
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    BYTE* const llCodeTable = seqStorePtr->llCode;
    BYTE* const ofCodeTable = seqStorePtr->ofCode;
    BYTE* const mlCodeTable = seqStorePtr->mlCode;
    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    U32 u;
    for (u=0; u<nbSeq; u++) {
        U32 const llv = sequences[u].litLength;
        U32 const mlv = sequences[u].matchLength;
        llCodeTable[u] = (llv> 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
        mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
    }
    if (seqStorePtr->longLengthID==1)
        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
    if (seqStorePtr->longLengthID==2)
        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
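
/* Worked example (illustrative) : a litLength of 100 exceeds 63, so its code is
 * ZSTD_highbit32(100) + 19 = 6 + 19 = 25 ; a stored matchLength field of 200
 * (i.e. matchLength-MINMATCH) exceeds 127, so its code is
 * ZSTD_highbit32(200) + 36 = 7 + 36 = 43. Smaller values are looked up
 * directly in LL_Code[] / ML_Code[]. */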


size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
                              void* dst, size_t dstCapacity,
                              size_t srcSize)
{
    const seqStore_t* seqStorePtr = &(zc->seqStore);
    U32 count[MaxSeq+1];
    S16 norm[MaxSeq+1];
    FSE_CTable* CTable_LitLength = zc->litlengthCTable;
    FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
    FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    BYTE* seqHead;

    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const litSize = seqStorePtr->lit - literals;
        size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
        if (ZSTD_isError(cSize)) return cSize;
        op += cSize;
    }

    /* Sequences Header */
    if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/) return ERROR(dstSize_tooSmall);
    if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
    else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
    else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
    if (nbSeq==0) goto _check_compressibility;

    /* seqHead : flags for FSE encoding type */
    seqHead = op++;

#define MIN_SEQ_FOR_DYNAMIC_FSE   64
#define MAX_SEQ_FOR_STATIC_FSE  1000

    /* convert lengths/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);

    /* CTable for Literal Lengths */
    {   U32 max = MaxLL;
        size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
            *op++ = llCodeTable[0];
            FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
            LLtype = set_rle;
        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
            LLtype = set_repeat;
        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
            FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
            LLtype = set_basic;
        } else {
            size_t nbSeq_1 = nbSeq;
            const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
            if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
              op += NCountSize; }
            FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
            LLtype = set_compressed;
    }   }

    /* CTable for Offsets */
    {   U32 max = MaxOff;
        size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
            *op++ = ofCodeTable[0];
            FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
            Offtype = set_rle;
        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
            Offtype = set_repeat;
        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
            FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
            Offtype = set_basic;
        } else {
            size_t nbSeq_1 = nbSeq;
            const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
            if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
              op += NCountSize; }
            FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
            Offtype = set_compressed;
    }   }

    /* CTable for MatchLengths */
    {   U32 max = MaxML;
        size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
            *op++ = *mlCodeTable;
            FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
            MLtype = set_rle;
        } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
            MLtype = set_repeat;
        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
            FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
            MLtype = set_basic;
        } else {
            size_t nbSeq_1 = nbSeq;
            const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
            if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
            { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
              if (FSE_isError(NCountSize)) return ERROR(GENERIC);
              op += NCountSize; }
            FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
            MLtype = set_compressed;
    }   }

    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
    zc->flagStaticTables = 0;

    /* Encoding Sequences */
    {   BIT_CStream_t blockStream;
        FSE_CState_t  stateMatchLength;
        FSE_CState_t  stateOffsetBits;
        FSE_CState_t  stateLitLength;

        { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op);
          if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); }   /* not enough space remaining */

        /* first symbols */
        FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
        FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
        FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
        BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
        if (MEM_32bits()) BIT_flushBits(&blockStream);
        BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
        if (MEM_32bits()) BIT_flushBits(&blockStream);
        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
        BIT_flushBits(&blockStream);

        {   size_t n;
            for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
                BYTE const llCode = llCodeTable[n];
                BYTE const ofCode = ofCodeTable[n];
                BYTE const mlCode = mlCodeTable[n];
                U32  const llBits = LL_bits[llCode];
                U32  const ofBits = ofCode;                                     /* 32b*/  /* 64b*/
                U32  const mlBits = ML_bits[mlCode];
                                                                                /* (7)*/  /* (7)*/
                FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
                FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
                FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
                if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
                    BIT_flushBits(&blockStream);                                /* (7)*/
                BIT_addBits(&blockStream, sequences[n].litLength, llBits);
                if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
                BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
                if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
                BIT_addBits(&blockStream, sequences[n].offset, ofBits);         /* 31 */
                BIT_flushBits(&blockStream);                                    /* (7)*/
        }   }

        FSE_flushCState(&blockStream, &stateMatchLength);
        FSE_flushCState(&blockStream, &stateOffsetBits);
        FSE_flushCState(&blockStream, &stateLitLength);

        {   size_t const streamSize = BIT_closeCStream(&blockStream);
            if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
            op += streamSize;
    }   }

    /* check compressibility */
_check_compressibility:
    { size_t const minGain = ZSTD_minGain(srcSize);
      size_t const maxCSize = srcSize - minGain;
      if ((size_t)(op-ostart) >= maxCSize) return 0; }

    /* confirm repcodes */
    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }

    return op - ostart;
}


/*! ZSTD_storeSeq() :
    Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
    `offsetCode` : distance to match, or 0 == repCode.
    `matchCode` : matchLength - MINMATCH
*/
MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
{
#if 0  /* for debug */
    static const BYTE* g_start = NULL;
    const U32 pos = (U32)(literals - g_start);
    if (g_start==NULL) g_start = literals;
    //if ((pos > 1) && (pos < 50000))
        printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
               pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
#endif
    /* copy Literals */
    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offset = offsetCode + 1;

    /* match Length */
    if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
    seqStorePtr->sequences[0].matchLength = (U16)matchCode;

    seqStorePtr->sequences++;
}
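
/* Illustrative sketch : how a block parser would record "5 literals, then a
 * 9-byte match at distance 48". A real offset is passed as distance +
 * ZSTD_REP_MOVE, a repcode is signalled by passing 0 (per the comment above);
 * matchCode is always matchLength - MINMATCH. */
#if 0
static void example_storeSeq(seqStore_t* ss, const BYTE* anchor)
{
    ZSTD_storeSeq(ss, 5, anchor, 48 + ZSTD_REP_MOVE, 9 - MINMATCH);
}
#endif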


/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (register size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r=0;
            _BitScanForward( &r, (U32)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}


static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    while (pIn < pInLoopLimit) {
        size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
        if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
        pIn += ZSTD_NbCommonBytes(diff);
        return (size_t)(pIn - pStart);
    }
    if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
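
/* Illustrative sketch : ZSTD_count() walks machine words, and converts the
 * first differing XOR word into a byte count with ZSTD_NbCommonBytes(). */
#if 0
static void example_count(void)
{
    const BYTE a[] = "abcdefgh12345678";
    const BYTE b[] = "abcdefgh1234XXXX";
    size_t const matchLength = ZSTD_count(a, b, a + 16);   /* == 12 */
    (void)matchLength;
}
#endif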

/** ZSTD_count_2segments() :
*   can count match length with `ip` & `match` in 2 different segments.
*   convention : on reaching mEnd, match count continues, starting again from iStart
*/
static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
*  Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); }   /* only used in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
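
/* Illustrative sketch : the dispatcher picks a multiplicative hash over the
 * first `mls` bytes and reduces it to `hBits` bits, i.e. an index into a
 * (1<<hBits)-entry table. E.g. with mls==5, ZSTD_hash5Ptr() keeps 40 bits of
 * the little-endian read before multiplying by prime5bytes. */
#if 0
static size_t example_hash(const BYTE* ip, U32 hashLog)
{
    return ZSTD_hashPtr(ip, hashLog, 5);
}
#endif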
892 
893 
894 /*-*************************************
895 *  Fast Scan
896 ***************************************/
ZSTD_fillHashTable(ZSTD_CCtx * zc,const void * end,const U32 mls)897 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
898 {
899     U32* const hashTable = zc->hashTable;
900     U32  const hBits = zc->params.cParams.hashLog;
901     const BYTE* const base = zc->base;
902     const BYTE* ip = base + zc->nextToUpdate;
903     const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
904     const size_t fastHashFillStep = 3;
905 
906     while(ip <= iend) {
907         hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
908         ip += fastHashFillStep;
909     }
910 }
911 
912 
913 FORCE_INLINE
ZSTD_compressBlock_fast_generic(ZSTD_CCtx * cctx,const void * src,size_t srcSize,const U32 mls)914 void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
915                                const void* src, size_t srcSize,
916                                const U32 mls)
917 {
918     U32* const hashTable = cctx->hashTable;
919     U32  const hBits = cctx->params.cParams.hashLog;
920     seqStore_t* seqStorePtr = &(cctx->seqStore);
921     const BYTE* const base = cctx->base;
922     const BYTE* const istart = (const BYTE*)src;
923     const BYTE* ip = istart;
924     const BYTE* anchor = istart;
925     const U32   lowestIndex = cctx->dictLimit;
926     const BYTE* const lowest = base + lowestIndex;
927     const BYTE* const iend = istart + srcSize;
928     const BYTE* const ilimit = iend - HASH_READ_SIZE;
929     U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
930     U32 offsetSaved = 0;
931 
932     /* init */
933     ip += (ip==lowest);
934     {   U32 const maxRep = (U32)(ip-lowest);
935         if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
936         if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
937     }
938 
939     /* Main Search Loop */
940     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
941         size_t mLength;
942         size_t const h = ZSTD_hashPtr(ip, hBits, mls);
943         U32 const current = (U32)(ip-base);
944         U32 const matchIndex = hashTable[h];
945         const BYTE* match = base + matchIndex;
946         hashTable[h] = current;   /* update hash table */
947 
948         if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
949             mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
950             ip++;
951             ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
952         } else {
953             U32 offset;
954             if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
955                 ip += ((ip-anchor) >> g_searchStrength) + 1;
956                 continue;
957             }
958             mLength = ZSTD_count(ip+4, match+4, iend) + 4;
959             offset = (U32)(ip-match);
960             while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
961             offset_2 = offset_1;
962             offset_1 = offset;
963 
964             ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
965         }
966 
967         /* match found */
968         ip += mLength;
969         anchor = ip;
970 
971         if (ip <= ilimit) {
972             /* Fill Table */
973             hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;  /* here because current+2 could be > iend-8 */
974             hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
975             /* check immediate repcode */
976             while ( (ip <= ilimit)
977                  && ( (offset_2>0)
978                  & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
979                 /* store sequence */
980                 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
981                 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; }  /* swap offset_2 <=> offset_1 */
982                 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
983                 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
984                 ip += rLength;
985                 anchor = ip;
986                 continue;   /* faster when present ... (?) */
987     }   }   }
988 
989     /* save reps for next block */
990     cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
991     cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
992 
993     /* Last Literals */
994     {   size_t const lastLLSize = iend - anchor;
995         memcpy(seqStorePtr->lit, anchor, lastLLSize);
996         seqStorePtr->lit += lastLLSize;
997     }
998 }
999 
1000 
ZSTD_compressBlock_fast(ZSTD_CCtx * ctx,const void * src,size_t srcSize)1001 static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
1002                        const void* src, size_t srcSize)
1003 {
1004     const U32 mls = ctx->params.cParams.searchLength;
1005     switch(mls)
1006     {
1007     default:
1008     case 4 :
1009         ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
1010     case 5 :
1011         ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
1012     case 6 :
1013         ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
1014     case 7 :
1015         ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
1016     }
1017 }
1018 
1019 
ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx * ctx,const void * src,size_t srcSize,const U32 mls)1020 static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
1021                                  const void* src, size_t srcSize,
1022                                  const U32 mls)
1023 {
1024     U32* hashTable = ctx->hashTable;
1025     const U32 hBits = ctx->params.cParams.hashLog;
1026     seqStore_t* seqStorePtr = &(ctx->seqStore);
1027     const BYTE* const base = ctx->base;
1028     const BYTE* const dictBase = ctx->dictBase;
1029     const BYTE* const istart = (const BYTE*)src;
1030     const BYTE* ip = istart;
1031     const BYTE* anchor = istart;
1032     const U32   lowestIndex = ctx->lowLimit;
1033     const BYTE* const dictStart = dictBase + lowestIndex;
1034     const U32   dictLimit = ctx->dictLimit;
1035     const BYTE* const lowPrefixPtr = base + dictLimit;
1036     const BYTE* const dictEnd = dictBase + dictLimit;
1037     const BYTE* const iend = istart + srcSize;
1038     const BYTE* const ilimit = iend - 8;
1039     U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1040 
1041     /* Search Loop */
1042     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
1043         const size_t h = ZSTD_hashPtr(ip, hBits, mls);
1044         const U32 matchIndex = hashTable[h];
1045         const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1046         const BYTE* match = matchBase + matchIndex;
1047         const U32 current = (U32)(ip-base);
1048         const U32 repIndex = current + 1 - offset_1;   /* offset_1 expected <= current +1 */
1049         const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1050         const BYTE* repMatch = repBase + repIndex;
1051         size_t mLength;
1052         hashTable[h] = current;   /* update hash table */
1053 
1054         if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1055            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1056             const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1057             mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
1058             ip++;
1059             ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1060         } else {
1061             if ( (matchIndex < lowestIndex) ||
1062                  (MEM_read32(match) != MEM_read32(ip)) ) {
1063                 ip += ((ip-anchor) >> g_searchStrength) + 1;
1064                 continue;
1065             }
1066             {   const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1067                 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1068                 U32 offset;
1069                 mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
1070                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
1071                 offset = current - matchIndex;
1072                 offset_2 = offset_1;
1073                 offset_1 = offset;
1074                 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1075         }   }
1076 
1077         /* found a match : store it */
1078         ip += mLength;
1079         anchor = ip;
1080 
1081         if (ip <= ilimit) {
1082             /* Fill Table */
1083 			hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
1084             hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1085             /* check immediate repcode */
1086             while (ip <= ilimit) {
1087                 U32 const current2 = (U32)(ip-base);
1088                 U32 const repIndex2 = current2 - offset_2;
1089                 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1090                 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
1091                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1092                     const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
1093                     size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
1094                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
1095                     ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1096                     hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
1097                     ip += repLength2;
1098                     anchor = ip;
1099                     continue;
1100                 }
1101                 break;
1102     }   }   }
1103 
1104     /* save reps for next block */
1105     ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1106 
1107     /* Last Literals */
1108     {   size_t const lastLLSize = iend - anchor;
1109         memcpy(seqStorePtr->lit, anchor, lastLLSize);
1110         seqStorePtr->lit += lastLLSize;
1111     }
1112 }


static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
                         const void* src, size_t srcSize)
{
    U32 const mls = ctx->params.cParams.searchLength;
    switch(mls)
    {
    default:
    case 4 :
        ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
    case 5 :
        ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
    case 6 :
        ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
    case 7 :
        ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
    }
}


/*-*************************************
*  Double Fast
***************************************/
static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
{
    U32* const hashLarge = cctx->hashTable;
    U32  const hBitsL = cctx->params.cParams.hashLog;
    U32* const hashSmall = cctx->chainTable;
    U32  const hBitsS = cctx->params.cParams.chainLog;
    const BYTE* const base = cctx->base;
    const BYTE* ip = base + cctx->nextToUpdate;
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
    const size_t fastHashFillStep = 3;

    while(ip <= iend) {
        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
        hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
        ip += fastHashFillStep;
    }
}
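
/* Editorial sketch (not upstream code) : dictionary loading seeds only every
 * third position (fastHashFillStep == 3), trading some match coverage for
 * load speed ; the search loops re-insert positions as they go anyway. The
 * same strided fill, written against plain tables with hypothetical names : */
#if 0
static void ZSTD_fillStride3_sketch(U32* tableS, U32* tableL, const BYTE* base,
                                    U32 from, U32 to, U32 hBitsS, U32 hBitsL, U32 mls)
{
    U32 pos;
    for (pos = from; pos + HASH_READ_SIZE <= to; pos += 3) {
        tableS[ZSTD_hashPtr(base + pos, hBitsS, mls)] = pos;   /* small table : mls-byte hash */
        tableL[ZSTD_hashPtr(base + pos, hBitsL, 8)]   = pos;   /* large table : 8-byte hash */
    }
}
#endif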


FORCE_INLINE
void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
                                 const void* src, size_t srcSize,
                                 const U32 mls)
{
    U32* const hashLong = cctx->hashTable;
    const U32 hBitsL = cctx->params.cParams.hashLog;
    U32* const hashSmall = cctx->chainTable;
    const U32 hBitsS = cctx->params.cParams.chainLog;
    seqStore_t* seqStorePtr = &(cctx->seqStore);
    const BYTE* const base = cctx->base;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 lowestIndex = cctx->dictLimit;
    const BYTE* const lowest = base + lowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
    U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
    U32 offsetSaved = 0;

    /* init */
    ip += (ip==lowest);
    {   U32 const maxRep = (U32)(ip-lowest);
        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
    }

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
        size_t mLength;
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
        U32 const current = (U32)(ip-base);
        U32 const matchIndexL = hashLong[h2];
        U32 const matchIndexS = hashSmall[h];
        const BYTE* matchLong = base + matchIndexL;
        const BYTE* match = base + matchIndexS;
        hashLong[h2] = hashSmall[h] = current;   /* update hash tables */

        if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            ip++;
            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
            U32 offset;
            if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
                offset = (U32)(ip-matchLong);
                while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
            } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* match3 = base + matchIndex3;
                hashLong[h3] = current + 1;
                if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    mLength = ZSTD_count(ip+9, match3+8, iend) + 8;
                    ip++;
                    offset = (U32)(ip-match3);
                    while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    mLength = ZSTD_count(ip+4, match+4, iend) + 4;
                    offset = (U32)(ip-match);
                    while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
                }
            } else {
                ip += ((ip-anchor) >> g_searchStrength) + 1;
                continue;
            }

            offset_2 = offset_1;
            offset_1 = offset;

            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }

        /* match found */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
                hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;  /* here because current+2 could be > iend-8 */
            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
                hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);

            /* check immediate repcode */
            while ( (ip <= ilimit)
                 && ( (offset_2>0)
                 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
                /* store sequence */
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
    }   }   }

    /* save reps for next block */
    cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
    cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Last Literals */
    {   size_t const lastLLSize = iend - anchor;
        memcpy(seqStorePtr->lit, anchor, lastLLSize);
        seqStorePtr->lit += lastLLSize;
    }
}
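
/* Editorial sketch (not upstream code) : the loop above tries, in order,
 * (1) the repcode at ip+1, (2) an 8-byte match from the long table, (3) an
 * mls-byte match from the small table, upgraded when a second long-table
 * probe at ip+1 finds 8 matching bytes. When everything misses, it skips
 * ahead ; with g_searchStrength == 8 the step grows as sketched below, so
 * incompressible regions cost progressively less. */
#if 0
static size_t ZSTD_nextSearchStep_sketch(size_t distanceFromAnchor)
{
    /* step 1 for the first 256 bytes without a match, 2 up to 512 bytes,
     * 3 up to 768, and so on (the shift amount is g_searchStrength) */
    return (distanceFromAnchor >> 8) + 1;
}
#endif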


static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    const U32 mls = ctx->params.cParams.searchLength;
    switch(mls)
    {
    default:
    case 4 :
        ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
    case 5 :
        ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
    case 6 :
        ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
    case 7 :
        ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
    }
}


static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
                                 const void* src, size_t srcSize,
                                 const U32 mls)
{
    U32* const hashLong = ctx->hashTable;
    U32  const hBitsL = ctx->params.cParams.hashLog;
    U32* const hashSmall = ctx->chainTable;
    U32  const hBitsS = ctx->params.cParams.chainLog;
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const base = ctx->base;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32   lowestIndex = ctx->lowLimit;
    const BYTE* const dictStart = dictBase + lowestIndex;
    const U32   dictLimit = ctx->dictLimit;
    const BYTE* const lowPrefixPtr = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
        const U32 matchIndex = hashSmall[hSmall];
        const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
        const BYTE* match = matchBase + matchIndex;

        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
        const U32 matchLongIndex = hashLong[hLong];
        const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
        const BYTE* matchLong = matchLongBase + matchLongIndex;

        const U32 current = (U32)(ip-base);
        const U32 repIndex = current + 1 - offset_1;   /* offset_1 expected <= current +1 */
        const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
        const BYTE* repMatch = repBase + repIndex;
        size_t mLength;
        hashSmall[hSmall] = hashLong[hLong] = current;   /* update hash table */

        if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
            ip++;
            ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
        } else {
            if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
                const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
                U32 offset;
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
                offset = current - matchLongIndex;
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
                U32 const matchIndex3 = hashLong[h3];
                const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
                const BYTE* match3 = match3Base + matchIndex3;
                U32 offset;
                hashLong[h3] = current + 1;
                if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                    const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
                    const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
                    ip++;
                    offset = current+1 - matchIndex3;
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
                } else {
                    const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
                    const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
                    offset = current - matchIndex;
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else {
                ip += ((ip-anchor) >> g_searchStrength) + 1;
                continue;
        }   }

        /* found a match : store it */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
            hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
                    ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
                    anchor = ip;
                    continue;
                }
                break;
    }   }   }

    /* save reps for next block */
    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;

    /* Last Literals */
    {   size_t const lastLLSize = iend - anchor;
        memcpy(seqStorePtr->lit, anchor, lastLLSize);
        seqStorePtr->lit += lastLLSize;
    }
}
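
/* Editorial sketch (not upstream code) : ZSTD_count_2segments(), used heavily
 * above, measures a match that may start in the dictionary segment and spill
 * over into the current prefix. The sketch below restates its contract in
 * terms of ZSTD_count() ; the real helper is defined earlier in this file. */
#if 0
static size_t ZSTD_count2segments_sketch(const BYTE* ip, const BYTE* match,
                                         const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    /* first, compare against the tail of the dictionary segment */
    const BYTE* const vEnd = MIN(ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;   /* stopped before the seam */
    /* the segment matched entirely : continue inside the prefix,
     * whose first byte sits at iStart (== base + dictLimit) */
    return matchLength + ZSTD_count(ip + matchLength, iStart, iEnd);
}
#endif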


static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
                         const void* src, size_t srcSize)
{
    U32 const mls = ctx->params.cParams.searchLength;
    switch(mls)
    {
    default:
    case 4 :
        ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
    case 5 :
        ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
    case 6 :
        ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
    case 7 :
        ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
    }
}


/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
*   ip : assumed <= iend-8.
*   @return : nb of positions added */
static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
                          U32 extDict)
{
    U32*   const hashTable = zc->hashTable;
    U32    const hashLog = zc->params.cParams.hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = zc->chainTable;
    U32    const btLog  = zc->params.cParams.chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match = base + matchIndex;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowLow = zc->lowLimit;
    U32 matchEndIdx = current+8;
    size_t bestLength = 8;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif
        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength])
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend)   /* reached input end : cannot tell whether the match sorts smaller or larger */
            break;   /* drop this candidate, to guarantee tree consistency ; loses a little compression, but the alternatives could corrupt the tree */

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within correct buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
    if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
    return 1;
}
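
/* Editorial sketch (not upstream code) : the chain table doubles as a binary
 * tree stored in a rolling buffer. Position p owns a pair of child slots :
 *   bt[2*(p & btMask)]     -> index of p's "smaller" subtree root
 *   bt[2*(p & btMask) + 1] -> index of p's "larger"  subtree root
 * Because slots are addressed through the mask rather than bounds-checked,
 * positions older than current - btMask get silently overwritten ; that is
 * why the search above stops at btLow and parks smallerPtr/largerPtr on
 * dummy32 instead of following stale links. */
#if 0
static U32* ZSTD_btChildren_sketch(U32* bt, U32 btMask, U32 pos)
{
    return bt + 2*(pos & btMask);   /* [0] = smaller child, [1] = larger child */
}
#endif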


static size_t ZSTD_insertBtAndFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iend,
                        size_t* offsetPtr,
                        U32 nbCompares, const U32 mls,
                        U32 extDict)
{
    U32*   const hashTable = zc->hashTable;
    U32    const hashLog = zc->params.cParams.hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = zc->chainTable;
    U32    const btLog  = zc->params.cParams.chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex  = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    const U32 windowLow = zc->lowLimit;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8;
    U32 dummy32;   /* to be nullified at the end */
    size_t bestLength = 0;

    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex > windowLow)) {
        U32* nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match;

        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
            match = base + matchIndex;
            if (match[matchLength] == ip[matchLength])
                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
            if (ip+matchLength == iend)   /* reached input end : cannot tell whether the match sorts smaller or larger */
                break;   /* drop this candidate, to guarantee consistency (loses a little compression) */
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
    return bestLength;
}
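
/* Editorial sketch (not upstream code) : the acceptance rule above trades
 * match length against offset cost. A longer candidate wins only if each
 * extra bit of offset is paid for by a quarter byte of length. Numbers below
 * are hypothetical :
 *   best : len 16 @ offset 100   (highbit(101)   =  6)
 *   new  : len 17 @ offset 70000 (highbit(70001) = 16) -> 4*1 = 4  <= 16-6 = 10 : rejected
 *   new  : len 19 @ offset 70000                       -> 4*3 = 12 >  10        : accepted */
#if 0
static int ZSTD_betterMatch_sketch(size_t newLen, size_t bestLen, U32 newOff, U32 bestOff)
{
    return (4*(int)(newLen - bestLen)) > (int)(ZSTD_highbit32(newOff+1) - ZSTD_highbit32(bestOff+1));
}
#endif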


static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
{
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;

    while(idx < target)
        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
}

/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls)
{
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
}


static size_t ZSTD_BtFindBestMatch_selectMLS (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default :
    case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
    case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
    case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
    }
}


static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
{
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;

    while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
}


/** Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch_extDict (
                        ZSTD_CCtx* zc,
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls)
{
    if (ip < zc->base + zc->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
    return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
}


static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default :
    case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
    case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
    case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
    }
}



/* *********************************
*  Hash Chain
***********************************/
#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & mask]

/* Update chains up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
FORCE_INLINE
U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
{
    U32* const hashTable  = zc->hashTable;
    const U32 hashLog = zc->params.cParams.hashLog;
    U32* const chainTable = zc->chainTable;
    const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
    const BYTE* const base = zc->base;
    const U32 target = (U32)(ip - base);
    U32 idx = zc->nextToUpdate;

    while(idx < target) { /* catch up */
        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
        hashTable[h] = idx;
        idx++;
    }

    zc->nextToUpdate = target;
    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}
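
/* Editorial sketch (not upstream code) : there are no explicit list nodes.
 * chainTable[pos & chainMask] stores the previous position that hashed into
 * the same bucket, so following a chain is just repeated table lookup : */
#if 0
static void ZSTD_walkChain_sketch(const U32* chainTable, U32 chainMask,
                                  U32 head, U32 lowLimit, int maxAttempts)
{
    U32 idx = head;   /* most recent position for this hash */
    for ( ; (idx > lowLimit) && (maxAttempts > 0) ; maxAttempts--) {
        /* ... examine the candidate match at index idx ... */
        idx = chainTable[idx & chainMask];   /* next older candidate */
    }
}
#endif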



FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_HcFindBestMatch_generic (
                        ZSTD_CCtx* zc,   /* Index table will be updated */
                        const BYTE* const ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 mls, const U32 extDict)
{
    U32* const chainTable = zc->chainTable;
    const U32 chainSize = (1 << zc->params.cParams.chainLog);
    const U32 chainMask = chainSize-1;
    const BYTE* const base = zc->base;
    const BYTE* const dictBase = zc->dictBase;
    const U32 dictLimit = zc->dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 lowLimit = zc->lowLimit;
    const U32 current = (U32)(ip-base);
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    int nbAttempts=maxNbAttempts;
    size_t ml=EQUAL_READ32-1;

    /* HC4 match finder */
    U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);

    for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
        const BYTE* match;
        size_t currentMl=0;
        if ((!extDict) || matchIndex >= dictLimit) {
            match = base + matchIndex;
            if (match[ml] == ip[ml])   /* potentially better */
                currentMl = ZSTD_count(ip, match, iLimit);
        } else {
            match = dictBase + matchIndex;
            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
                currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
        }

        /* save best solution */
        if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow */ }

        if (matchIndex <= minChain) break;
        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
    }

    return ml;
}
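
/* Editorial note : in the prefix branch above, `match[ml] == ip[ml]` peeks at
 * the single byte that would extend the current best length ; candidates that
 * cannot beat `ml` almost always fail this one comparison, so the full
 * ZSTD_count() is only paid for promising candidates. */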


FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
                        ZSTD_CCtx* zc,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default :
    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
    }
}


FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                        ZSTD_CCtx* zc,
                        const BYTE* ip, const BYTE* const iLimit,
                        size_t* offsetPtr,
                        const U32 maxNbAttempts, const U32 matchLengthSearch)
{
    switch(matchLengthSearch)
    {
    default :
    case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
    case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
    case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
    }
}


/* *******************************
*  Common parser - lazy strategy
*********************************/
FORCE_INLINE
void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                                     const void* src, size_t srcSize,
                                     const U32 searchMethod, const U32 depth)
{
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base + ctx->dictLimit;

    U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
    U32 const mls = ctx->params.cParams.searchLength;

    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                        size_t* offsetPtr,
                        U32 maxNbAttempts, U32 matchLengthSearch);
    searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;

    /* init */
    ip += (ip==base);
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    {   U32 const maxRep = (U32)(ip-base);
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
    }

    /* Match Loop */
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;

        /* check repCode */
        if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
            /* repcode : we take it */
            matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
            if (depth==0) goto _storeSequence;
        }

        /* first search (depth 0) */
        {   size_t offsetFound = 99999999;
            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < EQUAL_READ32) {
            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
                int const gain2 = (int)(mlRep * 3);
                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
                    matchLength = mlRep, offset = 0, start = ip;
            }
            {   size_t offset2=99999999;
                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                    size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
                    int const gain2 = (int)(ml2 * 4);
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
                        matchLength = ml2, offset = 0, start = ip;
                }
                {   size_t offset2=99999999;
                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* catch up */
        if (offset) {
            while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE]))   /* only search for offset within prefix */
                { start--; matchLength++; }
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while ( (ip <= ilimit)
             && ((offset_2>0)
             & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
            /* store sequence */
            matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
            offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
            ip += matchLength;
            anchor = ip;
            continue;   /* faster when present ... (?) */
    }   }

    /* Save reps for next block */
    ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
    ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;

    /* Last Literals */
    {   size_t const lastLLSize = iend - anchor;
        memcpy(seqStorePtr->lit, anchor, lastLLSize);
        seqStorePtr->lit += lastLLSize;
    }
}
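
/* Editorial note (hypothetical numbers) : in the depth-1 comparison above,
 * gain1 carries a +4 handicap in favour of the match already found, apparently
 * as hysteresis, so the newer candidate must win clearly. For example :
 *   current : len 6 @ offset 1028 -> 6*4 - highbit(1029) + 4 = 24 - 10 + 4 = 18
 *   newer   : len 6 @ offset   48 -> 6*4 - highbit(49)       = 24 -  5     = 19
 * 19 > 18 : at equal length, the much shorter offset wins despite the bonus. */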


static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
}

static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
}

static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
}

static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
}


FORCE_INLINE
void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                                     const void* src, size_t srcSize,
                                     const U32 searchMethod, const U32 depth)
{
    seqStore_t* seqStorePtr = &(ctx->seqStore);
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ctx->base;
    const U32 dictLimit = ctx->dictLimit;
    const U32 lowestIndex = ctx->lowLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictBase = ctx->dictBase;
    const BYTE* const dictEnd  = dictBase + dictLimit;
    const BYTE* const dictStart  = dictBase + ctx->lowLimit;

    const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
    const U32 mls = ctx->params.cParams.searchLength;

    typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
                        size_t* offsetPtr,
                        U32 maxNbAttempts, U32 matchLengthSearch);
    searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;

    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];

    /* init */
    ctx->nextToUpdate3 = ctx->nextToUpdate;
    ip += (ip == prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        size_t matchLength=0;
        size_t offset=0;
        const BYTE* start=ip+1;
        U32 current = (U32)(ip-base);

        /* check repCode */
        {   const U32 repIndex = (U32)(current+1 - offset_1);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */
            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                /* repcode detected : we take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
                if (depth==0) goto _storeSequence;
        }   }

        /* first search (depth 0) */
        {   size_t offsetFound = 99999999;
            size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
            if (ml2 > matchLength)
                matchLength = ml2, start = ip, offset=offsetFound;
        }

        if (matchLength < EQUAL_READ32) {
            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
        }

        /* let's try to find a better solution */
        if (depth>=1)
        while (ip<ilimit) {
            ip ++;
            current++;
            /* check repCode */
            if (offset) {
                const U32 repIndex = (U32)(current - offset_1);
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                    size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
                    int const gain2 = (int)(repLength * 3);
                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
                    if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
                        matchLength = repLength, offset = 0, start = ip;
            }   }

            /* search match, depth 1 */
            {   size_t offset2=99999999;
                size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
                if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
                    matchLength = ml2, offset = offset2, start = ip;
                    continue;   /* search a better one */
            }   }

            /* let's find an even better one */
            if ((depth==2) && (ip<ilimit)) {
                ip ++;
                current++;
                /* check repCode */
                if (offset) {
                    const U32 repIndex = (U32)(current - offset_1);
                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                    const BYTE* const repMatch = repBase + repIndex;
                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
                        /* repcode detected */
                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                        size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
                        int gain2 = (int)(repLength * 4);
                        int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
                        if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
                            matchLength = repLength, offset = 0, start = ip;
                }   }

                /* search match, depth 2 */
                {   size_t offset2=99999999;
                    size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
                    if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
                        matchLength = ml2, offset = offset2, start = ip;
                        continue;
            }   }   }
            break;  /* nothing found : store previous solution */
        }

        /* catch up */
        if (offset) {
            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
        }

        /* store sequence */
_storeSequence:
        {   size_t const litLength = start - anchor;
            ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

        /* check immediate repcode */
        while (ip <= ilimit) {
            const U32 repIndex = (U32)((ip-base) - offset_2);
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */
            if (MEM_read32(ip) == MEM_read32(repMatch)) {
                /* repcode detected : we take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
            }
            break;
    }   }

    /* Save reps for next block */
    ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;

    /* Last Literals */
    {   size_t const lastLLSize = iend - anchor;
        memcpy(seqStorePtr->lit, anchor, lastLLSize);
        seqStorePtr->lit += lastLLSize;
    }
}


void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
}

static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
}

static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
}

static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
    ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
}


/* The optimal parser */
#include "zstd_opt.h"

static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
#ifdef ZSTD_OPT_H_91842398743
    ZSTD_compressBlock_opt_generic(ctx, src, srcSize);
#else
    (void)ctx; (void)src; (void)srcSize;
    return;
#endif
}

static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
{
#ifdef ZSTD_OPT_H_91842398743
    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize);
#else
    (void)ctx; (void)src; (void)srcSize;
    return;
#endif
}


typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);

static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
{
    static const ZSTD_blockCompressor blockCompressor[2][7] = {
        { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt },
        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict, ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict }
    };

    return blockCompressor[extDict][(U32)strat];
}
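
/* Editorial sketch (not upstream code) : how the dispatch table above is
 * consumed ; compare ZSTD_compressBlock_internal() below. The extDict variant
 * is selected whenever part of the window lives outside the current prefix. */
#if 0
static void ZSTD_compressOneBlock_sketch(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    int const extDict = (zc->lowLimit < zc->dictLimit);   /* any data outside the prefix ? */
    ZSTD_blockCompressor const bc = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, extDict);
    bc(zc, src, srcSize);   /* fills zc->seqStore ; entropy coding is a separate stage */
}
#endif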
2201 
2202 
ZSTD_compressBlock_internal(ZSTD_CCtx * zc,void * dst,size_t dstCapacity,const void * src,size_t srcSize)2203 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2204 {
2205     ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
2206     const BYTE* const base = zc->base;
2207     const BYTE* const istart = (const BYTE*)src;
2208     const U32 current = (U32)(istart-base);
2209     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0;   /* don't even attempt compression below a certain srcSize */
2210     ZSTD_resetSeqStore(&(zc->seqStore));
    if (current > zc->nextToUpdate + 384)
        zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* limit catch-up cost : search tables are not updated during very long matches, so cap the update distance */
    blockCompressor(zc, src, srcSize);
    return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
}


/*! ZSTD_compress_generic() :
*   Compress a chunk of data into one or multiple blocks.
*   All blocks will be terminated, all input will be consumed.
*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
*   The frame must already be started (header already written).
*   @return : compressed size, or an error code
*/
static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                     U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = 1 << cctx->params.cParams.windowLog;

    if (cctx->params.fParams.checksumFlag)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
        size_t cSize;

        if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall);   /* not enough space to store compressed block */
        if (remaining < blockSize) blockSize = remaining;

        /* preemptive overflow correction */
        if (cctx->lowLimit > (1<<30)) {
            U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt);
            U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1;
            U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */);
            U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog);   /* preserve position % chainSize, ensure current-repcode doesn't underflow */
            U32 const correction = cctx->lowLimit - newLowLimit;
            ZSTD_reduceIndex(cctx, correction);
            cctx->base += correction;
            cctx->dictBase += correction;
            cctx->lowLimit = newLowLimit;
            cctx->dictLimit -= correction;
            if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
            else cctx->nextToUpdate -= correction;
        }

        if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
            /* enforce maxDist */
            U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
            if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
            if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
        }

        cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
        if (ZSTD_isError(cSize)) return cSize;

        if (cSize == 0) {  /* block is not compressible */
            U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
            if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
            MEM_writeLE32(op, cBlockHeader24);   /* writes 4 bytes : harmless, the 4th byte is immediately overwritten by the memcpy below */
            memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
            cSize = ZSTD_blockHeaderSize+blockSize;
        } else {
            U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
            MEM_writeLE24(op, cBlockHeader24);
            cSize += ZSTD_blockHeaderSize;
        }

        remaining -= blockSize;
        dstCapacity -= cSize;
        ip += blockSize;
        op += cSize;
    }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return op-ostart;
}
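
/* Block header layout, as constructed above (3 bytes, little-endian) :
 *    bit  0    : lastBlock flag
 *    bits 1-2  : block type (bt_raw for incompressible data, bt_compressed otherwise)
 *    bits 3-23 : block size
 * e.g. a final raw block of 1000 bytes yields
 *    cBlockHeader24 = 1 + (bt_raw<<1) + (1000<<3)
 * of which only the low 3 bytes are kept in the frame. */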


static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
{   BYTE* const op = (BYTE*)dst;
    U32   const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32   const checksumFlag = params.fParams.checksumFlag>0;
    U32   const windowSize = 1U << params.cParams.windowLog;
    U32   const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1));
    BYTE  const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32   const fcsCode = params.fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) :   /* 0-3 */
                      0;
    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
    size_t pos;

    if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);

    MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
    op[4] = frameHeaderDescriptionByte; pos=5;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
    {
        default:   /* impossible */
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    switch(fcsCode)
    {
        default:   /* impossible */
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}
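
/* Resulting frame header layout (in write order) :
 *    4 bytes         : ZSTD_MAGICNUMBER (little-endian)
 *    1 byte          : frame header description byte
 *                      (dictIDSizeCode | checksumFlag<<2 | singleSegment<<5 | fcsCode<<6)
 *    0-1 byte        : windowLog byte, omitted in singleSegment mode
 *    0/1/2/4 bytes   : dictID, width selected by dictIDSizeCode
 *    0/1/2/4/8 bytes : frame content size, width selected by fcsCode
 *                      (1 byte only in singleSegment mode) */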


static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    const BYTE* const ip = (const BYTE*) src;
    size_t fhSize = 0;

    if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong);   /* missing init (ZSTD_compressBegin) */

    if (frame && (cctx->stage==ZSTDcs_init)) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
        if (ZSTD_isError(fhSize)) return fhSize;
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    /* Check if blocks follow each other */
    if (src != cctx->nextSrc) {
        /* not contiguous */
        ptrdiff_t const delta = cctx->nextSrc - ip;
        cctx->lowLimit = cctx->dictLimit;
        cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
        cctx->dictBase = cctx->base;
        cctx->base -= delta;
        cctx->nextToUpdate = cctx->dictLimit;
        if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit;   /* too small extDict */
    }

    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
        cctx->lowLimit = lowLimitMax;
    }

    cctx->nextSrc = ip + srcSize;

    {   size_t const cSize = frame ?
                             ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
        if (ZSTD_isError(cSize)) return cSize;
        return cSize + fhSize;
    }
}


size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
}


size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx)
{
    return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog);
}

size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
    if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
}


static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    const BYTE* const ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;

    /* input becomes current prefix */
    zc->lowLimit = zc->dictLimit;
    zc->dictLimit = (U32)(zc->nextSrc - zc->base);
    zc->dictBase = zc->base;
    zc->base += ip - zc->nextSrc;
    zc->nextToUpdate = zc->dictLimit;
    zc->loadedDictEnd = (U32)(iend - zc->base);

    zc->nextSrc = iend;
    if (srcSize <= HASH_READ_SIZE) return 0;

    switch(zc->params.cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
        ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength);
        break;

    case ZSTD_btlazy2:
    case ZSTD_btopt:
        ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
        break;

    default:
        return ERROR(GENERIC);   /* strategy doesn't exist; impossible */
    }

    zc->nextToUpdate = zc->loadedDictEnd;
    return 0;
}


/* Dictionary format :
     Magic == ZSTD_DICT_MAGIC (4 bytes)
     HUF_writeCTable(256)
     FSE_writeNCount(off)
     FSE_writeNCount(ml)
     FSE_writeNCount(ll)
     RepOffsets
     Dictionary content
*/
/*! ZSTD_loadDictEntropyStats() :
    @return : size read from dictionary
    note : magic number is assumed to be already checked */
static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;

    {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
        if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
        dictPtr += hufHeaderSize;
    }

    {   short offcodeNCount[MaxOff+1];
        unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
        if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
        { size_t const errorCode = FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
          if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
        dictPtr += offcodeHeaderSize;
    }

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
        if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
        { size_t const errorCode = FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
          if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
        dictPtr += matchlengthHeaderSize;
    }

    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
        if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
        { size_t const errorCode = FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
          if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
        dictPtr += litlengthHeaderSize;
    }

    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
    cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
    cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
    cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
    dictPtr += 12;

    cctx->flagStaticTables = 1;
    return dictPtr - (const BYTE*)dict;
}

/** ZSTD_compress_insertDictionary() :
*   @return : 0, or an error code */
static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
{
    if ((dict==NULL) || (dictSize<=8)) return 0;

    /* default : dict is pure content */
    if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
    zc->dictID = zc->params.fParams.noDictIDFlag ? 0 :  MEM_readLE32((const char*)dict+4);

    /* known magic number : dict is parsed for entropy stats and content */
    {   size_t const eSize_8 = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8);
        size_t const eSize = eSize_8 + 8;
        if (ZSTD_isError(eSize_8)) return eSize_8;
        return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
    }
}


/*! ZSTD_compressBegin_internal() :
*   @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc,
                             const void* dict, size_t dictSize,
                                   ZSTD_parameters params, U64 pledgedSrcSize)
{
    size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, pledgedSrcSize, 1);
    if (ZSTD_isError(resetError)) return resetError;

    return ZSTD_compress_insertDictionary(zc, dict, dictSize);
}


/*! ZSTD_compressBegin_advanced() :
*   @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                             const void* dict, size_t dictSize,
                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    /* compression parameters verification and optimization */
    { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize);
      if (ZSTD_isError(errorCode)) return errorCode; }

    return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
}


size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
}


size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
{
    return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
}
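
/* Usage sketch for the buffer-less API above (illustrative only, not part of the library) :
 * chunk buffers and error handling are the caller's responsibility;
 * every returned size_t should be tested with ZSTD_isError().
 *
 *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *     size_t pos = 0;
 *     ZSTD_compressBegin(cctx, 5);   // compression level 5, no dictionary
 *     pos += ZSTD_compressContinue(cctx, dst+pos, dstCapacity-pos, chunk, chunkSize);     // repeat per chunk
 *     pos += ZSTD_compressEnd(cctx, dst+pos, dstCapacity-pos, lastChunk, lastChunkSize);  // also writes the epilogue
 *     ZSTD_freeCCtx(cctx);
 */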


/*! ZSTD_writeEpilogue() :
*   Ends a frame.
*   @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong);  /* init missing */

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
        if (ZSTD_isError(fhSize)) return fhSize;
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        if (dstCapacity<4) return ERROR(dstSize_tooSmall);
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->params.fParams.checksumFlag) {
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        if (dstCapacity<4) return ERROR(dstSize_tooSmall);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}


size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t endResult;
    size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
    if (ZSTD_isError(cSize)) return cSize;
    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
    if (ZSTD_isError(endResult)) return endResult;
    return cSize + endResult;
}


static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params)
{
    size_t const errorCode = ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize);
    if(ZSTD_isError(errorCode)) return errorCode;

    return ZSTD_compressEnd(cctx, dst,  dstCapacity, src, srcSize);
}

size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict,size_t dictSize,
                               ZSTD_parameters params)
{
    size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, srcSize);
    if (ZSTD_isError(errorCode)) return errorCode;
    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
}

size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize);
    params.fParams.contentSizeFlag = 1;
    return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
}

size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
{
    return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
}

size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
{
    size_t result;
    ZSTD_CCtx ctxBody;
    memset(&ctxBody, 0, sizeof(ctxBody));
    memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem));
    result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_free(ctxBody.workSpace, defaultCustomMem);  /* can't free ctxBody itself, as it's on stack; free only heap content */
    return result;
}
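
/* Usage sketch (illustrative only, kept out of compilation) :
 * one-shot compression through the simple API above.
 * `in` / `inSize` stand for hypothetical caller-provided data. */
#if 0
#include <stdlib.h>   /* malloc, free */
static size_t example_compressOnce(const void* in, size_t inSize)
{
    size_t const dstCapacity = ZSTD_compressBound(inSize);   /* worst-case compressed size */
    void* const dst = malloc(dstCapacity);
    size_t cSize;
    if (dst==NULL) return 0;
    cSize = ZSTD_compress(dst, dstCapacity, in, inSize, 5 /* compression level */);
    if (ZSTD_isError(cSize)) cSize = 0;   /* see ZSTD_getErrorName() for details */
    free(dst);   /* a real caller would keep or write out the compressed data first */
    return cSize;
}
#endif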


/* =====  Dictionary API  ===== */

struct ZSTD_CDict_s {
    void* dictContent;
    size_t dictContentSize;
    ZSTD_CCtx* refContext;
};  /* typedef'd to ZSTD_CDict within "zstd.h" */

ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
{
    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
    if (!customMem.customAlloc || !customMem.customFree) return NULL;

    {   ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
        void* const dictContent = ZSTD_malloc(dictSize, customMem);
        ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);

        if (!dictContent || !cdict || !cctx) {
            ZSTD_free(dictContent, customMem);
            ZSTD_free(cdict, customMem);
            ZSTD_free(cctx, customMem);
            return NULL;
        }

        memcpy(dictContent, dict, dictSize);
        {   size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
            if (ZSTD_isError(errorCode)) {
                ZSTD_free(dictContent, customMem);
                ZSTD_free(cdict, customMem);
                ZSTD_free(cctx, customMem);
                return NULL;
        }   }

        cdict->dictContent = dictContent;
        cdict->dictContentSize = dictSize;
        cdict->refContext = cctx;
        return cdict;
    }
}

ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_customMem const allocator = { NULL, NULL, NULL };
    ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
    params.fParams.contentSizeFlag = 1;
    return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
}

size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support free on NULL */
    {   ZSTD_customMem cMem = cdict->refContext->customMem;
        ZSTD_freeCCtx(cdict->refContext);
        ZSTD_free(cdict->dictContent, cMem);
        ZSTD_free(cdict, cMem);
        return 0;
    }
}

/*! ZSTD_compress_usingCDict() :
*   Compression using a digested Dictionary.
*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
*   Note that compression level is decided during dictionary creation */
ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                           void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const ZSTD_CDict* cdict)
{
    size_t const errorCode = ZSTD_copyCCtx(cctx, cdict->refContext);
    if (ZSTD_isError(errorCode)) return errorCode;

    if (cdict->refContext->params.fParams.contentSizeFlag==1) {
        cctx->params.fParams.contentSizeFlag = 1;
        cctx->frameContentSize = srcSize;
    }

    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
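
/* Usage sketch (illustrative only) : compress many inputs against one digested dictionary.
 *
 *     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictBufferSize, 3);
 *     ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
 *     // for each input :
 *     size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
 *     // check ZSTD_isError(cSize)
 *     ZSTD_freeCCtx(cctx);
 *     ZSTD_freeCDict(cdict);
 */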



/* ******************************************************************
*  Streaming
********************************************************************/

typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;

struct ZSTD_CStream_s {
    ZSTD_CCtx* zc;
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    size_t blockSize;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage stage;
    U32    checksum;
    U32    frameEnded;
    ZSTD_customMem customMem;
};   /* typedef'd to ZSTD_CStream within "zstd.h" */

ZSTD_CStream* ZSTD_createCStream(void)
{
    return ZSTD_createCStream_advanced(defaultCustomMem);
}

ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
{
    ZSTD_CStream* zcs;

    if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
    if (!customMem.customAlloc || !customMem.customFree) return NULL;

    zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
    if (zcs==NULL) return NULL;
    memset(zcs, 0, sizeof(ZSTD_CStream));
    memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
    zcs->zc = ZSTD_createCCtx_advanced(customMem);
    if (zcs->zc == NULL) { ZSTD_freeCStream(zcs); return NULL; }
    return zcs;
}

size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
{
    if (zcs==NULL) return 0;   /* support free on NULL */
    {   ZSTD_customMem const cMem = zcs->customMem;
        ZSTD_freeCCtx(zcs->zc);
        ZSTD_free(zcs->inBuff, cMem);
        ZSTD_free(zcs->outBuff, cMem);
        ZSTD_free(zcs, cMem);
        return 0;
    }
}


/*======   Initialization   ======*/

size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bit checksum */ ; }

size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                 const void* dict, size_t dictSize,
                                 ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    /* allocate buffers */
    {   size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
        if (zcs->inBuffSize < neededInBuffSize) {
            zcs->inBuffSize = neededInBuffSize;
            ZSTD_free(zcs->inBuff, zcs->customMem);  /* should not be necessary */
            zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem);
            if (zcs->inBuff == NULL) return ERROR(memory_allocation);
        }
        zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
    }
    if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) {
        zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
        ZSTD_free(zcs->outBuff, zcs->customMem);   /* should not be necessary */
        zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
        if (zcs->outBuff == NULL) return ERROR(memory_allocation);
    }

    { size_t const errorCode = ZSTD_compressBegin_advanced(zcs->zc, dict, dictSize, params, pledgedSrcSize);
      if (ZSTD_isError(errorCode)) return errorCode; }

    zcs->inToCompress = 0;
    zcs->inBuffPos = 0;
    zcs->inBuffTarget = zcs->blockSize;
    zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
    zcs->stage = zcss_load;
    zcs->checksum = params.fParams.checksumFlag > 0;
    zcs->frameEnded = 0;
    return 0;   /* ready to go */
}

size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
    return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0);
}

size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
    return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel);
}

size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    return sizeof(*zcs) + ZSTD_sizeof_CCtx(zcs->zc) + zcs->outBuffSize + zcs->inBuffSize;   /* sizeof(*zcs) : size of the structure itself, not of the pointer */
}

/*======   Compression   ======*/

typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;

MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    size_t const length = MIN(dstCapacity, srcSize);
    memcpy(dst, src, length);
    return length;
}

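/*! ZSTD_compressStream_generic() :
 *  internal state machine (zcss_load <-> zcss_flush), shared by
 *  ZSTD_compressStream(), ZSTD_flushStream() and ZSTD_endStream().
 *  @return : a hint, preferred nb of bytes to provide as input on next call,
 *            or 0 once the frame is ended */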
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                              void* dst, size_t* dstCapacityPtr,
                        const void* src, size_t* srcSizePtr,
                              ZSTD_flush_e const flush)
{
    U32 someMoreWork = 1;
    const char* const istart = (const char*)src;
    const char* const iend = istart + *srcSizePtr;
    const char* ip = istart;
    char* const ostart = (char*)dst;
    char* const oend = ostart + *dstCapacityPtr;
    char* op = ostart;

    while (someMoreWork) {
        switch(zcs->stage)
        {
        case zcss_init: return ERROR(init_missing);   /* call ZSTD_initCStream() first ! */

        case zcss_load:
            /* complete inBuffer */
            {   size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
                size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip);
                zcs->inBuffPos += loaded;
                ip += loaded;
                if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) {
                    someMoreWork = 0; break;  /* not enough input to get a full block : stop there, wait for more */
            }   }
            /* compress current block (note : this stage cannot be stopped in the middle) */
            {   void* cDst;
                size_t cSize;
                size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
                size_t oSize = oend-op;
                if (oSize >= ZSTD_compressBound(iSize))
                    cDst = op;   /* compress directly into output buffer (avoid flush stage) */
                else
                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
                cSize = (flush == zsf_end) ?
                        ZSTD_compressEnd(zcs->zc, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) :
                        ZSTD_compressContinue(zcs->zc, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
                if (ZSTD_isError(cSize)) return cSize;
                if (flush == zsf_end) zcs->frameEnded = 1;
                /* prepare next block */
                zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
                if (zcs->inBuffTarget > zcs->inBuffSize)
                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;   /* note : inBuffSize >= blockSize */
                zcs->inToCompress = zcs->inBuffPos;
                if (cDst == op) { op += cSize; break; }   /* no need to flush */
                zcs->outBuffContentSize = cSize;
                zcs->outBuffFlushedSize = 0;
                zcs->stage = zcss_flush;   /* pass-through to flush stage */
            }

        case zcss_flush:
            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
                size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
                op += flushed;
                zcs->outBuffFlushedSize += flushed;
                if (toFlush!=flushed) { someMoreWork = 0; break; }  /* dst too small to store flushed data : stop there */
                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
                zcs->stage = zcss_load;
                break;
            }

        case zcss_final:
            someMoreWork = 0;   /* do nothing */
            break;

        default:
            return ERROR(GENERIC);   /* impossible */
        }
    }

    *srcSizePtr = ip - istart;
    *dstCapacityPtr = op - ostart;
    if (zcs->frameEnded) return 0;
    {   size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
        if (hintInSize==0) hintInSize = zcs->blockSize;
        return hintInSize;
    }
}

size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    size_t sizeRead = input->size - input->pos;
    size_t sizeWritten = output->size - output->pos;
    size_t const result = ZSTD_compressStream_generic(zcs,
                                                      (char*)(output->dst) + output->pos, &sizeWritten,
                                                      (const char*)(input->src) + input->pos, &sizeRead, zsf_gather);
    input->pos += sizeRead;
    output->pos += sizeWritten;
    return result;
}


/*======   Finalize   ======*/

/*! ZSTD_flushStream() :
*   @return : amount of data remaining to flush */
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    size_t srcSize = 0;
    size_t sizeWritten = output->size - output->pos;
    size_t const result = ZSTD_compressStream_generic(zcs,
                                                      (char*)(output->dst) + output->pos, &sizeWritten,
                                                      &srcSize, &srcSize, /* use a valid src address instead of NULL */
                                                      zsf_flush);
    output->pos += sizeWritten;
    if (ZSTD_isError(result)) return result;
    return zcs->outBuffContentSize - zcs->outBuffFlushedSize;   /* remaining to flush */
}


size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    BYTE* const ostart = (BYTE*)(output->dst) + output->pos;
    BYTE* const oend = (BYTE*)(output->dst) + output->size;
    BYTE* op = ostart;

    if (zcs->stage != zcss_final) {
        /* flush whatever remains */
        size_t srcSize = 0;
        size_t sizeWritten = output->size - output->pos;
        size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end);  /* use a valid src address instead of NULL */
        size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
        op += sizeWritten;
        if (remainingToFlush) {
            output->pos += sizeWritten;
            return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
        }
        /* create epilogue */
        zcs->stage = zcss_final;
        zcs->outBuffContentSize = !notEnded ? 0 :
            ZSTD_compressEnd(zcs->zc, zcs->outBuff, zcs->outBuffSize, NULL, 0);  /* write epilogue, including final empty block, into outBuff */
    }

    /* flush epilogue */
    {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
        size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
        op += flushed;
        zcs->outBuffFlushedSize += flushed;
        output->pos += op-ostart;
        if (toFlush==flushed) zcs->stage = zcss_init;  /* end reached */
        return toFlush - flushed;
    }
}
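
/* Usage sketch (illustrative only) : typical streaming compression loop.
 *
 *     ZSTD_CStream* const zcs = ZSTD_createCStream();
 *     ZSTD_initCStream(zcs, 3);   // check result with ZSTD_isError()
 *     while (more data to read) {
 *         ZSTD_inBuffer input = { inBuff, readSize, 0 };
 *         while (input.pos < input.size) {
 *             ZSTD_outBuffer output = { outBuff, outBuffSize, 0 };
 *             size_t const hint = ZSTD_compressStream(zcs, &output, &input);   // check ZSTD_isError()
 *             // write output.pos bytes from outBuff to the destination
 *         }
 *     }
 *     for (;;) {   // flush the epilogue; may need several calls if dst is small
 *         ZSTD_outBuffer output = { outBuff, outBuffSize, 0 };
 *         size_t const remaining = ZSTD_endStream(zcs, &output);
 *         // write output.pos bytes from outBuff to the destination
 *         if (remaining == 0) break;
 *     }
 *     ZSTD_freeCStream(zcs);
 */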



/*-=====  Pre-defined compression levels  =====-*/

#define ZSTD_DEFAULT_CLEVEL 1
#define ZSTD_MAX_CLEVEL     22
int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }

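/* Table legend :
 * W = windowLog, C = chainLog, H = hashLog, S = searchLog,
 * L = searchLength, TL/T = targetLength, strat = strategy
 * (the fields of ZSTD_compressionParameters, in order) */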
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{   /* "default" */
    /* W,  C,  H,  S,  L, TL, strat */
    { 18, 12, 12,  1,  7, 16, ZSTD_fast    },  /* level  0 - never used */
    { 19, 13, 14,  1,  7, 16, ZSTD_fast    },  /* level  1 */
    { 19, 15, 16,  1,  6, 16, ZSTD_fast    },  /* level  2 */
    { 20, 16, 17,  1,  5, 16, ZSTD_dfast   },  /* level  3.*/
    { 20, 18, 18,  1,  5, 16, ZSTD_dfast   },  /* level  4.*/
    { 20, 15, 18,  3,  5, 16, ZSTD_greedy  },  /* level  5 */
    { 21, 16, 19,  2,  5, 16, ZSTD_lazy    },  /* level  6 */
    { 21, 17, 20,  3,  5, 16, ZSTD_lazy    },  /* level  7 */
    { 21, 18, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
    { 21, 20, 20,  3,  5, 16, ZSTD_lazy2   },  /* level  9 */
    { 21, 19, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
    { 22, 20, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
    { 22, 20, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 13 */
    { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 14 */
    { 22, 21, 21,  5,  5, 16, ZSTD_btlazy2 },  /* level 15 */
    { 23, 22, 22,  5,  5, 16, ZSTD_btlazy2 },  /* level 16 */
    { 23, 21, 22,  4,  5, 24, ZSTD_btopt   },  /* level 17 */
    { 23, 23, 22,  6,  5, 32, ZSTD_btopt   },  /* level 18 */
    { 23, 23, 22,  6,  3, 48, ZSTD_btopt   },  /* level 19 */
    { 25, 25, 23,  7,  3, 64, ZSTD_btopt   },  /* level 20 */
    { 26, 26, 23,  7,  3,256, ZSTD_btopt   },  /* level 21 */
    { 27, 27, 25,  9,  3,512, ZSTD_btopt   },  /* level 22 */
},
{   /* for srcSize <= 256 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - not used */
    { 18, 13, 14,  1,  6,  8, ZSTD_fast    },  /* level  1 */
    { 18, 14, 13,  1,  5,  8, ZSTD_dfast   },  /* level  2 */
    { 18, 16, 15,  1,  5,  8, ZSTD_dfast   },  /* level  3 */
    { 18, 15, 17,  1,  5,  8, ZSTD_greedy  },  /* level  4.*/
    { 18, 16, 17,  4,  5,  8, ZSTD_greedy  },  /* level  5.*/
    { 18, 16, 17,  3,  5,  8, ZSTD_lazy    },  /* level  6.*/
    { 18, 17, 17,  4,  4,  8, ZSTD_lazy    },  /* level  7 */
    { 18, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 18, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 18, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 18, 18, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 11.*/
    { 18, 18, 17,  7,  4,  8, ZSTD_lazy2   },  /* level 12.*/
    { 18, 19, 17,  6,  4,  8, ZSTD_btlazy2 },  /* level 13 */
    { 18, 18, 18,  4,  4, 16, ZSTD_btopt   },  /* level 14.*/
    { 18, 18, 18,  4,  3, 16, ZSTD_btopt   },  /* level 15.*/
    { 18, 19, 18,  6,  3, 32, ZSTD_btopt   },  /* level 16.*/
    { 18, 19, 18,  8,  3, 64, ZSTD_btopt   },  /* level 17.*/
    { 18, 19, 18,  9,  3,128, ZSTD_btopt   },  /* level 18.*/
    { 18, 19, 18, 10,  3,256, ZSTD_btopt   },  /* level 19.*/
    { 18, 19, 18, 11,  3,512, ZSTD_btopt   },  /* level 20.*/
    { 18, 19, 18, 12,  3,512, ZSTD_btopt   },  /* level 21.*/
    { 18, 19, 18, 13,  3,512, ZSTD_btopt   },  /* level 22.*/
},
{   /* for srcSize <= 128 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 17, 12, 12,  1,  7,  8, ZSTD_fast    },  /* level  0 - not used */
    { 17, 12, 13,  1,  6,  8, ZSTD_fast    },  /* level  1 */
    { 17, 13, 16,  1,  5,  8, ZSTD_fast    },  /* level  2 */
    { 17, 16, 16,  2,  5,  8, ZSTD_dfast   },  /* level  3 */
    { 17, 13, 15,  3,  4,  8, ZSTD_greedy  },  /* level  4 */
    { 17, 15, 17,  4,  4,  8, ZSTD_greedy  },  /* level  5 */
    { 17, 16, 17,  3,  4,  8, ZSTD_lazy    },  /* level  6 */
    { 17, 15, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 17, 17, 17,  7,  4,  8, ZSTD_lazy2   },  /* level 11 */
    { 17, 17, 17,  8,  4,  8, ZSTD_lazy2   },  /* level 12 */
    { 17, 18, 17,  6,  4,  8, ZSTD_btlazy2 },  /* level 13.*/
    { 17, 17, 17,  7,  3,  8, ZSTD_btopt   },  /* level 14.*/
    { 17, 17, 17,  7,  3, 16, ZSTD_btopt   },  /* level 15.*/
    { 17, 18, 17,  7,  3, 32, ZSTD_btopt   },  /* level 16.*/
    { 17, 18, 17,  7,  3, 64, ZSTD_btopt   },  /* level 17.*/
    { 17, 18, 17,  7,  3,256, ZSTD_btopt   },  /* level 18.*/
    { 17, 18, 17,  8,  3,256, ZSTD_btopt   },  /* level 19.*/
    { 17, 18, 17,  9,  3,256, ZSTD_btopt   },  /* level 20.*/
    { 17, 18, 17, 10,  3,256, ZSTD_btopt   },  /* level 21.*/
    { 17, 18, 17, 11,  3,512, ZSTD_btopt   },  /* level 22.*/
},
{   /* for srcSize <= 16 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 14, 12, 12,  1,  7,  6, ZSTD_fast    },  /* level  0 - not used */
    { 14, 14, 14,  1,  6,  6, ZSTD_fast    },  /* level  1 */
    { 14, 14, 14,  1,  4,  6, ZSTD_fast    },  /* level  2 */
    { 14, 14, 14,  1,  4,  6, ZSTD_dfast   },  /* level  3.*/
    { 14, 14, 14,  4,  4,  6, ZSTD_greedy  },  /* level  4.*/
    { 14, 14, 14,  3,  4,  6, ZSTD_lazy    },  /* level  5.*/
    { 14, 14, 14,  4,  4,  6, ZSTD_lazy2   },  /* level  6 */
    { 14, 14, 14,  5,  4,  6, ZSTD_lazy2   },  /* level  7 */
    { 14, 14, 14,  6,  4,  6, ZSTD_lazy2   },  /* level  8.*/
    { 14, 15, 14,  6,  4,  6, ZSTD_btlazy2 },  /* level  9.*/
    { 14, 15, 14,  3,  3,  6, ZSTD_btopt   },  /* level 10.*/
    { 14, 15, 14,  6,  3,  8, ZSTD_btopt   },  /* level 11.*/
    { 14, 15, 14,  6,  3, 16, ZSTD_btopt   },  /* level 12.*/
    { 14, 15, 14,  6,  3, 24, ZSTD_btopt   },  /* level 13.*/
    { 14, 15, 15,  6,  3, 48, ZSTD_btopt   },  /* level 14.*/
    { 14, 15, 15,  6,  3, 64, ZSTD_btopt   },  /* level 15.*/
    { 14, 15, 15,  6,  3, 96, ZSTD_btopt   },  /* level 16.*/
    { 14, 15, 15,  6,  3,128, ZSTD_btopt   },  /* level 17.*/
    { 14, 15, 15,  6,  3,256, ZSTD_btopt   },  /* level 18.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btopt   },  /* level 19.*/
    { 14, 15, 15,  8,  3,256, ZSTD_btopt   },  /* level 20.*/
    { 14, 15, 15,  9,  3,256, ZSTD_btopt   },  /* level 21.*/
    { 14, 15, 15, 10,  3,256, ZSTD_btopt   },  /* level 22.*/
},
};

/*! ZSTD_getCParams() :
*   @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
*   Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
{
    ZSTD_compressionParameters cp;
    size_t const addedSize = srcSize ? 0 : 500;
    U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);   /* intentional underflow for srcSizeHint == 0 */
    if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL;   /* 0 == default; no negative compressionLevel yet */
    if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
    cp = ZSTD_defaultCParameters[tableID][compressionLevel];
    if (MEM_32bits()) {   /* auto-correction, for 32-bit mode */
        if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
        if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
        if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
    }
    cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
    return cp;
}

/*! ZSTD_getParams() :
*   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
*   All fields of `ZSTD_frameParameters` are set to default (0) */
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) {
    ZSTD_parameters params;
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
    memset(&params, 0, sizeof(params));
    params.cParams = cParams;
    return params;
}