/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
#  include "zstdmt_compress.h"
#endif

#if defined (__cplusplus)
extern "C" {
#endif

/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though: the candidate will just get sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal(). */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;

/***********************************************
*  Entropy buffer statistics structs and funcs *
***********************************************/
/** ZSTD_hufCTablesMetadata_t :
 *  Stores the literals block type for a super-block in hType, and
 *  the Huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of the Huffman tree description, in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    symbolEncodingType_e hType;
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
    size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;

/** ZSTD_fseCTablesMetadata_t :
 *  Stores the symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  the FSE tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of the FSE tables, in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    symbolEncodingType_e llType;
    symbolEncodingType_e ofType;
    symbolEncodingType_e mlType;
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
    size_t fseTablesSize;
    size_t lastCountSize; /* This accounts for a bug in zstd v1.3.4. See more detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;

/** ZSTD_buildBlockEntropyStats() :
 *  Builds entropy statistics for the block.
 *  @return : 0 on success, or an error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize);

/*********************************
*  Compression internals structs *
*********************************/

typedef struct {
    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
  rawSeq* seq;          /* The start of the sequences */
  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
  size_t size;          /* The number of sequences. <= capacity. */
  size_t capacity;      /* The capacity, in number of sequences, starting from the `seq` pointer */
} rawSeqStore_t;
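
/* Illustrative sketch (not part of the library): one way a consumer could
 * walk a rawSeqStore_t while respecting the invariants documented above.
 * `store` is a hypothetical, fully-populated instance.
 *
 *     rawSeqStore_t store = ...;
 *     while (store.pos < store.size) {
 *         rawSeq const s = store.seq[store.pos];
 *         assert(store.posInSequence <= s.litLength + s.matchLength);
 *         // consume bytes of seq[pos], advancing posInSequence;
 *         // once the sequence is fully consumed:
 *         store.posInSequence = 0;
 *         store.pos++;
 *     }
 */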

UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

typedef struct {
    int price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_literalCompressionMode_e literalCompressionMode;
} optState_t;

typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;

typedef struct ZSTD_matchState_t ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */

struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */

    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;

    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
    const rawSeqStore_t* ldmSeqStore;
};
typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

typedef struct {
    BYTE const* split;
    U32 hash;
    U32 checksum;
    ldmEntry_t* bucket;
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64

typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

typedef struct {
    U32 enableLdm;          /* 1 if long distance matching is enabled */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

typedef struct {
    int collectSequences;
    ZSTD_Sequence* seqStart;
    size_t seqIndex;
    size_t maxSequences;
} SeqCollector;

struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_literalCompressionMode_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_sequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting */
    int splitBlocks;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_useRowMatchFinderMode_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)

/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams (except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available), then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif

    /* Tracing */
#if ZSTD_TRACE
    ZSTD_TraceCtx traceCtx;
#endif
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;

typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_cParamMode_e;

typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}
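
/* Worked example (follows directly from the table and formula above):
 * for litLength == 100, since 100 > 63, the code is
 * ZSTD_highbit32(100) + LL_deltaCode == 6 + 19 == 25,
 * i.e. the litLength code covering lengths 64-127. */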

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 *        because that's the format in which it's stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}

typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
{
    repcodes_t newReps;
    if (offset >= ZSTD_REP_NUM) {  /* full offset */
        newReps.rep[2] = rep[1];
        newReps.rep[1] = rep[0];
        newReps.rep[0] = offset - ZSTD_REP_MOVE;
    } else {   /* repcode */
        U32 const repCode = offset + ll0;
        if (repCode > 0) {  /* note : if repCode==0, no change */
            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            newReps.rep[1] = rep[0];
            newReps.rep[0] = currentOffset;
        } else {   /* repCode == 0 */
            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
        }
    }
    return newReps;
}
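
/* Worked example (a sketch of the logic above): with history rep == {8, 4, 2}
 * and ll0 == 0:
 *  - offset == 0 (repcode 0)                    : reps unchanged, {8, 4, 2}
 *  - offset == 1 (repcode 1)                    : rep[1] moves to front, {4, 8, 2}
 *  - offset == ZSTD_REP_MOVE + 16 (real offset) : shifts to {16, 8, 4}
 */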

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_noCompressBlock() :
 * Writes an uncompressed block to the dst buffer from the given src.
 * Returns the size of the block. */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}
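
/* Worked example (derived from the header formula above): for a last raw
 * block with srcSize == 100,
 * cBlockHeader24 == 1 + (bt_raw<<1) + (100<<3) == 1 + 0 + 800 == 0x321,
 * written as 3 little-endian bytes ahead of the copied content. */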

MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
    MEM_writeLE24(op, cBlockHeader);
    op[3] = src;
    return 4;
}


/* ZSTD_minGain() :
 * minimum compression required
 * to generate a compressed block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    return (srcSize >> minlog) + 2;
}

MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_lcm_huffman:
        return 0;
    case ZSTD_lcm_uncompressed:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        /* fall-through */
    case ZSTD_lcm_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() variant that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}

/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
 *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
 *  `mlBase` : matchLength - MINMATCH
 *  Allowed to overread literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case: we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        assert(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offset = offCode + 1;

    /* match Length */
    if (mlBase>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].matchLength = (U16)mlBase;

    seqStorePtr->sequences++;
}
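
/* Usage sketch (hypothetical values, not library code): storing a match of
 * length 7 at distance 42, preceded by 3 literals starting at `lits`:
 *
 *     ZSTD_storeSeq(seqStore, 3, lits, litLimit,
 *                   42 + ZSTD_REP_MOVE,   // offCode : real offset + ZSTD_REP_MOVE
 *                   7 - MINMATCH);        // mlBase  : matchLength - MINMATCH
 */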


/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _tzcnt_u64(val) >> 3;
#           else
                unsigned long r = 0;
                return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
#           endif
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r=0;
            return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _lzcnt_u64(val) >> 3;
#           else
                unsigned long r = 0;
                return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
#           endif
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way, as the compiler complains in 32-bit mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r = 0;
            return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}


MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}

/** ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continues, starting again from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end of dictionary = %zi", mEnd - match);
    DEBUGLOG(7, "distance from current pos to end of buffer = %zi", iEnd - ip);
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
 *  Hashes
 ***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
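
/* Usage sketch (assumed surrounding state): computing a hash table slot for
 * the bytes at `ip`, with a table of (1 << hashLog) entries and a minimum
 * match length `mls`:
 *
 *     size_t const h = ZSTD_hashPtr(ip, hashLog, mls);   // h < (1 << hashLog)
 *     U32 const matchIndex = hashTable[h];
 */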

/** ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
      if (exponent & 1) power *= base;
      exponent >>= 1;
      base *= base;
    }
    return power;
}

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/** ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/** ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/** ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of `length` bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
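
/* Invariant sketch (follows from the definitions above): rotating a rolling
 * hash over a window of `len` bytes one position forward is equivalent to
 * recomputing it from scratch.
 *
 *     U64 const pp = ZSTD_rollingHash_primePower(len);
 *     U64 hash     = ZSTD_rollingHash_compute(buf, len);
 *     hash         = ZSTD_rollingHash_rotate(hash, buf[0], buf[len], pp);
 *     assert(hash == ZSTD_rollingHash_compute(buf + 1, len));
 */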

/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/**
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
    return window.dictLimit == 1 &&
           window.lowLimit == 1 &&
           (window.nextSrc - window.base) == 1;
}

/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/**
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
            ZSTD_noDict;
}

/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif

/**
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);

    /* Adjust the min index to back off the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}

/**
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  U32 cycleLog,
                                                  U32 maxDist,
                                                  U32 loadedDictEnd,
                                                  void const* src,
                                                  void const* srcEnd)
{
    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
            return 1;
        }
    }
    return curr > ZSTD_CURRENT_MAX;
}

/**
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle0 = curr & cycleMask;
    /* Exclude zero so that newCurrent - maxDist >= 1. */
    U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
    U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit <= correction) window->lowLimit = 1;
    else window->lowLimit -= correction;
    if (window->dictLimit <= correction) window->dictLimit = 1;
    else window->dictLimit -= correction;

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= 1);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}

/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures an index is valid as long as it is >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for the current compression.
 * As the name implies, loadedDictEnd represents the index at the end of the dictionary.
 * The value lies within the context's referential, so it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, the dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing the next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates the dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non-NULL),
 *              loadedDictEnd uses same referential as window->base,
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within the next block,
             * the dictionary is invalidated for the full block.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)"";
    window->dictBase = (BYTE const*)"";
    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + 1;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}
1159 
1160 /**
1161  * ZSTD_window_update():
1162  * Updates the window by appending [src, src + srcSize) to the window.
1163  * If it is not contiguous, the current prefix becomes the extDict, and we
1164  * forget about the extDict. Handles overlap of the prefix and extDict.
1165  * Returns non-zero if the segment is contiguous.
1166  */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}
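
/* Illustrative sketch (hypothetical helper, not part of the library): feeding
 * two buffers through ZSTD_window_update(). The second call reports a
 * contiguous segment only when buf2 starts exactly at window->nextSrc,
 * i.e. right after buf1. Assumes size1 and size2 are non-zero, since empty
 * segments return early as contiguous. */
MEM_STATIC void ZSTD_example_windowUpdate(ZSTD_window_t* window,
                                          const BYTE* buf1, size_t size1,
                                          const BYTE* buf2, size_t size2)
{
    U32 contiguous;
    ZSTD_window_init(window);
    /* First non-empty segment after init never starts at nextSrc (== base+1),
     * so the window is rebased and the call reports non-contiguous (0). */
    contiguous = ZSTD_window_update(window, buf1, size1, 0);
    assert(size1 == 0 || contiguous == 0);
    /* Second segment is contiguous (returns 1) only if buf2 == buf1 + size1,
     * which is where window->nextSrc now points. */
    contiguous = ZSTD_window_update(window, buf2, size2, 0);
    (void)contiguous;
}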

/**
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32    const maxDistance = 1U << windowLog;
    U32    const lowestValid = ms->window.lowLimit;
    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32    const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}
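
/* Worked example (hypothetical numbers, not from the source): with
 * windowLog = 10 (maxDistance = 1024), curr = 5000 and lowLimit = 3000, the
 * window alone would only allow matches from index 5000 - 1024 = 3976. With
 * a still-valid dictionary (loadedDictEnd != 0), the whole range from
 * lowLimit = 3000 is allowed, because ZSTD_checkDictValidity() would already
 * have zeroed loadedDictEnd if any part of the block were out of range. */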

/**
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32    const maxDistance = 1U << windowLog;
    U32    const lowestValid = ms->window.dictLimit;
    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32    const isDictionary = (ms->loadedDictEnd != 0);
    /* When computing the lowest prefix index we need to take the dictionary into account to handle
     * the edge case where the dictionary and the source are contiguous in memory.
     */
    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}


/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}
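
/* Worked example (hypothetical numbers, not from the source): for rawStat = 11,
 * newStat = 12, hb = 3, BWeight = 3*256 = 768, FWeight = (12<<8)>>3 = 384,
 * so ZSTD_fWeight(11) = 1152/256 = 4.5 : a fixed-point approximation of
 * 1 + log2(12) ~= 4.585, with the mantissa interpolated linearly between
 * powers of 2. In ZSTD_debugTable() below the constant +1 cancels in
 * fWeight(sum) - fWeight(count), leaving ~ log2(sum/count), i.e. the
 * predicted Shannon bit cost of the symbol. */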

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif


#if defined (__cplusplus)
}
#endif

/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number already checked,
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);
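
/* Illustrative sketch (hypothetical wrapper, not part of the library): the
 * contract above puts the magic-number and size checks on the caller.
 * ZSTD_MAGIC_DICTIONARY and MEM_readLE32() come from zstd.h / mem.h. */
MEM_STATIC size_t ZSTD_example_loadDictEntropy(ZSTD_compressedBlockState_t* bs,
                                               void* workspace,
                                               const void* dict, size_t dictSize)
{
    if (dictSize < 8) return ERROR(dictionary_corrupted);        /* header needs at least 8 bytes */
    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY)
        return ERROR(dictionary_corrupted);                      /* magic checked here, per contract */
    return ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
}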

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
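
/* Illustrative note (the calls below are hypothetical and assume an
 * initialized cctxParams): a srcSizeHint of 0 really means "empty input";
 * an unknown size is conveyed with ZSTD_CONTENTSIZE_UNKNOWN instead.
 *
 *   ZSTD_getCParamsFromCCtxParams(&cctxParams, 0, 0, ZSTD_cpm_unknown);
 *       -> cParams tuned for an empty source
 *   ZSTD_getCParamsFromCCtxParams(&cctxParams, ZSTD_CONTENTSIZE_UNKNOWN,
 *                                 0, ZSTD_cpm_unknown);
 *       -> cParams for a source of unknown size
 */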

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
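
/* Illustrative sketch (hypothetical wrapper, not part of the library),
 * making the "dict or cdict, not both" contract explicit: */
MEM_STATIC size_t ZSTD_example_initCStream(ZSTD_CStream* zcs,
                                           const void* dict, size_t dictSize,
                                           const ZSTD_CDict* cdict,
                                           const ZSTD_CCtx_params* params,
                                           unsigned long long pledgedSrcSize)
{
    assert(!(dict != NULL && cdict != NULL));   /* at most one dictionary source */
    return ZSTD_initCStream_internal(zcs, dict, dictSize, cdict, params, pledgedSrcSize);
}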

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       const void* dict, size_t dictSize,
                                       const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
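
/* Illustrative sketch (hypothetical caller, not part of the library):
 * closing a frame whose last block carries no payload. */
MEM_STATIC size_t ZSTD_example_finishFrame(void* dst, size_t dstCapacity)
{
    size_t const written = ZSTD_writeLastEmptyBlock(dst, dstCapacity);
    if (ZSTD_isError(written)) return written;   /* dstCapacity < ZSTD_blockHeaderSize */
    assert(written == ZSTD_blockHeaderSize);     /* 3-byte block header, per comment above */
    return written;
}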


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : an error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
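
/* Illustrative sketch (hypothetical wrapper, not part of the library): since
 * ZSTD_referenceExternalSequences() performs no validation, a cautious caller
 * can at least reject obviously malformed input before handing it over. The
 * length check and error code below are assumptions for illustration, not the
 * library's own rule. */
MEM_STATIC size_t ZSTD_example_referenceSequences(ZSTD_CCtx* cctx,
                                                  rawSeq* seqs, size_t nbSeq,
                                                  size_t srcSize)
{
    size_t i, total = 0;
    /* seqs must parse a prefix of the source, so their lengths cannot
     * exceed srcSize in total */
    for (i = 0; i < nbSeq; i++)
        total += seqs[i].litLength + seqs[i].matchLength;
    if (total > srcSize) return ERROR(corruption_detected);
    return ZSTD_referenceExternalSequences(cctx, seqs, nbSeq);
}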

/** ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/** ZSTD_CCtx_trace() :
 *  Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

#endif /* ZSTD_COMPRESS_H */