1 /*
2 * Copyright (c) Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* This header contains definitions
12 * that shall **only** be used by modules within lib/compress.
13 */
14
15 #ifndef ZSTD_COMPRESS_H
16 #define ZSTD_COMPRESS_H
17
18 /*-*************************************
19 * Dependencies
20 ***************************************/
21 #include "../common/zstd_internal.h"
22 #include "zstd_cwksp.h"
23 #ifdef ZSTD_MULTITHREAD
24 # include "zstdmt_compress.h"
25 #endif
26
27 #if defined (__cplusplus)
28 extern "C" {
29 #endif
30
/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8    /* baseline search-depth / skip-rate control; consumed by the matchfinders in sibling lib/compress units */
#define HASH_READ_SIZE       8    /* max nb of input bytes a hash function may read (see ZSTD_hash8Ptr below) */
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
43
44
/*-*************************************
*  Context memory management
***************************************/
/* Lifecycle stage of a compression context, at frame granularity */
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
/* Stage of the streaming (CStream) state machine */
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

/* Dictionary referenced by pointer only : content is used in place, never copied */
typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

/* Dictionary material owned by the context */
typedef struct {
    void* dictBuffer;       /* internal copy of the dictionary content, when one was made (NULL otherwise) */
    void const* dict;       /* read-only view on the dictionary content */
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;      /* digested form of the dictionary content above */
} ZSTD_localDict;

/* Huffman table used for literals compression, plus its reuse indicator */
typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

/* FSE tables for the three sequence symbol streams, plus their reuse indicators */
typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

/* Full entropy state : literals (huffman) + sequences (fse) */
typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;
83
/***********************************************
*  Entropy buffer statistics structs and funcs *
***********************************************/
/** ZSTD_hufCTablesMetadata_t :
 *  Stores Literals Block Type for a super-block in hType, and
 *  huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of huffman tree description in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    symbolEncodingType_e hType;                    /* literals block type */
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];   /* serialized huffman tree description */
    size_t hufDesSize;                             /* nb of valid bytes in hufDesBuffer */
} ZSTD_hufCTablesMetadata_t;

/** ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    symbolEncodingType_e llType;   /* literal-length symbol compression mode */
    symbolEncodingType_e ofType;   /* offset symbol compression mode */
    symbolEncodingType_e mlType;   /* match-length symbol compression mode */
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];   /* serialized fse table descriptions */
    size_t fseTablesSize;          /* nb of valid bytes in fseTablesBuffer */
    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

/* Combined literals + sequences entropy metadata for one block */
typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;

/** ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize);
126
/*********************************
*  Compression internals structs *
*********************************/

/* A match candidate returned by a matchfinder */
typedef struct {
    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

/* A sequence expressed with raw values (no offset/length encoding applied) */
typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
  rawSeq* seq;          /* The start of the sequences */
  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
  size_t size;          /* The number of sequences. <= capacity. */
  size_t capacity;      /* The capacity starting from `seq` pointer */
} rawSeqStore_t;

/* Canonical empty rawSeqStore, used to express "no external sequences" */
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

/* One candidate position in the optimal parser's dynamic program */
typedef struct {
    int price;              /* cumulated cost to reach this position */
    U32 off;                /* offset code of the selected match */
    U32 mlen;               /* match length of the selected match */
    U32 litlen;             /* nb of literals preceding the match */
    U32 rep[ZSTD_REP_NUM];  /* repeat-offset history at this position */
} ZSTD_optimal_t;
160
/* Whether prices derive from observed statistics (dynamic) or from a pre-defined cost structure (predef) */
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq) */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_literalCompressionMode_e literalCompressionMode;
} optState_t;

/* Entropy tables + repeat-offset history carried from one block to the next */
typedef struct {
  ZSTD_entropyCTables_t entropy;
  U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;
201
typedef struct ZSTD_matchState_t ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */

struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */

    U32* hashTable;         /* primary hash table */
    U32* hashTable3;        /* separate table for length-3 matches (see hashLog3) */
    U32* chainTable;        /* chain / tree storage -- NOTE(review): exact layout depends on strategy; see matchfinder sources */

    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;   /* matchState of an attached dictionary, when in dictMatchState mode */
    ZSTD_compressionParameters cParams;
    const rawSeqStore_t* ldmSeqStore;          /* sequences produced by long-distance matching, when enabled */
};
236
/* Double-buffered per-block state (prev/next) plus the matchfinder state */
typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;   /* state of the previously compressed block */
    ZSTD_compressedBlockState_t* nextCBlock;   /* state being built for the current block */
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

/* One entry of the long-distance-matching hash table */
typedef struct {
    U32 offset;     /* candidate position, within the window's referential */
    U32 checksum;   /* NOTE(review): presumably filters hash collisions cheaply; confirm in zstd_ldm.c */
} ldmEntry_t;

/* A split position currently under evaluation by the LDM */
typedef struct {
    BYTE const* split;     /* pointer to the candidate split position in the input */
    U32 hash;
    U32 checksum;
    ldmEntry_t* bucket;    /* hash-table bucket holding candidates for this hash */
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64   /* nb of split candidates gathered and processed per batch */

/* Long-distance matching runtime state */
typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

/* User-selected long-distance matching parameters */
typedef struct {
    U32 enableLdm;          /* 1 if enable long distance matching */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

/* State for collecting produced sequences into a caller-provided array */
typedef struct {
    int collectSequences;     /* non-zero when collection is active */
    ZSTD_Sequence* seqStart;  /* caller-provided output array */
    size_t seqIndex;          /* next write index into seqStart */
    size_t maxSequences;      /* capacity of seqStart */
} SeqCollector;
281
struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_literalCompressionMode_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_sequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting */
    int splitBlocks;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_useRowMatchFinderMode_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

/* Scratch area sized for (MaxSeq + 2) unsigned counters, used during sequence compression */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)

/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;
345
struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;    /* parameters as requested by the user */
    ZSTD_CCtx_params appliedParams;      /* parameters in effect for the current frame */
    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;              /* running checksum of the frame content */
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;                   /* NOTE(review): appears non-zero for static (caller-provided memory) cctx; confirm in zstd_compress.c */
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif

    /* Tracing */
#if ZSTD_TRACE
    ZSTD_TraceCtx traceCtx;
#endif
};
412
/* How thoroughly to index dictionary content into the match tables
 * -- NOTE(review): exact fast/full policy is defined by the table-load functions; confirm there */
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

typedef enum {
    ZSTD_noDict = 0,              /* no dictionary in use */
    ZSTD_extDict = 1,             /* dictionary content lives in the window's extDict segment */
    ZSTD_dictMatchState = 2,      /* dictionary accessed through an attached matchState (see dictMatchState field) */
    ZSTD_dedicatedDictSearch = 3  /* dictionary indexed with the dedicated dict search structure */
} ZSTD_dictMode_e;

typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_cParamMode_e;

/* Signature shared by every block-level compressor (one per strategy / dictMode combination) */
typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
/* Returns the block compressor matching the requested strategy, matchfinder mode, and dictionary mode */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
446
447
ZSTD_LLcode(U32 litLength)448 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
449 {
450 static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
451 8, 9, 10, 11, 12, 13, 14, 15,
452 16, 16, 17, 17, 18, 18, 19, 19,
453 20, 20, 20, 20, 21, 21, 21, 21,
454 22, 22, 22, 22, 22, 22, 22, 22,
455 23, 23, 23, 23, 23, 23, 23, 23,
456 24, 24, 24, 24, 24, 24, 24, 24,
457 24, 24, 24, 24, 24, 24, 24, 24 };
458 static const U32 LL_deltaCode = 19;
459 return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
460 }
461
462 /* ZSTD_MLcode() :
463 * note : mlBase = matchLength - MINMATCH;
464 * because it's the format it's stored in seqStore->sequences */
ZSTD_MLcode(U32 mlBase)465 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
466 {
467 static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
468 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
469 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
470 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
471 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
472 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
473 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
474 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
475 static const U32 ML_deltaCode = 36;
476 return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
477 }
478
479 typedef struct repcodes_s {
480 U32 rep[3];
481 } repcodes_t;
482
ZSTD_updateRep(U32 const rep[3],U32 const offset,U32 const ll0)483 MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
484 {
485 repcodes_t newReps;
486 if (offset >= ZSTD_REP_NUM) { /* full offset */
487 newReps.rep[2] = rep[1];
488 newReps.rep[1] = rep[0];
489 newReps.rep[0] = offset - ZSTD_REP_MOVE;
490 } else { /* repcode */
491 U32 const repCode = offset + ll0;
492 if (repCode > 0) { /* note : if repCode==0, no change */
493 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
494 newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
495 newReps.rep[1] = rep[0];
496 newReps.rep[0] = currentOffset;
497 } else { /* repCode == 0 */
498 ZSTD_memcpy(&newReps, rep, sizeof(newReps));
499 }
500 }
501 return newReps;
502 }
503
504 /* ZSTD_cParam_withinBounds:
505 * @return 1 if value is within cParam bounds,
506 * 0 otherwise */
ZSTD_cParam_withinBounds(ZSTD_cParameter cParam,int value)507 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
508 {
509 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
510 if (ZSTD_isError(bounds.error)) return 0;
511 if (value < bounds.lowerBound) return 0;
512 if (value > bounds.upperBound) return 0;
513 return 1;
514 }
515
516 /* ZSTD_noCompressBlock() :
517 * Writes uncompressed block to dst buffer from given src.
518 * Returns the size of the block */
ZSTD_noCompressBlock(void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastBlock)519 MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
520 {
521 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
522 RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
523 dstSize_tooSmall, "dst buf too small for uncompressed block");
524 MEM_writeLE24(dst, cBlockHeader24);
525 ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
526 return ZSTD_blockHeaderSize + srcSize;
527 }
528
ZSTD_rleCompressBlock(void * dst,size_t dstCapacity,BYTE src,size_t srcSize,U32 lastBlock)529 MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
530 {
531 BYTE* const op = (BYTE*)dst;
532 U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
533 RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
534 MEM_writeLE24(op, cBlockHeader);
535 op[3] = src;
536 return 4;
537 }
538
539
540 /* ZSTD_minGain() :
541 * minimum compression required
542 * to generate a compress block or a compressed literals section.
543 * note : use same formula for both situations */
ZSTD_minGain(size_t srcSize,ZSTD_strategy strat)544 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
545 {
546 U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
547 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
548 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
549 return (srcSize >> minlog) + 2;
550 }
551
ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params * cctxParams)552 MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
553 {
554 switch (cctxParams->literalCompressionMode) {
555 case ZSTD_lcm_huffman:
556 return 0;
557 case ZSTD_lcm_uncompressed:
558 return 1;
559 default:
560 assert(0 /* impossible: pre-validated */);
561 /* fall-through */
562 case ZSTD_lcm_auto:
563 return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
564 }
565 }
566
567 /*! ZSTD_safecopyLiterals() :
568 * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
569 * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
570 * large copies.
571 */
ZSTD_safecopyLiterals(BYTE * op,BYTE const * ip,BYTE const * const iend,BYTE const * ilimit_w)572 static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
573 assert(iend > ilimit_w);
574 if (ip <= ilimit_w) {
575 ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
576 op += ilimit_w - ip;
577 ip = ilimit_w;
578 }
579 while (ip < iend) *op++ = *ip++;
580 }
581
/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
 *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
 *  `mlBase` : matchLength - MINMATCH
 *  Allowed to overread literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;  /* last position from which wildcopy stays in bounds */
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        assert(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        /* too close to litLimit for wildcopy : take the careful path */
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length : values > 0xFFFF don't fit the U16 field, and are flagged via longLengthType/Pos */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset : stored with a +1 bias -- NOTE(review): consumers subtract it back; confirm at seqStore read sites */
    seqStorePtr->sequences[0].offset = offCode + 1;

    /* match Length : same U16-overflow flagging mechanism as literal length */
    if (mlBase>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].matchLength = (U16)mlBase;

    seqStorePtr->sequences++;
}
641
642
/*-*************************************
*  Match length counter
***************************************/
/* ZSTD_NbCommonBytes() :
 * @val is the XOR of two loaded words, expected non-zero.
 * Returns the number of equal bytes at the lowest memory addresses :
 * trailing-zero bits / 8 on little-endian, leading-zero bits / 8 on big-endian.
 * Uses hardware bit-scan where available, De Bruijn / shift fallbacks otherwise. */
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _tzcnt_u64(val) >> 3;
#           else
                unsigned long r = 0;
                return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
#           endif
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            /* branch-free De Bruijn fallback : isolate lowest set bit, multiply, index table */
            static const int DeBruijnBytePos[64] = {  0,  0,  0,  0,  0,  1,  1,  2,
                                                      0,  3,  1,  3,  1,  4,  2,  7,
                                                      0,  2,  3,  6,  1,  5,  3,  5,
                                                      1,  3,  4,  4,  2,  5,  6,  7,
                                                      7,  0,  1,  2,  3,  3,  4,  6,
                                                      2,  6,  5,  5,  3,  4,  5,  6,
                                                      7,  1,  2,  4,  6,  4,  4,  5,
                                                      7,  2,  6,  5,  7,  6,  7,  7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r=0;
            return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = {  0,  0,  3,  0,  3,  1,  3,  0,
                                                      3,  2,  2,  1,  3,  2,  0,  1,
                                                      3,  3,  1,  2,  2,  2,  2,  0,
                                                      3,  1,  2,  0,  1,  0,  1,  1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU : count leading zero bytes instead */
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _lzcnt_u64(val) >> 3;
#           else
                unsigned long r = 0;
                return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
#           endif
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
#       else
            /* fallback : binary search over the top bytes via shifts */
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            unsigned long r = 0;
            return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}
717
718
/** ZSTD_count() :
 *  Counts the length of the common prefix between pIn and pMatch,
 *  reading no further than pInLimit (exclusive) on the pIn side.
 *  Compares word-at-a-time while a full word fits, then finishes with 4/2/1-byte steps. */
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);   /* last position where a whole-word read stays in bounds */

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }   /* first words already differ : locate first differing byte */
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    /* tail : fewer than sizeof(size_t) bytes remain; compare with progressively smaller reads */
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
739
740 /** ZSTD_count_2segments() :
741 * can count match length with `ip` & `match` in 2 different segments.
742 * convention : on reaching mEnd, match count continue starting from iStart
743 */
744 MEM_STATIC size_t
ZSTD_count_2segments(const BYTE * ip,const BYTE * match,const BYTE * iEnd,const BYTE * mEnd,const BYTE * iStart)745 ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
746 const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
747 {
748 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
749 size_t const matchLength = ZSTD_count(ip, match, vEnd);
750 if (match + matchLength != mEnd) return matchLength;
751 DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
752 DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
753 DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
754 DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
755 DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
756 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
757 }
758
759
/*-*************************************
*  Hashes
***************************************/
/* Multiplicative hashing, one variant per input width N :
 * a left shift keeps the first N bytes of the (little-endian) value,
 * a multiply by a large odd constant mixes them, and the top h bits are the hash. */
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }  /* note : native-endian read, unlike the other widths */

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
786
/* ZSTD_hashPtr() :
 * Hash the first `mls` bytes at `p` into an hBits-bit value,
 * dispatching to the ZSTD_hashNPtr() variant for that minimum match length.
 * Note : `default` deliberately falls through to `case 4`,
 * so any mls outside [4,8] behaves like mls==4. */
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
800
801 /** ZSTD_ipow() :
802 * Return base^exponent.
803 */
ZSTD_ipow(U64 base,U64 exponent)804 static U64 ZSTD_ipow(U64 base, U64 exponent)
805 {
806 U64 power = 1;
807 while (exponent) {
808 if (exponent & 1) power *= base;
809 exponent >>= 1;
810 base *= base;
811 }
812 return power;
813 }
814
815 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
816
817 /** ZSTD_rollingHash_append() :
818 * Add the buffer to the hash value.
819 */
ZSTD_rollingHash_append(U64 hash,void const * buf,size_t size)820 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
821 {
822 BYTE const* istart = (BYTE const*)buf;
823 size_t pos;
824 for (pos = 0; pos < size; ++pos) {
825 hash *= prime8bytes;
826 hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
827 }
828 return hash;
829 }
830
831 /** ZSTD_rollingHash_compute() :
832 * Compute the rolling hash value of the buffer.
833 */
ZSTD_rollingHash_compute(void const * buf,size_t size)834 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
835 {
836 return ZSTD_rollingHash_append(0, buf, size);
837 }
838
/** ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of length bytes.
 * Computes prime8bytes^(length-1).
 * NOTE(review): assumes length >= 1 — length==0 would wrap the U32
 * subtraction to a huge exponent; callers appear to guarantee this, confirm
 * before reusing elsewhere.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}
847
/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte :
 * subtract the oldest byte's contribution (scaled by primePower, since it was
 * multiplied by prime8bytes once per subsequent byte), then advance the hash
 * and fold in the new byte. Same ZSTD_ROLL_HASH_CHAR_OFFSET bias as append.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
858
859 /*-*************************************
860 * Round buffer management
861 ***************************************/
862 #if (ZSTD_WINDOWLOG_MAX_64 > 31)
863 # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
864 #endif
865 /* Max current allowed */
866 #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
867 /* Maximum chunk size before overflow correction needs to be called again */
868 #define ZSTD_CHUNKSIZE_MAX \
869 ( ((U32)-1) /* Maximum ending current index */ \
870 - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
871
/**
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 * Moving both limits up to the current end index leaves no searchable
 * history : no extDict (lowLimit == dictLimit) and an empty prefix.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;  /* truncation presumed safe: overflow correction keeps indices < 2^32 */

    window->lowLimit = end;
    window->dictLimit = end;
}
884
ZSTD_window_isEmpty(ZSTD_window_t const window)885 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
886 {
887 return window.dictLimit == 1 &&
888 window.lowLimit == 1 &&
889 (window.nextSrc - window.base) == 1;
890 }
891
/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict,
 * i.e. the extDict index range [lowLimit, dictLimit) is non-empty.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}
900
901 /**
902 * ZSTD_matchState_dictMode():
903 * Inspects the provided matchState and figures out what dictMode should be
904 * passed to the compressor.
905 */
ZSTD_matchState_dictMode(const ZSTD_matchState_t * ms)906 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
907 {
908 return ZSTD_window_hasExtDict(ms->window) ?
909 ZSTD_extDict :
910 ms->dictMatchState != NULL ?
911 (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
912 ZSTD_noDict;
913 }
914
915 /* Defining this macro to non-zero tells zstd to run the overflow correction
916 * code much more frequently. This is very inefficient, and should only be
917 * used for tests and fuzzers.
918 */
919 #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
920 # ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
921 # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
922 # else
923 # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
924 # endif
925 #endif
926
/**
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 * @param cycleLog       log2 of the cycle size preserved by correction
 *                       (see ZSTD_cycleLog())
 * @param maxDist        window size
 * @param loadedDictEnd  end index of loaded dictionary (0 if none)
 * @param src            current input position, in window.base referential
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}
959
960 /**
961 * ZSTD_window_needOverflowCorrection():
962 * Returns non-zero if the indices are getting too large and need overflow
963 * protection.
964 */
ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,U32 cycleLog,U32 maxDist,U32 loadedDictEnd,void const * src,void const * srcEnd)965 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
966 U32 cycleLog,
967 U32 maxDist,
968 U32 loadedDictEnd,
969 void const* src,
970 void const* srcEnd)
971 {
972 U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
973 if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
974 if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
975 return 1;
976 }
977 }
978 return curr > ZSTD_CURRENT_MAX;
979 }
980
981 /**
982 * ZSTD_window_correctOverflow():
983 * Reduces the indices to protect from index overflow.
984 * Returns the correction made to the indices, which must be applied to every
985 * stored index.
986 *
987 * The least significant cycleLog bits of the indices must remain the same,
988 * which may be 0. Every index up to maxDist in the past must be valid.
989 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle0 = curr & cycleMask;
    /* Exclude zero so that newCurrent - maxDist >= 1. */
    U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
    U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    /* Shifting base upward by `correction` lowers every derived index
     * (idx = ptr - base) by the same amount. */
    window->base += correction;
    window->dictBase += correction;
    /* Clamp limits to 1 (smallest valid index) instead of underflowing. */
    if (window->lowLimit <= correction) window->lowLimit = 1;
    else window->lowLimit -= correction;
    if (window->dictLimit <= correction) window->dictLimit = 1;
    else window->dictLimit -= correction;

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= 1);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    /* Correction-count feedback: ZSTD_window_canOverflowCorrect() uses this
     * to back off the correction frequency. */
    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
1052
1053 /**
1054 * ZSTD_window_enforceMaxDist():
1055 * Updates lowLimit so that:
1056 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
1057 *
1058 * It ensures index is valid as long as index >= lowLimit.
1059 * This must be called before a block compression call.
1060 *
1061 * loadedDictEnd is only defined if a dictionary is in use for current compression.
1062 * As the name implies, loadedDictEnd represents the index at end of dictionary.
1063 * The value lies within context's referential, it can be directly compared to blockEndIdx.
1064 *
1065 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
1066 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
1067 * This is because dictionaries are allowed to be referenced fully
1068 * as long as the last byte of the dictionary is in the window.
1069 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
1070 *
1071 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
1072 * In dictMatchState mode, lowLimit and dictLimit are the same,
1073 * and the dictionary is below them.
1074 * forceWindow and dictMatchState are therefore incompatible.
1075 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        /* only ever raise lowLimit : the window can shrink, never grow back */
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        /* keep invariant dictLimit >= lowLimit */
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}
1114
1115 /* Similar to ZSTD_window_enforceMaxDist(),
1116 * but only invalidates dictionary
1117 * when input progresses beyond window size.
1118 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
1119 * loadedDictEnd uses same referential as window->base
1120 * maxDist is the window size */
1121 MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t * window,const void * blockEnd,U32 maxDist,U32 * loadedDictEndPtr,const ZSTD_matchState_t ** dictMatchStatePtr)1122 ZSTD_checkDictValidity(const ZSTD_window_t* window,
1123 const void* blockEnd,
1124 U32 maxDist,
1125 U32* loadedDictEndPtr,
1126 const ZSTD_matchState_t** dictMatchStatePtr)
1127 {
1128 assert(loadedDictEndPtr != NULL);
1129 assert(dictMatchStatePtr != NULL);
1130 { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1131 U32 const loadedDictEnd = *loadedDictEndPtr;
1132 DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1133 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1134 assert(blockEndIdx >= loadedDictEnd);
1135
1136 if (blockEndIdx > loadedDictEnd + maxDist) {
1137 /* On reaching window size, dictionaries are invalidated.
1138 * For simplification, if window size is reached anywhere within next block,
1139 * the dictionary is invalidated for the full block.
1140 */
1141 DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
1142 *loadedDictEndPtr = 0;
1143 *dictMatchStatePtr = NULL;
1144 } else {
1145 if (*loadedDictEndPtr != 0) {
1146 DEBUGLOG(6, "dictionary considered valid for current block");
1147 } } }
1148 }
1149
/* ZSTD_window_init() :
 * Reset the window to its canonical "empty" state
 * (the state recognized by ZSTD_window_isEmpty()). */
MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)"";       /* valid non-NULL sentinel, so (ptr - base) is always defined */
    window->dictBase = (BYTE const*)"";
    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + 1;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}
1159
1160 /**
1161 * ZSTD_window_update():
1162 * Updates the window by appending [src, src + srcSize) to the window.
1163 * If it is not contiguous, the current prefix becomes the extDict, and we
1164 * forget about the extDict. Handles overlap of the prefix and extDict.
1165 * Returns non-zero if the segment is contiguous.
1166 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;   /* empty segment : nothing to do, still contiguous */
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous : the old prefix becomes the new extDict */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;   /* forget the previous extDict */
        assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        /* re-anchor base so that ip keeps the same index (ip - base == distanceFromBase) */
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        /* raise lowLimit past the overlapped region, capped at dictLimit */
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}
1203
1204 /**
1205 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1206 */
ZSTD_getLowestMatchIndex(const ZSTD_matchState_t * ms,U32 curr,unsigned windowLog)1207 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1208 {
1209 U32 const maxDistance = 1U << windowLog;
1210 U32 const lowestValid = ms->window.lowLimit;
1211 U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1212 U32 const isDictionary = (ms->loadedDictEnd != 0);
1213 /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1214 * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1215 * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1216 */
1217 U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1218 return matchLowest;
1219 }
1220
1221 /**
1222 * Returns the lowest allowed match index in the prefix.
1223 */
ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t * ms,U32 curr,unsigned windowLog)1224 MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1225 {
1226 U32 const maxDistance = 1U << windowLog;
1227 U32 const lowestValid = ms->window.dictLimit;
1228 U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1229 U32 const isDictionary = (ms->loadedDictEnd != 0);
1230 /* When computing the lowest prefix index we need to take the dictionary into account to handle
1231 * the edge case where the dictionary and the source are contiguous in memory.
1232 */
1233 U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1234 return matchLowest;
1235 }
1236
1237
1238
1239 /* debug functions */
1240 #if (DEBUGLEVEL>=2)
1241
/* ZSTD_fWeight() :
 * Fixed-point approximation of log2(rawStat+1), with fp_accuracy fractional
 * bits, returned as a double. hb is the integer part (highest set bit) and
 * FWeight the fractional correction. The difference of two weights thus
 * approximates a symbol's bit cost : log2(total/count). Debug helper only. */
MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;   /* +1 avoids log2(0) */
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);   /* guarantees the left shift above cannot overflow */
    return (double)weight / fp_multiplier;
}
1254
1255 /* display a table content,
1256 * listing each element, its frequency, and its predicted bit cost */
ZSTD_debugTable(const U32 * table,U32 max)1257 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1258 {
1259 unsigned u, sum;
1260 for (u=0, sum=0; u<=max; u++) sum += table[u];
1261 DEBUGLOG(2, "total nb elts: %u", sum);
1262 for (u=0; u<=max; u++) {
1263 DEBUGLOG(2, "%2u: %5u (%.2f)",
1264 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
1265 }
1266 }
1267
1268 #endif
1269
1270
1271 #if defined (__cplusplus)
1272 }
1273 #endif
1274
1275 /* ===============================================================
1276 * Shared internal declarations
1277 * These prototypes may be called from sources not in lib/compress
1278 * =============================================================== */
1279
1280 /* ZSTD_loadCEntropy() :
1281 * dict : must point at beginning of a valid zstd dictionary.
1282 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1283 * assumptions : magic number supposed already checked
1284 * and dictSize >= 8 */
1285 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1286 const void* const dict, size_t dictSize);
1287
1288 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1289
1290 /* ==============================================================
1291 * Private declarations
1292 * These prototypes shall only be called from within lib/compress
1293 * ============================================================== */
1294
1295 /* ZSTD_getCParamsFromCCtxParams() :
1296 * cParams are built depending on compressionLevel, src size hints,
1297 * LDM and manually set compression parameters.
1298 * Note: srcSizeHint == 0 means 0!
1299 */
1300 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1301 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1302
1303 /*! ZSTD_initCStream_internal() :
1304 * Private use only. Init streaming operation.
1305 * expects params to be valid.
1306 * must receive dict, or cdict, or none, but not both.
1307 * @return : 0, or an error code */
1308 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
1309 const void* dict, size_t dictSize,
1310 const ZSTD_CDict* cdict,
1311 const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
1312
1313 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
1314
1315 /*! ZSTD_getCParamsFromCDict() :
1316 * as the name implies */
1317 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
1318
1319 /* ZSTD_compressBegin_advanced_internal() :
1320 * Private use only. To be called from zstdmt_compress.c. */
1321 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
1322 const void* dict, size_t dictSize,
1323 ZSTD_dictContentType_e dictContentType,
1324 ZSTD_dictTableLoadMethod_e dtlm,
1325 const ZSTD_CDict* cdict,
1326 const ZSTD_CCtx_params* params,
1327 unsigned long long pledgedSrcSize);
1328
1329 /* ZSTD_compress_advanced_internal() :
1330 * Private use only. To be called from zstdmt_compress.c. */
1331 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
1332 void* dst, size_t dstCapacity,
1333 const void* src, size_t srcSize,
1334 const void* dict,size_t dictSize,
1335 const ZSTD_CCtx_params* params);
1336
1337
1338 /* ZSTD_writeLastEmptyBlock() :
1339 * output an empty Block with end-of-frame mark to complete a frame
1340 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
1341 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
1342 */
1343 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
1344
1345
1346 /* ZSTD_referenceExternalSequences() :
1347 * Must be called before starting a compression operation.
1348 * seqs must parse a prefix of the source.
1349 * This cannot be used when long range matching is enabled.
1350 * Zstd will use these sequences, and pass the literals to a secondary block
1351 * compressor.
1352 * @return : An error code on failure.
1353 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
1354 * access and data corruption.
1355 */
1356 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1357
1358 /** ZSTD_cycleLog() :
1359 * condition for correct operation : hashLog > 1 */
1360 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1361
1362 /** ZSTD_CCtx_trace() :
1363 * Trace the end of a compression call.
1364 */
1365 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1366
1367 #endif /* ZSTD_COMPRESS_H */
1368