1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /*-*************************************
12 * Dependencies
13 ***************************************/
14 #include <limits.h> /* INT_MAX */
15 #include <string.h> /* memset */
16 #include "cpu.h"
17 #include "mem.h"
18 #include "hist.h" /* HIST_countFast_wksp */
19 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
20 #include "fse.h"
21 #define HUF_STATIC_LINKING_ONLY
22 #include "huf.h"
23 #include "zstd_compress_internal.h"
24 #include "zstd_compress_sequences.h"
25 #include "zstd_compress_literals.h"
26 #include "zstd_fast.h"
27 #include "zstd_double_fast.h"
28 #include "zstd_lazy.h"
29 #include "zstd_opt.h"
30 #include "zstd_ldm.h"
31
32
33 /*-*************************************
34 * Helper functions
35 ***************************************/
/*! ZSTD_compressBound() :
 *  Upper bound of the compressed size for a single-pass compression of `srcSize` bytes.
 *  Thin wrapper over the ZSTD_COMPRESSBOUND() macro from zstd.h. */
size_t ZSTD_compressBound(size_t srcSize)
{
    return ZSTD_COMPRESSBOUND(srcSize);
}
39
40
41 /*-*************************************
42 * Context memory management
43 ***************************************/
/* Digested, ready-to-use compression dictionary.
 * Opaque to users : typedef'd to ZSTD_CDict within "zstd.h". */
struct ZSTD_CDict_s {
    const void* dictContent;        /* dictionary bytes (ownership depends on load method — see creation path) */
    size_t dictContentSize;
    U32* entropyWorkspace;          /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
    ZSTD_cwksp workspace;           /* workspace backing the internal allocations */
    ZSTD_matchState_t matchState;
    ZSTD_compressedBlockState_t cBlockState;
    ZSTD_customMem customMem;       /* allocator used to create (and later free) this CDict */
    U32 dictID;
    int compressionLevel;           /* 0 indicates that advanced API was used to select CDict params */
};  /* typedef'd to ZSTD_CDict within "zstd.h" */
55
ZSTD_createCCtx(void)56 ZSTD_CCtx* ZSTD_createCCtx(void)
57 {
58 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
59 }
60
/* ZSTD_initCCtx() :
 * Bring a freshly allocated CCtx to a valid default state :
 * zero-fill, record the allocator, detect BMI2, then reset parameters. */
static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
    size_t resetError;
    assert(cctx != NULL);
    memset(cctx, 0, sizeof(*cctx));
    cctx->customMem = memManager;
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    /* a reset on a zeroed context cannot fail; checked in debug builds only */
    resetError = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
    assert(!ZSTD_isError(resetError));
    (void)resetError;   /* unused when asserts are disabled */
}
72
ZSTD_createCCtx_advanced(ZSTD_customMem customMem)73 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
74 {
75 ZSTD_STATIC_ASSERT(zcss_init==0);
76 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
77 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
78 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
79 if (!cctx) return NULL;
80 ZSTD_initCCtx(cctx, customMem);
81 return cctx;
82 }
83 }
84
/*! ZSTD_initStaticCCtx() :
 *  Build a CCtx entirely inside a caller-provided buffer : no allocation occurs.
 *  `workspace` must be 8-aligned and large enough for the CCtx itself plus
 *  the entropy workspace and both compressed-block states.
 * @return : the CCtx (placed at the start of `workspace`), or NULL if the
 *           buffer is too small or misaligned. */
ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
{
    ZSTD_cwksp ws;
    ZSTD_CCtx* cctx;
    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
    ZSTD_cwksp_init(&ws, workspace, workspaceSize);

    /* the CCtx structure itself is carved from the front of the workspace */
    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
    if (cctx == NULL) {
        return NULL;
    }
    memset(cctx, 0, sizeof(ZSTD_CCtx));
    ZSTD_cwksp_move(&cctx->workspace, &ws);
    cctx->staticSize = workspaceSize;   /* non-zero marks this CCtx as static (no malloc/free allowed) */

    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
    if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(
        &cctx->workspace, HUF_WORKSPACE_SIZE);
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    return cctx;
}
110
/**
 * Clears and frees all of the dictionaries in the CCtx.
 * Releases the locally-owned dictionary buffer and CDict (if any),
 * then forgets every dictionary reference (local, prefix, and attached cdict).
 */
static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
{
    ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem);
    ZSTD_freeCDict(cctx->localDict.cdict);
    memset(&cctx->localDict, 0, sizeof(cctx->localDict));
    memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
    cctx->cdict = NULL;   /* may have aliased localDict.cdict, freed above */
}
122
ZSTD_sizeof_localDict(ZSTD_localDict dict)123 static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
124 {
125 size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
126 size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
127 return bufferSize + cdictSize;
128 }
129
/* ZSTD_freeCCtxContent() :
 * Free everything a CCtx owns (dictionaries, MT context, workspace),
 * but not the CCtx structure itself.
 * Must not be called on a statically-initialized CCtx (asserted). */
static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);
    ZSTD_clearAllDicts(cctx);
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}
140
ZSTD_freeCCtx(ZSTD_CCtx * cctx)141 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
142 {
143 if (cctx==NULL) return 0; /* support free on NULL */
144 RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
145 "not compatible with static CCtx");
146 {
147 int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
148 ZSTD_freeCCtxContent(cctx);
149 if (!cctxInWorkspace) {
150 ZSTD_free(cctx, cctx->customMem);
151 }
152 }
153 return 0;
154 }
155
156
/* ZSTD_sizeof_mtctx() :
 * Memory footprint of the multithreading context, or 0 when compiled single-threaded. */
static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    return ZSTDMT_sizeof_CCtx(cctx->mtctx);
#else
    (void)cctx;
    return 0;
#endif
}
166
167
ZSTD_sizeof_CCtx(const ZSTD_CCtx * cctx)168 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
169 {
170 if (cctx==NULL) return 0; /* support sizeof on NULL */
171 /* cctx may be in the workspace */
172 return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
173 + ZSTD_cwksp_sizeof(&cctx->workspace)
174 + ZSTD_sizeof_localDict(cctx->localDict)
175 + ZSTD_sizeof_mtctx(cctx);
176 }
177
/*! ZSTD_sizeof_CStream() :
 *  A CStream is the same object as a CCtx; delegate to ZSTD_sizeof_CCtx(). */
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    return ZSTD_sizeof_CCtx(zcs);  /* same object */
}
182
183 /* private API call, for dictBuilder only */
ZSTD_getSeqStore(const ZSTD_CCtx * ctx)184 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
185
ZSTD_makeCCtxParamsFromCParams(ZSTD_compressionParameters cParams)186 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
187 ZSTD_compressionParameters cParams)
188 {
189 ZSTD_CCtx_params cctxParams;
190 memset(&cctxParams, 0, sizeof(cctxParams));
191 cctxParams.cParams = cParams;
192 cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
193 assert(!ZSTD_checkCParams(cParams));
194 cctxParams.fParams.contentSizeFlag = 1;
195 return cctxParams;
196 }
197
ZSTD_createCCtxParams_advanced(ZSTD_customMem customMem)198 static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
199 ZSTD_customMem customMem)
200 {
201 ZSTD_CCtx_params* params;
202 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
203 params = (ZSTD_CCtx_params*)ZSTD_calloc(
204 sizeof(ZSTD_CCtx_params), customMem);
205 if (!params) { return NULL; }
206 params->customMem = customMem;
207 params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
208 params->fParams.contentSizeFlag = 1;
209 return params;
210 }
211
ZSTD_createCCtxParams(void)212 ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
213 {
214 return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
215 }
216
ZSTD_freeCCtxParams(ZSTD_CCtx_params * params)217 size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
218 {
219 if (params == NULL) { return 0; }
220 ZSTD_free(params, params->customMem);
221 return 0;
222 }
223
ZSTD_CCtxParams_reset(ZSTD_CCtx_params * params)224 size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
225 {
226 return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
227 }
228
/*! ZSTD_CCtxParams_init() :
 *  Zero the params structure, then select `compressionLevel`
 *  and enable the content-size header field.
 * @return : 0, or an error code if cctxParams==NULL */
size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC);
    memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->fParams.contentSizeFlag = 1;
    return 0;
}
236
/*! ZSTD_CCtxParams_init_advanced() :
 *  Initialize a params structure from fully-specified ZSTD_parameters.
 *  cParams are validated first; structure is only modified on success.
 * @return : 0, or an error code (NULL pointer, invalid cParams) */
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC);
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
    memset(cctxParams, 0, sizeof(*cctxParams));
    assert(!ZSTD_checkCParams(params.cParams));
    cctxParams->cParams = params.cParams;
    cctxParams->fParams = params.fParams;
    /* level should not matter, as all cParams are presumed properly defined */
    cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT;
    return 0;
}
248
249 /* ZSTD_assignParamsToCCtxParams() :
250 * params is presumed valid at this stage */
ZSTD_assignParamsToCCtxParams(const ZSTD_CCtx_params * cctxParams,ZSTD_parameters params)251 static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
252 const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
253 {
254 ZSTD_CCtx_params ret = *cctxParams;
255 assert(!ZSTD_checkCParams(params.cParams));
256 ret.cParams = params.cParams;
257 ret.fParams = params.fParams;
258 ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
259 return ret;
260 }
261
/*! ZSTD_cParam_getBounds() :
 *  Report the valid [lowerBound, upperBound] range for a compression parameter.
 *  For unknown parameters, bounds.error carries parameter_unsupported.
 *  Multithreading-related parameters report an upper bound of 0 when the
 *  library is compiled without ZSTD_MULTITHREAD. */
ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
{
    ZSTD_bounds bounds = { 0, 0, 0 };

    switch(param)
    {
    case ZSTD_c_compressionLevel:
        bounds.lowerBound = ZSTD_minCLevel();
        bounds.upperBound = ZSTD_maxCLevel();
        return bounds;

    case ZSTD_c_windowLog:
        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
        return bounds;

    case ZSTD_c_hashLog:
        bounds.lowerBound = ZSTD_HASHLOG_MIN;
        bounds.upperBound = ZSTD_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_chainLog:
        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
        bounds.upperBound = ZSTD_CHAINLOG_MAX;
        return bounds;

    case ZSTD_c_searchLog:
        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
        return bounds;

    case ZSTD_c_minMatch:
        bounds.lowerBound = ZSTD_MINMATCH_MIN;
        bounds.upperBound = ZSTD_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_targetLength:
        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
        return bounds;

    case ZSTD_c_strategy:
        bounds.lowerBound = ZSTD_STRATEGY_MIN;
        bounds.upperBound = ZSTD_STRATEGY_MAX;
        return bounds;

    case ZSTD_c_contentSizeFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_checksumFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_dictIDFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_nbWorkers:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_jobSize:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_overlapLog:
        bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
        bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
        return bounds;

    case ZSTD_c_enableLongDistanceMatching:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_ldmHashLog:
        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_ldmMinMatch:
        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_ldmBucketSizeLog:
        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
        return bounds;

    case ZSTD_c_ldmHashRateLog:
        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
        return bounds;

    /* experimental parameters */
    case ZSTD_c_rsyncable:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_forceMaxWindow :
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_format:
        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
        bounds.lowerBound = ZSTD_f_zstd1;
        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_forceAttachDict:
        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
        bounds.lowerBound = ZSTD_dictDefaultAttach;
        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_literalCompressionMode:
        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
        bounds.lowerBound = ZSTD_lcm_auto;
        bounds.upperBound = ZSTD_lcm_uncompressed;
        return bounds;

    case ZSTD_c_targetCBlockSize:
        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
        return bounds;

    case ZSTD_c_srcSizeHint:
        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
        return bounds;

    default:
        /* unknown parameter : report the error through the bounds structure */
        {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
            return boundError;
        }
    }
}
416
417 /* ZSTD_cParam_clampBounds:
418 * Clamps the value into the bounded range.
419 */
ZSTD_cParam_clampBounds(ZSTD_cParameter cParam,int * value)420 static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
421 {
422 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
423 if (ZSTD_isError(bounds.error)) return bounds.error;
424 if (*value < bounds.lowerBound) *value = bounds.lowerBound;
425 if (*value > bounds.upperBound) *value = bounds.upperBound;
426 return 0;
427 }
428
/* BOUNDCHECK() :
 * Validate `val` against the authorized range of `cParam`.
 * note : on failure this expands to a `return parameter_outOfBound` —
 *        only usable inside functions returning size_t error codes. */
#define BOUNDCHECK(cParam, val) { \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
                    parameter_outOfBound); \
}
433
434
/* ZSTD_isUpdateAuthorized() :
 * @return : 1 if `param` may still be changed after compression has started
 *           (only the per-block tuning parameters qualify), 0 otherwise. */
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    switch(param)
    {
    /* updatable mid-stream : these only tune match-finding effort */
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        return 1;

    /* frozen once a frame is in flight */
    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow :
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    default:
        return 0;
    }
}
471
/*! ZSTD_CCtx_setParameter() :
 *  Set one compression parameter on a CCtx.
 *  If compression has already started, only update-authorized parameters
 *  are accepted (and flagged via cParamsChanged); others fail with stage_wrong.
 *  Actual storage and validation is delegated to ZSTD_CCtxParams_setParameter().
 * @return : the value effectively set, or an error code */
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
    if (cctx->streamStage != zcss_init) {
        if (ZSTD_isUpdateAuthorized(param)) {
            cctx->cParamsChanged = 1;
        } else {
            RETURN_ERROR(stage_wrong);
    }   }

    switch(param)
    {
    case ZSTD_c_nbWorkers:
        /* MT requires dynamic allocation, unavailable on static CCtx */
        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
                        "MT not compatible with static alloc");
        break;

    /* every other known parameter : pass-through to requestedParams */
    case ZSTD_c_compressionLevel:
    case ZSTD_c_windowLog:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_format:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
        break;

    default: RETURN_ERROR(parameter_unsupported);
    }
    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
520
ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params * CCtxParams,ZSTD_cParameter param,int value)521 size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
522 ZSTD_cParameter param, int value)
523 {
524 DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
525 switch(param)
526 {
527 case ZSTD_c_format :
528 BOUNDCHECK(ZSTD_c_format, value);
529 CCtxParams->format = (ZSTD_format_e)value;
530 return (size_t)CCtxParams->format;
531
532 case ZSTD_c_compressionLevel : {
533 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
534 if (value) { /* 0 : does not change current level */
535 CCtxParams->compressionLevel = value;
536 }
537 if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
538 return 0; /* return type (size_t) cannot represent negative values */
539 }
540
541 case ZSTD_c_windowLog :
542 if (value!=0) /* 0 => use default */
543 BOUNDCHECK(ZSTD_c_windowLog, value);
544 CCtxParams->cParams.windowLog = (U32)value;
545 return CCtxParams->cParams.windowLog;
546
547 case ZSTD_c_hashLog :
548 if (value!=0) /* 0 => use default */
549 BOUNDCHECK(ZSTD_c_hashLog, value);
550 CCtxParams->cParams.hashLog = (U32)value;
551 return CCtxParams->cParams.hashLog;
552
553 case ZSTD_c_chainLog :
554 if (value!=0) /* 0 => use default */
555 BOUNDCHECK(ZSTD_c_chainLog, value);
556 CCtxParams->cParams.chainLog = (U32)value;
557 return CCtxParams->cParams.chainLog;
558
559 case ZSTD_c_searchLog :
560 if (value!=0) /* 0 => use default */
561 BOUNDCHECK(ZSTD_c_searchLog, value);
562 CCtxParams->cParams.searchLog = (U32)value;
563 return (size_t)value;
564
565 case ZSTD_c_minMatch :
566 if (value!=0) /* 0 => use default */
567 BOUNDCHECK(ZSTD_c_minMatch, value);
568 CCtxParams->cParams.minMatch = value;
569 return CCtxParams->cParams.minMatch;
570
571 case ZSTD_c_targetLength :
572 BOUNDCHECK(ZSTD_c_targetLength, value);
573 CCtxParams->cParams.targetLength = value;
574 return CCtxParams->cParams.targetLength;
575
576 case ZSTD_c_strategy :
577 if (value!=0) /* 0 => use default */
578 BOUNDCHECK(ZSTD_c_strategy, value);
579 CCtxParams->cParams.strategy = (ZSTD_strategy)value;
580 return (size_t)CCtxParams->cParams.strategy;
581
582 case ZSTD_c_contentSizeFlag :
583 /* Content size written in frame header _when known_ (default:1) */
584 DEBUGLOG(4, "set content size flag = %u", (value!=0));
585 CCtxParams->fParams.contentSizeFlag = value != 0;
586 return CCtxParams->fParams.contentSizeFlag;
587
588 case ZSTD_c_checksumFlag :
589 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
590 CCtxParams->fParams.checksumFlag = value != 0;
591 return CCtxParams->fParams.checksumFlag;
592
593 case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
594 DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
595 CCtxParams->fParams.noDictIDFlag = !value;
596 return !CCtxParams->fParams.noDictIDFlag;
597
598 case ZSTD_c_forceMaxWindow :
599 CCtxParams->forceWindow = (value != 0);
600 return CCtxParams->forceWindow;
601
602 case ZSTD_c_forceAttachDict : {
603 const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
604 BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
605 CCtxParams->attachDictPref = pref;
606 return CCtxParams->attachDictPref;
607 }
608
609 case ZSTD_c_literalCompressionMode : {
610 const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
611 BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
612 CCtxParams->literalCompressionMode = lcm;
613 return CCtxParams->literalCompressionMode;
614 }
615
616 case ZSTD_c_nbWorkers :
617 #ifndef ZSTD_MULTITHREAD
618 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
619 return 0;
620 #else
621 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
622 CCtxParams->nbWorkers = value;
623 return CCtxParams->nbWorkers;
624 #endif
625
626 case ZSTD_c_jobSize :
627 #ifndef ZSTD_MULTITHREAD
628 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
629 return 0;
630 #else
631 /* Adjust to the minimum non-default value. */
632 if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
633 value = ZSTDMT_JOBSIZE_MIN;
634 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
635 assert(value >= 0);
636 CCtxParams->jobSize = value;
637 return CCtxParams->jobSize;
638 #endif
639
640 case ZSTD_c_overlapLog :
641 #ifndef ZSTD_MULTITHREAD
642 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
643 return 0;
644 #else
645 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
646 CCtxParams->overlapLog = value;
647 return CCtxParams->overlapLog;
648 #endif
649
650 case ZSTD_c_rsyncable :
651 #ifndef ZSTD_MULTITHREAD
652 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
653 return 0;
654 #else
655 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
656 CCtxParams->rsyncable = value;
657 return CCtxParams->rsyncable;
658 #endif
659
660 case ZSTD_c_enableLongDistanceMatching :
661 CCtxParams->ldmParams.enableLdm = (value!=0);
662 return CCtxParams->ldmParams.enableLdm;
663
664 case ZSTD_c_ldmHashLog :
665 if (value!=0) /* 0 ==> auto */
666 BOUNDCHECK(ZSTD_c_ldmHashLog, value);
667 CCtxParams->ldmParams.hashLog = value;
668 return CCtxParams->ldmParams.hashLog;
669
670 case ZSTD_c_ldmMinMatch :
671 if (value!=0) /* 0 ==> default */
672 BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
673 CCtxParams->ldmParams.minMatchLength = value;
674 return CCtxParams->ldmParams.minMatchLength;
675
676 case ZSTD_c_ldmBucketSizeLog :
677 if (value!=0) /* 0 ==> default */
678 BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
679 CCtxParams->ldmParams.bucketSizeLog = value;
680 return CCtxParams->ldmParams.bucketSizeLog;
681
682 case ZSTD_c_ldmHashRateLog :
683 RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN,
684 parameter_outOfBound);
685 CCtxParams->ldmParams.hashRateLog = value;
686 return CCtxParams->ldmParams.hashRateLog;
687
688 case ZSTD_c_targetCBlockSize :
689 if (value!=0) /* 0 ==> default */
690 BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
691 CCtxParams->targetCBlockSize = value;
692 return CCtxParams->targetCBlockSize;
693
694 case ZSTD_c_srcSizeHint :
695 if (value!=0) /* 0 ==> default */
696 BOUNDCHECK(ZSTD_c_srcSizeHint, value);
697 CCtxParams->srcSizeHint = value;
698 return CCtxParams->srcSizeHint;
699
700 default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
701 }
702 }
703
ZSTD_CCtx_getParameter(ZSTD_CCtx * cctx,ZSTD_cParameter param,int * value)704 size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
705 {
706 return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
707 }
708
/*! ZSTD_CCtxParams_getParameter() :
 *  Read one compression parameter out of a ZSTD_CCtx_params structure into *value.
 *  Multithreading-only parameters fail with parameter_unsupported when the
 *  library is compiled without ZSTD_MULTITHREAD.
 * @return : 0, or an error code (unknown / unsupported parameter) */
size_t ZSTD_CCtxParams_getParameter(
        ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
{
    switch(param)
    {
    case ZSTD_c_format :
        *value = CCtxParams->format;
        break;
    case ZSTD_c_compressionLevel :
        *value = CCtxParams->compressionLevel;
        break;
    case ZSTD_c_windowLog :
        *value = (int)CCtxParams->cParams.windowLog;
        break;
    case ZSTD_c_hashLog :
        *value = (int)CCtxParams->cParams.hashLog;
        break;
    case ZSTD_c_chainLog :
        *value = (int)CCtxParams->cParams.chainLog;
        break;
    case ZSTD_c_searchLog :
        *value = CCtxParams->cParams.searchLog;
        break;
    case ZSTD_c_minMatch :
        *value = CCtxParams->cParams.minMatch;
        break;
    case ZSTD_c_targetLength :
        *value = CCtxParams->cParams.targetLength;
        break;
    case ZSTD_c_strategy :
        *value = (unsigned)CCtxParams->cParams.strategy;
        break;
    case ZSTD_c_contentSizeFlag :
        *value = CCtxParams->fParams.contentSizeFlag;
        break;
    case ZSTD_c_checksumFlag :
        *value = CCtxParams->fParams.checksumFlag;
        break;
    case ZSTD_c_dictIDFlag :
        /* stored inverted (noDictIDFlag); report the user-facing polarity */
        *value = !CCtxParams->fParams.noDictIDFlag;
        break;
    case ZSTD_c_forceMaxWindow :
        *value = CCtxParams->forceWindow;
        break;
    case ZSTD_c_forceAttachDict :
        *value = CCtxParams->attachDictPref;
        break;
    case ZSTD_c_literalCompressionMode :
        *value = CCtxParams->literalCompressionMode;
        break;
    case ZSTD_c_nbWorkers :
#ifndef ZSTD_MULTITHREAD
        assert(CCtxParams->nbWorkers == 0);
#endif
        *value = CCtxParams->nbWorkers;
        break;
    case ZSTD_c_jobSize :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        assert(CCtxParams->jobSize <= INT_MAX);
        *value = (int)CCtxParams->jobSize;
        break;
#endif
    case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        *value = CCtxParams->overlapLog;
        break;
#endif
    case ZSTD_c_rsyncable :
#ifndef ZSTD_MULTITHREAD
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
#else
        *value = CCtxParams->rsyncable;
        break;
#endif
    case ZSTD_c_enableLongDistanceMatching :
        *value = CCtxParams->ldmParams.enableLdm;
        break;
    case ZSTD_c_ldmHashLog :
        *value = CCtxParams->ldmParams.hashLog;
        break;
    case ZSTD_c_ldmMinMatch :
        *value = CCtxParams->ldmParams.minMatchLength;
        break;
    case ZSTD_c_ldmBucketSizeLog :
        *value = CCtxParams->ldmParams.bucketSizeLog;
        break;
    case ZSTD_c_ldmHashRateLog :
        *value = CCtxParams->ldmParams.hashRateLog;
        break;
    case ZSTD_c_targetCBlockSize :
        *value = (int)CCtxParams->targetCBlockSize;
        break;
    case ZSTD_c_srcSizeHint :
        *value = (int)CCtxParams->srcSizeHint;
        break;
    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return 0;
}
812
813 /** ZSTD_CCtx_setParametersUsingCCtxParams() :
814 * just applies `params` into `cctx`
815 * no action is performed, parameters are merely stored.
816 * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
817 * This is possible even if a compression is ongoing.
818 * In which case, new parameters will be applied on the fly, starting with next compression job.
819 */
ZSTD_CCtx_setParametersUsingCCtxParams(ZSTD_CCtx * cctx,const ZSTD_CCtx_params * params)820 size_t ZSTD_CCtx_setParametersUsingCCtxParams(
821 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
822 {
823 DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
824 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
825 RETURN_ERROR_IF(cctx->cdict, stage_wrong);
826
827 cctx->requestedParams = *params;
828 return 0;
829 }
830
ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx * cctx,unsigned long long pledgedSrcSize)831 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
832 {
833 DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
834 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
835 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
836 return 0;
837 }
838
/**
 * Initializes the local dict using the requested parameters.
 * NOTE: This does not use the pledged src size, because it may be used for more
 * than one compression.
 * Lazily digests cctx->localDict into a CDict on first use, and attaches it.
 * @return : 0 on success (including "nothing to do"), or an error code.
 */
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
    ZSTD_localDict* const dl = &cctx->localDict;
    ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
            &cctx->requestedParams, 0, dl->dictSize);
    if (dl->dict == NULL) {
        /* No local dictionary. */
        assert(dl->dictBuffer == NULL);
        assert(dl->cdict == NULL);
        assert(dl->dictSize == 0);
        return 0;
    }
    if (dl->cdict != NULL) {
        assert(cctx->cdict == dl->cdict);
        /* Local dictionary already initialized. */
        return 0;
    }
    assert(dl->dictSize > 0);
    assert(cctx->cdict == NULL);
    assert(cctx->prefixDict.dict == NULL);

    /* byRef : dl->dict stays owned by localDict (possibly via dictBuffer) */
    dl->cdict = ZSTD_createCDict_advanced(
            dl->dict,
            dl->dictSize,
            ZSTD_dlm_byRef,
            dl->dictContentType,
            cParams,
            cctx->customMem);
    RETURN_ERROR_IF(!dl->cdict, memory_allocation);
    cctx->cdict = dl->cdict;
    return 0;
}
876
/*! ZSTD_CCtx_loadDictionary_advanced() :
 *  Register a dictionary for future compressions, by reference or by copy.
 *  Replaces any previously loaded dictionary. Digestion into a CDict is deferred.
 *  dict==NULL or dictSize==0 simply clears any dictionary.
 * @return : 0, or an error code (wrong stage, static CCtx, allocation failure) */
size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "no malloc for static CCtx");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
    ZSTD_clearAllDicts(cctx);   /* in case one already exists */
    if (dict == NULL || dictSize == 0)   /* no dictionary mode */
        return 0;
    if (dictLoadMethod != ZSTD_dlm_byRef) {
        /* by copy : take ownership of a private copy of the dictionary */
        void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem);
        RETURN_ERROR_IF(!dictBuffer, memory_allocation);
        memcpy(dictBuffer, dict, dictSize);
        cctx->localDict.dictBuffer = dictBuffer;
        cctx->localDict.dict = dictBuffer;
    } else {
        /* by reference : caller keeps ownership of the dictionary memory */
        cctx->localDict.dict = dict;
    }
    cctx->localDict.dictSize = dictSize;
    cctx->localDict.dictContentType = dictContentType;
    return 0;
}
901
ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx * cctx,const void * dict,size_t dictSize)902 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
903 ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
904 {
905 return ZSTD_CCtx_loadDictionary_advanced(
906 cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
907 }
908
ZSTD_CCtx_loadDictionary(ZSTD_CCtx * cctx,const void * dict,size_t dictSize)909 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
910 {
911 return ZSTD_CCtx_loadDictionary_advanced(
912 cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
913 }
914
915
ZSTD_CCtx_refCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict)916 size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
917 {
918 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
919 /* Free the existing local cdict (if any) to save memory. */
920 ZSTD_clearAllDicts(cctx);
921 cctx->cdict = cdict;
922 return 0;
923 }
924
/*! ZSTD_CCtx_refPrefix() :
 *  Reference a raw-content prefix for the next compression only. */
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}
929
/*! ZSTD_CCtx_refPrefix_advanced() :
 *  Reference `prefix` for the next frame only.
 *  Clears any other dictionary first : a prefix is exclusive. */
size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    ZSTD_clearAllDicts(cctx);
    /* record the prefix descriptor (fields are independent, order is free) */
    cctx->prefixDict.dictContentType = dictContentType;
    cctx->prefixDict.dictSize = prefixSize;
    cctx->prefixDict.dict = prefix;
    return 0;
}
940
941 /*! ZSTD_CCtx_reset() :
942 * Also dumps dictionary */
ZSTD_CCtx_reset(ZSTD_CCtx * cctx,ZSTD_ResetDirective reset)943 size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
944 {
945 if ( (reset == ZSTD_reset_session_only)
946 || (reset == ZSTD_reset_session_and_parameters) ) {
947 cctx->streamStage = zcss_init;
948 cctx->pledgedSrcSizePlusOne = 0;
949 }
950 if ( (reset == ZSTD_reset_parameters)
951 || (reset == ZSTD_reset_session_and_parameters) ) {
952 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
953 ZSTD_clearAllDicts(cctx);
954 return ZSTD_CCtxParams_reset(&cctx->requestedParams);
955 }
956 return 0;
957 }
958
959
/** ZSTD_checkCParams() :
    control CParam values remain within authorized range.
   @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    /* BOUNDCHECK (macro defined earlier in this file) returns from this
     * function with an error code when the value is outside the authorized
     * range for that parameter; falling through all checks yields success. */
    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
    /* strategy is already an enum : no cast needed */
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}
974
/** ZSTD_clampCParams() :
 *  make CParam values within valid range.
 *  @return : valid CParams */
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
    /* CLAMP_TYPE saturates `val` into the [lowerBound, upperBound] range
     * reported by ZSTD_cParam_getBounds(), keeping the field's own type. */
#   define CLAMP_TYPE(cParam, val, type) {                                \
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
    }
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
    /* strategy is an enum : clamp with its own type rather than `unsigned` */
    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
    return cParams;
}
996
997 /** ZSTD_cycleLog() :
998 * condition for correct operation : hashLog > 1 */
ZSTD_cycleLog(U32 hashLog,ZSTD_strategy strat)999 static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
1000 {
1001 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
1002 return hashLog - btScale;
1003 }
1004
/** ZSTD_adjustCParams_internal() :
 *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
 *  mostly downsize to reduce memory consumption and initialization latency.
 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
 *  note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention.
 *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
                            unsigned long long srcSize,
                            size_t dictSize)
{
    static const U64 minSrcSize = 513; /* (1<<9) + 1 */
    static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
    assert(ZSTD_checkCParams(cPar)==0);

    /* (srcSize+1<2) is true for srcSize==0 and for the all-ones
     * ZSTD_CONTENTSIZE_UNKNOWN value : both mean "unknown" here */
    if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ )
        srcSize = minSrcSize;  /* presumed small when there is a dictionary */
    else if (srcSize == 0)
        srcSize = ZSTD_CONTENTSIZE_UNKNOWN;  /* 0 == unknown : presumed large */

    /* resize windowLog if input is small enough, to use less memory */
    if ( (srcSize < maxWindowResize)
      && (dictSize < maxWindowResize) )  {
        U32 const tSize = (U32)(srcSize + dictSize);
        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
        /* smallest power-of-2 window that still covers src + dict */
        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
                            ZSTD_highbit32(tSize-1) + 1;
        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
    }
    /* keep dependent table logs consistent with the (possibly reduced) window.
     * Order matters : hashLog is capped first, then chainLog via cycleLog. */
    if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
        if (cycleLog > cPar.windowLog)
            cPar.chainLog -= (cycleLog - cPar.windowLog);
    }

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */

    return cPar;
}
1045
1046 ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,unsigned long long srcSize,size_t dictSize)1047 ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
1048 unsigned long long srcSize,
1049 size_t dictSize)
1050 {
1051 cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
1052 return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
1053 }
1054
/* ZSTD_getCParamsFromCCtxParams() :
 * Derive effective compression parameters from a CCtx_params structure :
 * start from level-based defaults, then overlay every explicitly-set field. */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
{
    ZSTD_compressionParameters cParams;
    /* a user-provided srcSizeHint takes over when the caller's hint is "unknown" */
    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
      srcSizeHint = CCtxParams->srcSizeHint;
    }
    cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
    /* order matters : the LDM default windowLog is applied first, so an
     * explicitly-set windowLog (next line) still overrides it */
    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
    /* a zero field means "not set" : keep the level-based default */
    if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
    if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
    if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
    if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
    if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch;
    if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
    if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
    assert(!ZSTD_checkCParams(cParams));
    /* finally, downsize the parameters to fit the actual input */
    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
}
1074
1075 static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters * const cParams,const U32 forCCtx)1076 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
1077 const U32 forCCtx)
1078 {
1079 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
1080 size_t const hSize = ((size_t)1) << cParams->hashLog;
1081 U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
1082 size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
1083 /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
1084 * surrounded by redzones in ASAN. */
1085 size_t const tableSpace = chainSize * sizeof(U32)
1086 + hSize * sizeof(U32)
1087 + h3Size * sizeof(U32);
1088 size_t const optPotentialSpace =
1089 ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
1090 + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
1091 + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
1092 + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
1093 + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
1094 + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
1095 size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
1096 ? optPotentialSpace
1097 : 0;
1098 DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
1099 (U32)chainSize, (U32)hSize, (U32)h3Size);
1100 return tableSpace + optSpace;
1101 }
1102
/* ZSTD_estimateCCtxSize_usingCCtxParams() :
 * Upper-bound memory estimate for a single-threaded CCtx with these params.
 * NOTE(review): the accounting below mirrors the allocations performed in
 * ZSTD_resetCCtx_internal() — keep both in sync when modifying either. */
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, 0, 0);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        /* worst case : one sequence every `divider` bytes */
        U32 const divider = (cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        /* literals buffer + sequence array + 3 per-sequence code arrays (ll/ml/of) */
        size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
                                + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
                                + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
        size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
        /* two block states : prev + next */
        size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
        size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);

        /* long-distance-matching tables (zero when LDM disabled) */
        size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
        size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq));

        size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
                                   matchStateSize + ldmSpace + ldmSeqSpace;
        size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx));

        DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)cctxSpace);
        DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
        return cctxSpace + neededSpace;
    }
}
1130
ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)1131 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
1132 {
1133 ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
1134 return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms);
1135 }
1136
ZSTD_estimateCCtxSize_internal(int compressionLevel)1137 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
1138 {
1139 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
1140 return ZSTD_estimateCCtxSize_usingCParams(cParams);
1141 }
1142
ZSTD_estimateCCtxSize(int compressionLevel)1143 size_t ZSTD_estimateCCtxSize(int compressionLevel)
1144 {
1145 int level;
1146 size_t memBudget = 0;
1147 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
1148 size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
1149 if (newMB > memBudget) memBudget = newMB;
1150 }
1151 return memBudget;
1152 }
1153
ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params * params)1154 size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
1155 {
1156 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
1157 { ZSTD_compressionParameters const cParams =
1158 ZSTD_getCParamsFromCCtxParams(params, 0, 0);
1159 size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
1160 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
1161 size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
1162 size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
1163 size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize)
1164 + ZSTD_cwksp_alloc_size(outBuffSize);
1165
1166 return CCtxSize + streamingSize;
1167 }
1168 }
1169
ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)1170 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
1171 {
1172 ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
1173 return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms);
1174 }
1175
ZSTD_estimateCStreamSize_internal(int compressionLevel)1176 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
1177 {
1178 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
1179 return ZSTD_estimateCStreamSize_usingCParams(cParams);
1180 }
1181
ZSTD_estimateCStreamSize(int compressionLevel)1182 size_t ZSTD_estimateCStreamSize(int compressionLevel)
1183 {
1184 int level;
1185 size_t memBudget = 0;
1186 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
1187 size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
1188 if (newMB > memBudget) memBudget = newMB;
1189 }
1190 return memBudget;
1191 }
1192
/* ZSTD_getFrameProgression():
 * tells how much data has been consumed (input) and produced (output) for current frame.
 * able to count progression inside worker threads (non-blocking mode).
 */
ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    /* multi-threaded mode : delegate to the MT context */
    if (cctx->appliedParams.nbWorkers > 0) {
        return ZSTDMT_getFrameProgression(cctx->mtctx);
    }
#endif
    {   ZSTD_frameProgression fp;
        /* bytes accepted into the input buffer but not yet compressed;
         * zero in non-buffered mode (inBuff == NULL) */
        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
                                cctx->inBuffPos - cctx->inToCompress;
        /* only assert the invariant when buffering is actually in use */
        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
        fp.ingested = cctx->consumedSrcSize + buffered;
        fp.consumed = cctx->consumedSrcSize;
        fp.produced = cctx->producedCSize;
        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
        /* job/worker fields only meaningful in MT mode */
        fp.currentJobID = 0;
        fp.nbActiveWorkers = 0;
        return fp;
}   }
1217
/*! ZSTD_toFlushNow()
 *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
 *  @return : amount of data ready to flush (0 in single-threaded mode).
 */
size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        return ZSTDMT_toFlushNow(cctx->mtctx);
    }
#endif
    (void)cctx;  /* unused when ZSTD_MULTITHREAD is not defined */
    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}
1231
ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,ZSTD_compressionParameters cParams2)1232 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
1233 ZSTD_compressionParameters cParams2)
1234 {
1235 (void)cParams1;
1236 (void)cParams2;
1237 assert(cParams1.windowLog == cParams2.windowLog);
1238 assert(cParams1.chainLog == cParams2.chainLog);
1239 assert(cParams1.hashLog == cParams2.hashLog);
1240 assert(cParams1.searchLog == cParams2.searchLog);
1241 assert(cParams1.minMatch == cParams2.minMatch);
1242 assert(cParams1.targetLength == cParams2.targetLength);
1243 assert(cParams1.strategy == cParams2.strategy);
1244 }
1245
ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t * bs)1246 static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
1247 {
1248 int i;
1249 for (i = 0; i < ZSTD_REP_NUM; ++i)
1250 bs->rep[i] = repStartValue[i];
1251 bs->entropy.huf.repeatMode = HUF_repeat_none;
1252 bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
1253 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
1254 bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
1255 }
1256
/*! ZSTD_invalidateMatchState()
 *  Invalidate all the matches in the match finder tables.
 *  Requires nextSrc and base to be set (can be NULL).
 */
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
    ZSTD_window_clear(&ms->window);

    /* note : reads window.dictLimit *after* the clear above,
     * so the order of these two statements is significant */
    ms->nextToUpdate = ms->window.dictLimit;
    ms->loadedDictEnd = 0;
    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
    ms->dictMatchState = NULL;
}
1270
1271 /**
1272 * Indicates whether this compression proceeds directly from user-provided
1273 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
1274 * whether the context needs to buffer the input/output (ZSTDb_buffered).
1275 */
1276 typedef enum {
1277 ZSTDb_not_buffered,
1278 ZSTDb_buffered
1279 } ZSTD_buffered_policy_e;
1280
1281 /**
1282 * Controls, for this matchState reset, whether the tables need to be cleared /
1283 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
1284 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
1285 * subsequent operation will overwrite the table space anyways (e.g., copying
1286 * the matchState contents in from a CDict).
1287 */
1288 typedef enum {
1289 ZSTDcrp_makeClean,
1290 ZSTDcrp_leaveDirty
1291 } ZSTD_compResetPolicy_e;
1292
1293 /**
1294 * Controls, for this matchState reset, whether indexing can continue where it
1295 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
1296 * (ZSTDirp_reset).
1297 */
1298 typedef enum {
1299 ZSTDirp_continue,
1300 ZSTDirp_reset
1301 } ZSTD_indexResetPolicy_e;
1302
1303 typedef enum {
1304 ZSTD_resetTarget_CDict,
1305 ZSTD_resetTarget_CCtx
1306 } ZSTD_resetTarget_e;
1307
/*! ZSTD_reset_matchState() :
 *  Reserve the match-finder tables inside workspace `ws` and reset `ms`.
 *  `crp` selects whether the tables are zeroed; `forceResetIndex` whether
 *  the window indexing restarts from scratch.
 *  @return : 0 on success, or memory_allocation if a reservation failed. */
static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                      ZSTD_cwksp* ws,
                const ZSTD_compressionParameters* cParams,
                const ZSTD_compResetPolicy_e crp,
                const ZSTD_indexResetPolicy_e forceResetIndex,
                const ZSTD_resetTarget_e forWho)
{
    /* table sizes, in number of entries */
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    /* hash3 table only exists for a CCtx with minMatch==3 */
    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;

    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
    if (forceResetIndex == ZSTDirp_reset) {
        memset(&ms->window, 0, sizeof(ms->window));
        ms->window.dictLimit = 1;    /* start from 1, so that 1st position is valid */
        ms->window.lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
        ms->window.nextSrc = ms->window.base + 1;   /* see issue #1241 */
        ZSTD_cwksp_mark_tables_dirty(ws);
    }

    ms->hashLog3 = hashLog3;

    ZSTD_invalidateMatchState(ms);

    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */

    ZSTD_cwksp_clear_tables(ws);

    DEBUGLOG(5, "reserving table space");
    /* table Space */
    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");

    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
    if (crp!=ZSTDcrp_leaveDirty) {
        /* reset tables only */
        ZSTD_cwksp_clean_tables(ws);
    }

    /* opt parser space */
    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
        DEBUGLOG(4, "reserving optimal parser space");
        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    }

    ms->cParams = *cParams;

    /* re-check : the opt-parser reservations above may also have failed */
    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");

    return 0;
}
1370
1371 /* ZSTD_indexTooCloseToMax() :
1372 * minor optimization : prefer memset() rather than reduceIndex()
1373 * which is measurably slow in some circumstances (reported for Visual Studio).
1374 * Works when re-using a context for a lot of smallish inputs :
1375 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
1376 * memset() will be triggered before reduceIndex().
1377 */
1378 #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
ZSTD_indexTooCloseToMax(ZSTD_window_t w)1379 static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
1380 {
1381 return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
1382 }
1383
1384 /*! ZSTD_resetCCtx_internal() :
1385 note : `params` are assumed fully validated at this stage */
ZSTD_resetCCtx_internal(ZSTD_CCtx * zc,ZSTD_CCtx_params params,U64 const pledgedSrcSize,ZSTD_compResetPolicy_e const crp,ZSTD_buffered_policy_e const zbuff)1386 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
1387 ZSTD_CCtx_params params,
1388 U64 const pledgedSrcSize,
1389 ZSTD_compResetPolicy_e const crp,
1390 ZSTD_buffered_policy_e const zbuff)
1391 {
1392 ZSTD_cwksp* const ws = &zc->workspace;
1393 DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
1394 (U32)pledgedSrcSize, params.cParams.windowLog);
1395 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
1396
1397 zc->isFirstBlock = 1;
1398
1399 if (params.ldmParams.enableLdm) {
1400 /* Adjust long distance matching parameters */
1401 ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams);
1402 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
1403 assert(params.ldmParams.hashRateLog < 32);
1404 zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
1405 }
1406
1407 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
1408 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
1409 U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
1410 size_t const maxNbSeq = blockSize / divider;
1411 size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
1412 + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
1413 + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
1414 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
1415 size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
1416 size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1);
1417 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
1418
1419 ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue;
1420
1421 if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
1422 needsIndexReset = ZSTDirp_reset;
1423 }
1424
1425 ZSTD_cwksp_bump_oversized_duration(ws, 0);
1426
1427 /* Check if workspace is large enough, alloc a new one if needed */
1428 { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
1429 size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
1430 size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
1431 size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize);
1432 size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
1433 size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq));
1434
1435 size_t const neededSpace =
1436 cctxSpace +
1437 entropySpace +
1438 blockStateSpace +
1439 ldmSpace +
1440 ldmSeqSpace +
1441 matchStateSize +
1442 tokenSpace +
1443 bufferSpace;
1444
1445 int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
1446 int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
1447
1448 DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
1449 neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
1450 DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
1451
1452 if (workspaceTooSmall || workspaceWasteful) {
1453 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
1454 ZSTD_cwksp_sizeof(ws) >> 10,
1455 neededSpace >> 10);
1456
1457 RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
1458
1459 needsIndexReset = ZSTDirp_reset;
1460
1461 ZSTD_cwksp_free(ws, zc->customMem);
1462 FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem));
1463
1464 DEBUGLOG(5, "reserving object space");
1465 /* Statically sized space.
1466 * entropyWorkspace never moves,
1467 * though prev/next block swap places */
1468 assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
1469 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1470 RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
1471 zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
1472 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
1473 zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);
1474 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
1475 } }
1476
1477 ZSTD_cwksp_clear(ws);
1478
1479 /* init params */
1480 zc->appliedParams = params;
1481 zc->blockState.matchState.cParams = params.cParams;
1482 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
1483 zc->consumedSrcSize = 0;
1484 zc->producedCSize = 0;
1485 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1486 zc->appliedParams.fParams.contentSizeFlag = 0;
1487 DEBUGLOG(4, "pledged content size : %u ; flag : %u",
1488 (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
1489 zc->blockSize = blockSize;
1490
1491 XXH64_reset(&zc->xxhState, 0);
1492 zc->stage = ZSTDcs_init;
1493 zc->dictID = 0;
1494
1495 ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
1496
1497 /* ZSTD_wildcopy() is used to copy into the literals buffer,
1498 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
1499 */
1500 zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
1501 zc->seqStore.maxNbLit = blockSize;
1502
1503 /* buffers */
1504 zc->inBuffSize = buffInSize;
1505 zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
1506 zc->outBuffSize = buffOutSize;
1507 zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
1508
1509 /* ldm bucketOffsets table */
1510 if (params.ldmParams.enableLdm) {
1511 /* TODO: avoid memset? */
1512 size_t const ldmBucketSize =
1513 ((size_t)1) << (params.ldmParams.hashLog -
1514 params.ldmParams.bucketSizeLog);
1515 zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
1516 memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
1517 }
1518
1519 /* sequences storage */
1520 ZSTD_referenceExternalSequences(zc, NULL, 0);
1521 zc->seqStore.maxNbSeq = maxNbSeq;
1522 zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1523 zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1524 zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
1525 zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
1526
1527 FORWARD_IF_ERROR(ZSTD_reset_matchState(
1528 &zc->blockState.matchState,
1529 ws,
1530 ¶ms.cParams,
1531 crp,
1532 needsIndexReset,
1533 ZSTD_resetTarget_CCtx));
1534
1535 /* ldm hash table */
1536 if (params.ldmParams.enableLdm) {
1537 /* TODO: avoid memset? */
1538 size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
1539 zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
1540 memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
1541 zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
1542 zc->maxNbLdmSequences = maxNbLdmSeq;
1543
1544 memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
1545 ZSTD_window_clear(&zc->ldmState.window);
1546 }
1547
1548 DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
1549
1550 return 0;
1551 }
1552 }
1553
1554 /* ZSTD_invalidateRepCodes() :
1555 * ensures next compression will not use repcodes from previous block.
1556 * Note : only works with regular variant;
1557 * do not use with extDict variant ! */
ZSTD_invalidateRepCodes(ZSTD_CCtx * cctx)1558 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
1559 int i;
1560 for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
1561 assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
1562 }
1563
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place. Indexed by ZSTD_strategy value (0 is unused).
 */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};
1580
ZSTD_shouldAttachDict(const ZSTD_CDict * cdict,const ZSTD_CCtx_params * params,U64 pledgedSrcSize)1581 static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
1582 const ZSTD_CCtx_params* params,
1583 U64 pledgedSrcSize)
1584 {
1585 size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
1586 return ( pledgedSrcSize <= cutoff
1587 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
1588 || params->attachDictPref == ZSTD_dictForceAttach )
1589 && params->attachDictPref != ZSTD_dictForceCopy
1590 && !params->forceWindow; /* dictMatchState isn't correctly
1591 * handled in _enforceMaxDist */
1592 }
1593
/* ZSTD_resetCCtx_byAttachingCDict() :
 * Reset `cctx` for a new frame, referencing the cdict's match state in place
 * (via dictMatchState) instead of copying its tables.
 * @return : 0 on success, or an error code forwarded from the reset. */
static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                        const ZSTD_CDict* cdict,
                        ZSTD_CCtx_params params,
                        U64 pledgedSrcSize,
                        ZSTD_buffered_policy_e zbuff)
{
    {   const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
        unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Resize working context table params for input only, since the dict
         * has its own tables. */
        params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
        params.cParams.windowLog = windowLog;   /* preserve the caller-chosen window */
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                                 ZSTDcrp_makeClean, zbuff));
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
    }

    {   /* dictionary extent, expressed in the cdict's own index space */
        const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                  - cdict->matchState.window.base);
        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
        if (cdictLen == 0) {
            /* don't even attach dictionaries with no contents */
            DEBUGLOG(4, "skipping attaching empty dictionary");
        } else {
            DEBUGLOG(4, "attaching dictionary into context");
            cctx->blockState.matchState.dictMatchState = &cdict->matchState;

            /* prep working match state so dict matches never have negative indices
             * when they are translated to the working context's index space. */
            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
                cctx->blockState.matchState.window.nextSrc =
                    cctx->blockState.matchState.window.base + cdictEnd;
                ZSTD_window_clear(&cctx->blockState.matchState.window);
            }
            /* loadedDictEnd is expressed within the referential of the active context */
            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
    }   }

    cctx->dictID = cdict->dictID;

    /* copy block state : the frame starts from the cdict's entropy/repcodes */
    memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
1641
/*! ZSTD_resetCCtx_byCopyingCDict() :
 *  Reset `cctx` in preparation for a new frame, duplicating the cdict's
 *  hash/chain tables, window offsets and entropy state into the working
 *  context. More expensive up front than attaching, but subsequent searches
 *  then only go through a single set of tables.
 * @return : 0, or an error code (forwarded from ZSTD_resetCCtx_internal) */
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            ZSTD_CCtx_params params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;

    DEBUGLOG(4, "copying dictionary into context");

    { unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Copy only compression parameters related to tables. */
        params.cParams = *cdict_cParams;
        params.cParams.windowLog = windowLog;   /* keep the window size selected by the caller */
        /* leaveDirty : tables are about to be overwritten wholesale below */
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                                 ZSTDcrp_leaveDirty, zbuff));
        /* table geometry must match the cdict's, so the raw memcpy stays valid */
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
    }

    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);

    /* copy tables */
    { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
        size_t const hSize = (size_t)1 << cdict_cParams->hashLog;

        memcpy(cctx->blockState.matchState.hashTable,
               cdict->matchState.hashTable,
               hSize * sizeof(U32));
        memcpy(cctx->blockState.matchState.chainTable,
               cdict->matchState.chainTable,
               chainSize * sizeof(U32));
    }

    /* Zero the hashTable3, since the cdict never fills it */
    { int const h3log = cctx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
        assert(cdict->matchState.hashLog3 == 0);
        memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);

    /* copy dictionary offsets */
    { ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }

    cctx->dictID = cdict->dictID;

    /* copy block state (entropy tables + repeat offsets seeded by the cdict) */
    memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
1702
1703 /* We have a choice between copying the dictionary context into the working
1704 * context, or referencing the dictionary context from the working context
1705 * in-place. We decide here which strategy to use. */
ZSTD_resetCCtx_usingCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict,const ZSTD_CCtx_params * params,U64 pledgedSrcSize,ZSTD_buffered_policy_e zbuff)1706 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
1707 const ZSTD_CDict* cdict,
1708 const ZSTD_CCtx_params* params,
1709 U64 pledgedSrcSize,
1710 ZSTD_buffered_policy_e zbuff)
1711 {
1712
1713 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
1714 (unsigned)pledgedSrcSize);
1715
1716 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
1717 return ZSTD_resetCCtx_byAttachingCDict(
1718 cctx, cdict, *params, pledgedSrcSize, zbuff);
1719 } else {
1720 return ZSTD_resetCCtx_byCopyingCDict(
1721 cctx, cdict, *params, pledgedSrcSize, zbuff);
1722 }
1723 }
1724
/*! ZSTD_copyCCtx_internal() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  The "context", in this case, refers to the hash and chain tables,
 *  entropy tables, and dictionary references.
 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
 * @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
                            const ZSTD_CCtx* srcCCtx,
                            ZSTD_frameParameters fParams,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong);

    memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    { ZSTD_CCtx_params params = dstCCtx->requestedParams;
        /* Copy only compression parameters related to tables. */
        params.cParams = srcCCtx->appliedParams.cParams;
        params.fParams = fParams;
        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
                                ZSTDcrp_leaveDirty, zbuff);
        /* table geometry must match, so the raw memcpy of tables below stays valid */
        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
    }

    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);

    /* copy tables */
    { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
        size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
        int const h3log = srcCCtx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;

        memcpy(dstCCtx->blockState.matchState.hashTable,
               srcCCtx->blockState.matchState.hashTable,
               hSize * sizeof(U32));
        memcpy(dstCCtx->blockState.matchState.chainTable,
               srcCCtx->blockState.matchState.chainTable,
               chainSize * sizeof(U32));
        memcpy(dstCCtx->blockState.matchState.hashTable3,
               srcCCtx->blockState.matchState.hashTable3,
               h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);

    /* copy dictionary offsets */
    {
        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
        dstMatchState->window = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }
    dstCCtx->dictID = srcCCtx->dictID;

    /* copy block state (entropy tables + repeat offsets of the previous block) */
    memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));

    return 0;
}
1791
1792 /*! ZSTD_copyCCtx() :
1793 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
1794 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
1795 * pledgedSrcSize==0 means "unknown".
1796 * @return : 0, or an error code */
ZSTD_copyCCtx(ZSTD_CCtx * dstCCtx,const ZSTD_CCtx * srcCCtx,unsigned long long pledgedSrcSize)1797 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
1798 {
1799 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
1800 ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0);
1801 ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
1802 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
1803 fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
1804
1805 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
1806 fParams, pledgedSrcSize,
1807 zbuff);
1808 }
1809
1810
1811 #define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));  /* can be casted to int */

#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
    /* To validate that the table re-use logic is sound, and that we don't
     * access table space that we haven't cleaned, we re-"poison" the table
     * space every time we mark it dirty.
     *
     * This function however is intended to operate on those dirty tables and
     * re-clean them. So when this function is used correctly, we can unpoison
     * the memory it operated on. This introduces a blind spot though, since
     * if we now try to operate on __actually__ poisoned memory, we will not
     * detect that. */
    __msan_unpoison(table, size * sizeof(U32));
#endif

    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            if (preserveMark) {
                /* pre-bump sentinel entries by reducerValue so the subtraction
                 * below leaves ZSTD_DUBT_UNSORTED_MARK values unchanged */
                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
                table[cellNb] += adder;
            }
            /* saturating subtraction : indexes below reducerValue squash to 0 */
            if (table[cellNb] < reducerValue) table[cellNb] = 0;
            else table[cellNb] -= reducerValue;
            cellNb++;
    }   }
}
1852
/* Rescale all indexes of `table` by `reducerValue` (plain variant :
 * does not preserve the btlazy2 "unsorted mark"). */
static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}
1857
/* Rescale all indexes of `table` by `reducerValue`, preserving
 * ZSTD_DUBT_UNSORTED_MARK entries (required by the btlazy2 strategy). */
static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
1862
1863 /*! ZSTD_reduceIndex() :
1864 * rescale all indexes to avoid future overflow (indexes are U32) */
ZSTD_reduceIndex(ZSTD_matchState_t * ms,ZSTD_CCtx_params const * params,const U32 reducerValue)1865 static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
1866 {
1867 { U32 const hSize = (U32)1 << params->cParams.hashLog;
1868 ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
1869 }
1870
1871 if (params->cParams.strategy != ZSTD_fast) {
1872 U32 const chainSize = (U32)1 << params->cParams.chainLog;
1873 if (params->cParams.strategy == ZSTD_btlazy2)
1874 ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
1875 else
1876 ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
1877 }
1878
1879 if (ms->hashLog3) {
1880 U32 const h3Size = (U32)1 << ms->hashLog3;
1881 ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
1882 }
1883 }
1884
1885
1886 /*-*******************************************************
1887 * Block entropic compression
1888 *********************************************************/
1889
1890 /* See doc/zstd_compression_format.md for detailed format description */
1891
/* Emit `src` as a raw (uncompressed) block :
 * 3-byte little-endian header carrying lastBlock flag, bt_raw type and
 * srcSize, followed by a verbatim copy of the input.
 * @return : ZSTD_blockHeaderSize + srcSize, or dstSize_tooSmall */
static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall);
    MEM_writeLE24(op, cBlockHeader24);
    memcpy(op + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}
1901
ZSTD_seqToCodes(const seqStore_t * seqStorePtr)1902 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
1903 {
1904 const seqDef* const sequences = seqStorePtr->sequencesStart;
1905 BYTE* const llCodeTable = seqStorePtr->llCode;
1906 BYTE* const ofCodeTable = seqStorePtr->ofCode;
1907 BYTE* const mlCodeTable = seqStorePtr->mlCode;
1908 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
1909 U32 u;
1910 assert(nbSeq <= seqStorePtr->maxNbSeq);
1911 for (u=0; u<nbSeq; u++) {
1912 U32 const llv = sequences[u].litLength;
1913 U32 const mlv = sequences[u].matchLength;
1914 llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
1915 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
1916 mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
1917 }
1918 if (seqStorePtr->longLengthID==1)
1919 llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
1920 if (seqStorePtr->longLengthID==2)
1921 mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
1922 }
1923
ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params * cctxParams)1924 static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
1925 {
1926 switch (cctxParams->literalCompressionMode) {
1927 case ZSTD_lcm_huffman:
1928 return 0;
1929 case ZSTD_lcm_uncompressed:
1930 return 1;
1931 default:
1932 assert(0 /* impossible: pre-validated */);
1933 /* fall-through */
1934 case ZSTD_lcm_auto:
1935 return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
1936 }
1937 }
1938
/* ZSTD_compressSequences_internal():
 * actually compresses both literals and sequences.
 * Output layout : compressed literals, then the sequences count (1-3 bytes),
 * then the seqHead byte (FSE encoding types), then up to three FSE table
 * descriptions (LL, OF, ML), then the interleaved sequences bitstream.
 * @return : compressed size written into `dst`, or an error code.
 *  Note : may return 0 to request an uncompressed block instead,
 *  to dodge a decoder bug in zstd versions <= 1.3.4 (see end of function). */
MEM_STATIC size_t
ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                          const ZSTD_entropyCTables_t* prevEntropy,
                                ZSTD_entropyCTables_t* nextEntropy,
                          const ZSTD_CCtx_params* cctxParams,
                          void* dst, size_t dstCapacity,
                          void* entropyWorkspace, size_t entropyWkspSize,
                          const int bmi2)
{
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    unsigned count[MaxSeq+1];
    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    BYTE* seqHead;
    BYTE* lastNCount = NULL;   /* position of the last set_compressed table description, if any */

    DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));

    /* Compress literals */
    { const BYTE* const literals = seqStorePtr->litStart;
        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
        size_t const cSize = ZSTD_compressLiterals(
                                    &prevEntropy->huf, &nextEntropy->huf,
                                    cctxParams->cParams.strategy,
                                    ZSTD_disableLiteralsCompression(cctxParams),
                                    op, dstCapacity,
                                    literals, litSize,
                                    entropyWorkspace, entropyWkspSize,
                                    bmi2);
        FORWARD_IF_ERROR(cSize);
        assert(cSize <= dstCapacity);
        op += cSize;
    }

    /* Sequences Header : nbSeq uses a 1-3 byte variable-length encoding */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall);
    if (nbSeq < 128) {
        *op++ = (BYTE)nbSeq;
    } else if (nbSeq < LONGNBSEQ) {
        op[0] = (BYTE)((nbSeq>>8) + 0x80);
        op[1] = (BYTE)nbSeq;
        op+=2;
    } else {
        op[0]=0xFF;
        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
        op+=3;
    }
    assert(op <= oend);
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
        return (size_t)(op - ostart);
    }

    /* seqHead : flags for FSE encoding type */
    seqHead = op++;
    assert(op <= oend);

    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    /* build CTable for Literal Lengths */
    { unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->fse.litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none));   /* We don't copy tables */
        { size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
                count, max, llCodeTable, nbSeq,
                LL_defaultNorm, LL_defaultNormLog, MaxLL,
                prevEntropy->fse.litlengthCTable,
                sizeof(prevEntropy->fse.litlengthCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize);
            if (LLtype == set_compressed)
                lastNCount = op;   /* remember for the <=1.3.4 workaround below */
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    { unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(
            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->fse.offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none));   /* We don't copy tables */
        { size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
                count, max, ofCodeTable, nbSeq,
                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                prevEntropy->fse.offcodeCTable,
                sizeof(prevEntropy->fse.offcodeCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize);
            if (Offtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    { unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(
            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none));   /* We don't copy tables */
        { size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
                count, max, mlCodeTable, nbSeq,
                ML_defaultNorm, ML_defaultNormLog, MaxML,
                prevEntropy->fse.matchlengthCTable,
                sizeof(prevEntropy->fse.matchlengthCTable),
                entropyWorkspace, entropyWkspSize);
            FORWARD_IF_ERROR(countSize);
            if (MLtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }

    /* pack the three encoding types into the seqHead byte */
    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

    { size_t const bitstreamSize = ZSTD_encodeSequences(
                                        op, (size_t)(oend - op),
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize);
        op += bitstreamSize;
        assert(op <= oend);
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
         * This can happen when the last set_compressed table present is 2
         * bytes and the bitstream is only one byte.
         * In this exceedingly rare case, we will simply emit an uncompressed
         * block, since it isn't worth optimizing.
         */
        if (lastNCount && (op - lastNCount) < 4) {
            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
            assert(op - lastNCount == 3);
            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                        "emitting an uncompressed block.");
            return 0;
        }
    }

    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
    return (size_t)(op - ostart);
}
2127
2128 MEM_STATIC size_t
ZSTD_compressSequences(seqStore_t * seqStorePtr,const ZSTD_entropyCTables_t * prevEntropy,ZSTD_entropyCTables_t * nextEntropy,const ZSTD_CCtx_params * cctxParams,void * dst,size_t dstCapacity,size_t srcSize,void * entropyWorkspace,size_t entropyWkspSize,int bmi2)2129 ZSTD_compressSequences(seqStore_t* seqStorePtr,
2130 const ZSTD_entropyCTables_t* prevEntropy,
2131 ZSTD_entropyCTables_t* nextEntropy,
2132 const ZSTD_CCtx_params* cctxParams,
2133 void* dst, size_t dstCapacity,
2134 size_t srcSize,
2135 void* entropyWorkspace, size_t entropyWkspSize,
2136 int bmi2)
2137 {
2138 size_t const cSize = ZSTD_compressSequences_internal(
2139 seqStorePtr, prevEntropy, nextEntropy, cctxParams,
2140 dst, dstCapacity,
2141 entropyWorkspace, entropyWkspSize, bmi2);
2142 if (cSize == 0) return 0;
2143 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
2144 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
2145 */
2146 if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
2147 return 0; /* block not compressed */
2148 FORWARD_IF_ERROR(cSize);
2149
2150 /* Check compressibility */
2151 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
2152 if (cSize >= maxCSize) return 0; /* block not compressed */
2153 }
2154
2155 return cSize;
2156 }
2157
/* ZSTD_selectBlockCompressor() :
 * Not static, but internal use only (used by long distance matcher)
 * assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
{
    /* first index : dictMode ; second index : strategy.
     * Note : btultra2 has no dedicated extDict / dictMatchState variant,
     * so those rows reuse the btultra implementation in the last column. */
    static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = {
        { ZSTD_compressBlock_fast  /* default for 0 */,
          ZSTD_compressBlock_fast,
          ZSTD_compressBlock_doubleFast,
          ZSTD_compressBlock_greedy,
          ZSTD_compressBlock_lazy,
          ZSTD_compressBlock_lazy2,
          ZSTD_compressBlock_btlazy2,
          ZSTD_compressBlock_btopt,
          ZSTD_compressBlock_btultra,
          ZSTD_compressBlock_btultra2 },
        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
          ZSTD_compressBlock_fast_extDict,
          ZSTD_compressBlock_doubleFast_extDict,
          ZSTD_compressBlock_greedy_extDict,
          ZSTD_compressBlock_lazy_extDict,
          ZSTD_compressBlock_lazy2_extDict,
          ZSTD_compressBlock_btlazy2_extDict,
          ZSTD_compressBlock_btopt_extDict,
          ZSTD_compressBlock_btultra_extDict,
          ZSTD_compressBlock_btultra_extDict },
        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
          ZSTD_compressBlock_fast_dictMatchState,
          ZSTD_compressBlock_doubleFast_dictMatchState,
          ZSTD_compressBlock_greedy_dictMatchState,
          ZSTD_compressBlock_lazy_dictMatchState,
          ZSTD_compressBlock_lazy2_dictMatchState,
          ZSTD_compressBlock_btlazy2_dictMatchState,
          ZSTD_compressBlock_btopt_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState }
    };
    ZSTD_blockCompressor selectedCompressor;
    /* the table layout relies on ZSTD_fast being strategy index 1 */
    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);

    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
    assert(selectedCompressor != NULL);
    return selectedCompressor;
}
2203
/* Append the block's trailing literals (those after the last match)
 * into the seqStore's literal buffer and advance the literal cursor. */
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                   const BYTE* anchor, size_t lastLLSize)
{
    BYTE* const litEnd = seqStorePtr->lit;
    memcpy(litEnd, anchor, lastLLSize);
    seqStorePtr->lit = litEnd + lastLLSize;
}
2210
/* Rewind the seqStore for a new block :
 * literal and sequence cursors go back to the start of their buffers,
 * and the "long length" flag is cleared. Buffers themselves are kept. */
void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->longLengthID = 0;
    ssPtr->lit = ssPtr->litStart;
    ssPtr->sequences = ssPtr->sequencesStart;
}
2217
2218 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
2219
/* ZSTD_buildSeqStore() :
 * Run the appropriate sequence producer over `src` (externally provided
 * sequences, the long distance matcher, or the strategy's block compressor),
 * filling zc->seqStore with sequences and literals.
 * @return : ZSTDbss_compress when sequences were produced,
 *           ZSTDbss_noCompress when input is too small to attempt compression,
 *           or an error code (from ZSTD_ldm_generateSequences). */
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    /* Assert that we have correctly flushed the ctx params into the ms's copy */
    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
        ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
    }
    ZSTD_resetSeqStore(&(zc->seqStore));
    /* required for optimal parser to read stats from dictionary */
    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
    /* tell the optimal parser how we expect to compress literals */
    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
    /* a gap between an attached dict and the current window is not safe,
     * they must remain adjacent,
     * and when that stops being the case, the dict must be unset */
    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);

    /* limited update after a very long match :
     * cap how far nextToUpdate lags behind the current position */
    {   const BYTE* const base = ms->window.base;
        const BYTE* const istart = (const BYTE*)src;
        const U32 current = (U32)(istart-base);
        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
        if (current > ms->nextToUpdate + 384)
            ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
    }

    /* select and store sequences */
    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
        size_t lastLLSize;
        {   /* seed next block's repeat offsets from the previous block's */
            int i;
            for (i = 0; i < ZSTD_REP_NUM; ++i)
                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
        }
        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
            /* externally supplied sequences are mutually exclusive with LDM */
            assert(!zc->appliedParams.ldmParams.enableLdm);
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       src, srcSize);
            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
        } else if (zc->appliedParams.ldmParams.enableLdm) {
            rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};

            ldmSeqStore.seq = zc->ldmSequences;
            ldmSeqStore.capacity = zc->maxNbLdmSequences;
            /* Updates ldmSeqStore.size */
            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
                                               &zc->appliedParams.ldmParams,
                                               src, srcSize));
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&ldmSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       src, srcSize);
            assert(ldmSeqStore.pos == ldmSeqStore.size);
        } else {   /* not long range mode */
            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
        }
        {   /* stash the literals located after the last match of the block */
            const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
    }   }
    return ZSTDbss_compress;
}
2291
/* ZSTD_copyBlockSequences() :
 * Translate the sequences of the block just built (zc->seqStore) into the
 * public ZSTD_Sequence format, appending them into the collector buffer at
 * zc->seqCollector.seqStart[zc->seqCollector.seqIndex].
 * Internal offsets <= ZSTD_REP_NUM are repeat codes and are resolved to
 * absolute offsets by looking back at previously emitted sequences
 * (or repStartValue[] at the very beginning of the frame). */
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
    const seqDef* seqs = seqStore->sequencesStart;
    size_t seqsSize = seqStore->sequences - seqs;

    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    size_t i; size_t position; int repIdx;

    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    for (i = 0, position = 0; i < seqsSize; ++i) {
        outSeqs[i].offset = seqs[i].offset;
        outSeqs[i].litLength = seqs[i].litLength;
        /* internal matchLength is stored minus MINMATCH; restore the true value */
        outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;

        /* the one flagged "long length" sequence stores its extra 0x10000 out of band */
        if (i == seqStore->longLengthPos) {
            if (seqStore->longLengthID == 1) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthID == 2) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (outSeqs[i].offset <= ZSTD_REP_NUM) {
            /* repeat code : resolve against a previously emitted sequence */
            outSeqs[i].rep = outSeqs[i].offset;
            repIdx = (unsigned int)i - outSeqs[i].offset;

            if (outSeqs[i].litLength == 0) {
                /* litLength == 0 shifts the repeat-code meaning by one */
                if (outSeqs[i].offset < 3) {
                    --repIdx;
                } else {
                    repIdx = (unsigned int)i - 1;
                }
                ++outSeqs[i].rep;
            }
            /* repIdx may reach back before the block start, at most to -3,
             * in which case the frame-initial repStartValue[] applies */
            assert(repIdx >= -3);
            outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
            if (outSeqs[i].rep == 4) {
                --outSeqs[i].offset;
            }
        } else {
            /* regular offset : remove the repeat-code bias */
            outSeqs[i].offset -= ZSTD_REP_NUM;
        }

        position += outSeqs[i].litLength;
        outSeqs[i].matchPos = (unsigned int)position;   /* match starts right after the literals */
        position += outSeqs[i].matchLength;
    }
    zc->seqCollector.seqIndex += seqsSize;
}
2342
/*! ZSTD_getSequences() :
 *  Compress `src` with `zc` in sequence-collection mode : the sequences
 *  generated for each block are appended into `outSeqs` (see
 *  ZSTD_copyBlockSequences()). The compressed output itself is written to a
 *  temporary buffer and discarded.
 * @return : number of sequences collected, or an error code */
size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
    size_t outSeqsSize, const void* src, size_t srcSize)
{
    const size_t dstCapacity = ZSTD_compressBound(srcSize);
    void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem);
    SeqCollector seqCollector;

    RETURN_ERROR_IF(dst == NULL, memory_allocation);

    seqCollector.collectSequences = 1;
    seqCollector.seqStart = outSeqs;
    seqCollector.seqIndex = 0;
    seqCollector.maxSequences = outSeqsSize;
    zc->seqCollector = seqCollector;

    /* Fix : the result of ZSTD_compress2() was previously discarded, so a
     * failed compression silently returned a bogus sequence count.
     * Free the scratch buffer first, then forward any error to the caller. */
    { size_t const ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
        ZSTD_free(dst, ZSTD_defaultCMem);
        FORWARD_IF_ERROR(ret);
    }
    return zc->seqCollector.seqIndex;
}
2362
2363 /* Returns true if the given block is a RLE block */
ZSTD_isRLE(const BYTE * ip,size_t length)2364 static int ZSTD_isRLE(const BYTE *ip, size_t length) {
2365 size_t i;
2366 if (length < 2) return 1;
2367 for (i = 1; i < length; ++i) {
2368 if (ip[0] != ip[i]) return 0;
2369 }
2370 return 1;
2371 }
2372
/* ZSTD_compressBlock_internal() :
 * Compress a single block : build the sequence store, then entropy-encode it.
 * When the sequence collector is active, sequences are exported instead and
 * no output is produced.
 * @return : compressed size of the block (0 means "not compressible",
 *           1 means the caller should emit an RLE block), or an error code */
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize, U32 frame)
{
    /* This is the upper bound for the length of an rle block.
     * This isn't the actual upper bound. Finding the real threshold
     * needs further investigation.
     */
    const U32 rleMaxLength = 25;
    size_t cSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    /* find sequences (matches + literals) for this block */
    { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss);
        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    }

    if (zc->seqCollector.collectSequences) {
        /* sequence collection mode : export sequences, produce no output */
        ZSTD_copyBlockSequences(zc);
        return 0;
    }

    /* encode sequences and literals */
    cSize = ZSTD_compressSequences(&zc->seqStore,
            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
            &zc->appliedParams,
            dst, dstCapacity,
            srcSize,
            zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
            zc->bmi2);

    if (frame &&
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        !zc->isFirstBlock &&
        cSize < rleMaxLength &&
        ZSTD_isRLE(ip, srcSize))
    {
        /* cSize == 1 signals the caller to emit an RLE block of ip[0] */
        cSize = 1;
        op[0] = ip[0];
    }

out:
    if (!ZSTD_isError(cSize) && cSize > 1) {
        /* confirm repcodes and entropy tables when emitting a compressed block */
        ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
        zc->blockState.prevCBlock = zc->blockState.nextCBlock;
        zc->blockState.nextCBlock = tmp;
    }
    /* We check that dictionaries have offset codes available for the first
     * block. After the first block, the offcode table might not have large
     * enough codes to represent the offsets in the data.
     */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
2437
2438
/* ZSTD_overflowCorrectIfNeeded() :
 * When window indexes are close to overflowing their 32-bit range,
 * rescale them (and every table entry) downward by a common correction,
 * then invalidate any attached dictionary. No-op otherwise. */
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         void const* ip,
                                         void const* iend)
{
    if (!ZSTD_window_needOverflowCorrection(ms->window, iend)) return;

    {   U32 const windowMaxDist = (U32)1 << params->cParams.windowLog;
        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, windowMaxDist, ip);
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);

        /* rescaling the indexes leaves the tables stale until rewritten */
        ZSTD_cwksp_mark_tables_dirty(ws);
        ZSTD_reduceIndex(ms, params, correction);
        ZSTD_cwksp_mark_tables_clean(ws);

        ms->nextToUpdate = (ms->nextToUpdate < correction) ? 0 : ms->nextToUpdate - correction;

        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}
2462
/*! ZSTD_compress_frameChunk() :
 *  Compress a chunk of data into one or multiple blocks.
 *  All blocks will be terminated, all input will be consumed.
 *  Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *  Frame is supposed already started (header already produced)
 * @return : compressed size, or an error code
 */
static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                     U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

    DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
    /* frame checksum is computed incrementally over the raw input */
    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
        /* lastBlock only when this is the frame's last chunk AND its last block */
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);

        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
                        dstSize_tooSmall,
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;

        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

        { size_t cSize = ZSTD_compressBlock_internal(cctx,
                                op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                ip, blockSize, 1 /* frame */);
            FORWARD_IF_ERROR(cSize);
            if (cSize == 0) {  /* block is not compressible */
                cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize);
            } else {
                /* 3-byte block header : bit 0 = lastBlock, bits 1-2 = block type,
                 * bits 3.. = block size (blockSize for RLE, cSize for compressed) */
                const U32 cBlockHeader = cSize == 1 ?
                    lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
                    lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
                MEM_writeLE24(op, cBlockHeader);
                cSize += ZSTD_blockHeaderSize;
            }

            ip += blockSize;
            assert(remaining >= blockSize);
            remaining -= blockSize;
            op += cSize;
            assert(dstCapacity >= cSize);
            dstCapacity -= cSize;
            cctx->isFirstBlock = 0;
            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
                        (unsigned)cSize);
    } }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return (size_t)(op-ostart);
}
2532
2533
/* ZSTD_writeFrameHeader() :
 * Write the zstd frame header into `dst` : magic number (zstd1 format only),
 * frame header descriptor byte, optional window descriptor, optional dictID
 * (0/1/2/4 bytes) and optional frame content size (0/1/2/4/8 bytes).
 * @return : number of bytes written, or an error code if dstCapacity is
 *           smaller than ZSTD_FRAMEHEADERSIZE_MAX */
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{   BYTE* const op = (BYTE*)dst;
    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
    U32   const checksumFlag = params->fParams.checksumFlag>0;
    U32   const windowSize = (U32)1 << params->cParams.windowLog;
    /* single-segment : whole frame fits in one window, window descriptor omitted */
    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32   const fcsCode = params->fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
    /* descriptor byte : bits 0-1 dictID size code, bit 2 checksum,
     * bit 5 single-segment, bits 6-7 frame content size code */
    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
    size_t pos=0;

    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall);
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);

    if (params->format == ZSTD_f_zstd1) {
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        pos = 4;
    }
    op[pos++] = frameHeaderDescriptionByte;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
    {
        default:  assert(0); /* impossible */
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    switch(fcsCode)
    {
        default:  assert(0); /* impossible */
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}
2577
2578 /* ZSTD_writeLastEmptyBlock() :
2579 * output an empty Block with end-of-frame mark to complete a frame
2580 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
2581 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
2582 */
ZSTD_writeLastEmptyBlock(void * dst,size_t dstCapacity)2583 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
2584 {
2585 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall);
2586 { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */
2587 MEM_writeLE24(dst, cBlockHeader24);
2588 return ZSTD_blockHeaderSize;
2589 }
2590 }
2591
/* ZSTD_referenceExternalSequences() :
 * Attach a caller-provided sequence array as the external sequence store.
 * Only legal before the first block is produced, and incompatible with
 * internal LDM, which generates its own sequences.
 * @return : 0, or an error code */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong);
    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
                    parameter_unsupported);
    cctx->externSeqStore.pos = 0;
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.size = nbSeq;
    return 0;
}
2603
2604
/* ZSTD_compressContinue_internal() :
 * Core streaming entry point : emits the frame header on first call (frame
 * mode), updates the match-state window with the new input, then compresses
 * it as a frame chunk or a single raw block.
 * @return : number of bytes written into dst (header + compressed data),
 *           or an error code */
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
    size_t fhSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    if (frame && (cctx->stage==ZSTDcs_init)) {
        /* first call in frame mode : emit the frame header */
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize);
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */

    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
        /* window not contiguous with previous input : restart insertion at dictLimit */
        ms->nextToUpdate = ms->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm) {
        /* LDM keeps its own window in sync with the input */
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams,
            src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t const cSize = frame ?
                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
        FORWARD_IF_ERROR(cSize);
        cctx->consumedSrcSize += srcSize;
        cctx->producedCSize += (cSize + fhSize);
        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            RETURN_ERROR_IF(
                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }
        return cSize + fhSize;
    }
}
2664
/* ZSTD_compressContinue() :
 * Public streaming step : compress `src` as part of an ongoing frame.
 * @return : number of bytes written into dst, or an error code */
size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
    /* frame mode, not the last chunk */
    return ZSTD_compressContinue_internal(cctx,
                                          dst, dstCapacity,
                                          src, srcSize,
                                          1 /* frame */, 0 /* lastFrameChunk */);
}
2672
2673
ZSTD_getBlockSize(const ZSTD_CCtx * cctx)2674 size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
2675 {
2676 ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
2677 assert(!ZSTD_checkCParams(cParams));
2678 return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
2679 }
2680
/* ZSTD_compressBlock() :
 * Compress `src` as one independent raw block (no frame header, no epilogue).
 * srcSize must not exceed ZSTD_getBlockSize(cctx).
 * @return : compressed size, or an error code */
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
    RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong);

    /* block mode, not a frame, not the last chunk */
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
2689
/*! ZSTD_loadDictionaryContent() :
 *  Insert dictionary content into the match state's window and index it
 *  into the search structures of the selected strategy, chunk by chunk.
 * @return : 0, or an error code
 */
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         const void* src, size_t srcSize,
                                         ZSTD_dictTableLoadMethod_e dtlm)
{
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;

    ZSTD_window_update(&ms->window, src, srcSize);
    /* forceWindow disables dictionary-range extension of the window */
    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);

    /* Assert that the ms params match the params we're being given */
    ZSTD_assertEqualCParams(params->cParams, ms->cParams);

    /* too small to index anything (match finders read HASH_READ_SIZE bytes at a time) */
    if (srcSize <= HASH_READ_SIZE) return 0;

    while (iend - ip > HASH_READ_SIZE) {
        size_t const remaining = (size_t)(iend - ip);
        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
        const BYTE* const ichunk = ip + chunk;

        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);

        /* fill the search structure appropriate to the strategy */
        switch(params->cParams.strategy)
        {
        case ZSTD_fast:
            ZSTD_fillHashTable(ms, ichunk, dtlm);
            break;
        case ZSTD_dfast:
            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
            break;

        case ZSTD_greedy:
        case ZSTD_lazy:
        case ZSTD_lazy2:
            if (chunk >= HASH_READ_SIZE)
                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
            break;

        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
        case ZSTD_btopt:
        case ZSTD_btultra:
        case ZSTD_btultra2:
            if (chunk >= HASH_READ_SIZE)
                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
            break;

        default:
            assert(0);  /* not possible : not a valid strategy id */
        }

        ip = ichunk;
    }

    ms->nextToUpdate = (U32)(iend - ms->window.base);
    return 0;
}
2751
2752
2753 /* Dictionaries that assign zero probability to symbols that show up causes problems
2754 when FSE encoding. Refuse dictionaries that assign zero probability to symbols
2755 that we may encounter during compression.
2756 NOTE: This behavior is not standard and could be improved in the future. */
ZSTD_checkDictNCount(short * normalizedCounter,unsigned dictMaxSymbolValue,unsigned maxSymbolValue)2757 static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
2758 U32 s;
2759 RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted);
2760 for (s = 0; s <= maxSymbolValue; ++s) {
2761 RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted);
2762 }
2763 return 0;
2764 }
2765
2766
/* Dictionary format :
 * See :
 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
 */
/*! ZSTD_loadZstdDictionary() :
 * Parse a full zstd dictionary : entropy tables (Huffman literals table,
 * then offset / matchLength / litLength FSE tables), starting repcodes,
 * and finally the raw content, which is loaded into the match state.
 * @return : dictID, or an error code
 *  assumptions : magic number supposed already checked
 *                dictSize supposed > 8
 */
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
                                      ZSTD_matchState_t* ms,
                                      ZSTD_cwksp* ws,
                                      ZSTD_CCtx_params const* params,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictTableLoadMethod_e dtlm,
                                      void* workspace)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    size_t dictID;

    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(dictSize > 8);
    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);

    dictPtr += 4;   /* skip magic number */
    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr);
    dictPtr += 4;

    /* literals : Huffman table */
    {   unsigned maxSymbolValue = 255;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted);
        /* all 256 literal values must be representable */
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted);
        dictPtr += hufHeaderSize;
    }

    /* offset codes : FSE table */
    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += offcodeHeaderSize;
    }

    /* match lengths : FSE table */
    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
        /* Every match length code must have non-zero probability */
        FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += matchlengthHeaderSize;
    }

    /* literal lengths : FSE table */
    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
        /* Every literal length code must have non-zero probability */
        FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.litlengthCTable,
                litlengthNCount, litlengthMaxValue, litlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += litlengthHeaderSize;
    }

    /* starting repcodes : 3 x 4-byte little-endian values */
    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
    bs->rep[0] = MEM_readLE32(dictPtr+0);
    bs->rep[1] = MEM_readLE32(dictPtr+4);
    bs->rep[2] = MEM_readLE32(dictPtr+8);
    dictPtr += 12;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        U32 offcodeMax = MaxOff;
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
        }
        /* All offset values <= dictContentSize + 128 KB must be representable */
        FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
        /* All repCodes must be <= dictContentSize and != 0*/
        {   U32 u;
            for (u=0; u<3; u++) {
                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted);
                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted);
        }   }

        /* tables fully loaded : mark them directly reusable for the first block */
        bs->entropy.huf.repeatMode = HUF_repeat_valid;
        bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
        bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
        bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
            ms, ws, params, dictPtr, dictContentSize, dtlm));
        return dictID;
    }
}
2879
2880 /** ZSTD_compress_insertDictionary() :
2881 * @return : dictID, or an error code */
2882 static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t * bs,ZSTD_matchState_t * ms,ZSTD_cwksp * ws,const ZSTD_CCtx_params * params,const void * dict,size_t dictSize,ZSTD_dictContentType_e dictContentType,ZSTD_dictTableLoadMethod_e dtlm,void * workspace)2883 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
2884 ZSTD_matchState_t* ms,
2885 ZSTD_cwksp* ws,
2886 const ZSTD_CCtx_params* params,
2887 const void* dict, size_t dictSize,
2888 ZSTD_dictContentType_e dictContentType,
2889 ZSTD_dictTableLoadMethod_e dtlm,
2890 void* workspace)
2891 {
2892 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
2893 if ((dict==NULL) || (dictSize<=8)) return 0;
2894
2895 ZSTD_reset_compressedBlockState(bs);
2896
2897 /* dict restricted modes */
2898 if (dictContentType == ZSTD_dct_rawContent)
2899 return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm);
2900
2901 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
2902 if (dictContentType == ZSTD_dct_auto) {
2903 DEBUGLOG(4, "raw content dictionary detected");
2904 return ZSTD_loadDictionaryContent(
2905 ms, ws, params, dict, dictSize, dtlm);
2906 }
2907 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
2908 assert(0); /* impossible */
2909 }
2910
2911 /* dict as full zstd dictionary */
2912 return ZSTD_loadZstdDictionary(
2913 bs, ms, ws, params, dict, dictSize, dtlm, workspace);
2914 }
2915
2916 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
2917 #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)
2918
/*! ZSTD_compressBegin_internal() :
 *  Initialize the context for a new frame : either attach/copy a prepared
 *  CDict (when its parameters may be reused), or reset the context and load
 *  the provided dictionary from scratch.
 * @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                    ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    /* Prefer the CDict's own parameters when the source is small relative to
     * the dictionary (or of unknown size), unless the caller forces a reload. */
    if ( (cdict)
      && (cdict->dictContentSize > 0)
      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
        || cdict->compressionLevel == 0)
      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
    }

    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
                                     ZSTDcrp_makeClean, zbuff) );
    /* load dictionary content from either the CDict or the raw dict buffer */
    {   size_t const dictID = cdict ?
                ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize,
                        dictContentType, dtlm, cctx->entropyWorkspace)
              : ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->workspace, params, dict, dictSize,
                        dictContentType, dtlm, cctx->entropyWorkspace);
        FORWARD_IF_ERROR(dictID);
        assert(dictID <= UINT_MAX);
        cctx->dictID = (U32)dictID;
    }
    return 0;
}
2960
/* ZSTD_compressBegin_advanced_internal() :
 * Validate compression parameters, then begin a non-buffered frame.
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
    /* compression parameters verification and optimization */
    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) );
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm,
                                       cdict, params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}
2978
2979 /*! ZSTD_compressBegin_advanced() :
2980 * @return : 0, or an error code */
ZSTD_compressBegin_advanced(ZSTD_CCtx * cctx,const void * dict,size_t dictSize,ZSTD_parameters params,unsigned long long pledgedSrcSize)2981 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
2982 const void* dict, size_t dictSize,
2983 ZSTD_parameters params, unsigned long long pledgedSrcSize)
2984 {
2985 ZSTD_CCtx_params const cctxParams =
2986 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
2987 return ZSTD_compressBegin_advanced_internal(cctx,
2988 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
2989 NULL /*cdict*/,
2990 &cctxParams, pledgedSrcSize);
2991 }
2992
/* ZSTD_compressBegin_usingDict() :
 * Begin a frame at the given compression level, with an optional raw dict
 * buffer and unknown content size.
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    /* derive parameters from the level, tuned for this dictionary size */
    ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
    ZSTD_CCtx_params const cctxParams =
            ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                       NULL /*cdict*/,
                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
3002
ZSTD_compressBegin(ZSTD_CCtx * cctx,int compressionLevel)3003 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
3004 {
3005 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
3006 }
3007
3008
/*! ZSTD_writeEpilogue() :
 *  Ends a frame : writes the frame header for an empty frame if none was
 *  emitted yet, a terminating empty "last" block if the last block was not
 *  already flagged, and the optional xxhash64 checksum.
 * @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    DEBUGLOG(4, "ZSTD_writeEpilogue");
    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
        FORWARD_IF_ERROR(fhSize);
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        /* the header is 3 bytes, but MEM_writeLE32 touches 4 — hence the <4 check */
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall);
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->appliedParams.fParams.checksumFlag) {
        /* frame checksum : low 32 bits of the running xxhash64 */
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall);
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}
3050
/* ZSTD_compressEnd() :
 * Compress the final chunk of input and close the frame (last block flag,
 * epilogue, optional checksum). Verifies the pledged source size was honored.
 * @return : total nb of bytes written into dst, or an error code */
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    size_t epilogueSize;
    FORWARD_IF_ERROR(cSize);

    epilogueSize = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
    FORWARD_IF_ERROR(epilogueSize);

    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
        DEBUGLOG(4, "end of frame : controlling src size");
        RETURN_ERROR_IF(
            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
            srcSize_wrong,
            "error : pledgedSrcSize = %u, while realSrcSize = %u",
            (unsigned)cctx->pledgedSrcSizePlusOne-1,
            (unsigned)cctx->consumedSrcSize);
    }
    return cSize + epilogueSize;
}
3075
3076
static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      const void* dict,size_t dictSize,
                                      ZSTD_parameters params)
{
    /* Translate the caller-provided ZSTD_parameters into a full
     * ZSTD_CCtx_params structure, then delegate to the advanced
     * single-pass entry point. */
    ZSTD_CCtx_params const translatedParams =
        ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
    DEBUGLOG(4, "ZSTD_compress_internal");
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           &translatedParams);
}
3092
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const void* dict,size_t dictSize,
                               ZSTD_parameters params)
{
    /* Validate compression parameters up front, then forward everything
     * to the internal one-shot implementation. */
    DEBUGLOG(4, "ZSTD_compress_advanced");
    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams));
    return ZSTD_compress_internal(cctx,
                                  dst, dstCapacity, src, srcSize,
                                  dict, dictSize, params);
}
3107
3108 /* Internal */
size_t ZSTD_compress_advanced_internal(
        ZSTD_CCtx* cctx,
        void* dst, size_t dstCapacity,
        const void* src, size_t srcSize,
        const void* dict,size_t dictSize,
        const ZSTD_CCtx_params* params)
{
    /* One-shot compression : initialize the context (loading the raw dict
     * content if provided), then compress and finalize the frame. */
    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                          dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                          params, srcSize, ZSTDb_not_buffered) );
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
3122
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const void* dict, size_t dictSize,
                               int compressionLevel)
{
    /* srcSize==0 is bumped to 1 so ZSTD_getParams() still receives a
     * non-zero size hint; dictSize is only meaningful when dict != NULL. */
    ZSTD_parameters const zParams = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0);
    ZSTD_CCtx_params ctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, zParams);
    assert(zParams.fParams.contentSizeFlag == 1);
    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &ctxParams);
}
3134
size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         int compressionLevel)
{
    /* Dictionary-less one-shot compression : just forward with no dict. */
    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
    assert(cctx != NULL);
    return ZSTD_compress_usingDict(cctx, dst, dstCapacity,
                                   src, srcSize,
                                   NULL, 0, compressionLevel);
}
3144
size_t ZSTD_compress(void* dst, size_t dstCapacity,
                     const void* src, size_t srcSize,
                     int compressionLevel)
{
    /* Convenience entry point : builds a transient compression context on
     * the stack. Only the context's heap-allocated members need releasing. */
    ZSTD_CCtx ctxBody;
    size_t cSize;
    ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
    cSize = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtxContent(&ctxBody);   /* ctxBody lives on the stack : free only its heap content */
    return cSize;
}
3156
3157
3158 /* ===== Dictionary API ===== */
3159
3160 /*! ZSTD_estimateCDictSize_advanced() :
3161 * Estimate amount of memory that will be needed to create a dictionary with following arguments */
size_t ZSTD_estimateCDictSize_advanced(
        size_t dictSize, ZSTD_compressionParameters cParams,
        ZSTD_dictLoadMethod_e dictLoadMethod)
{
    /* Total = CDict struct + entropy workspace + match state,
     * plus a copy of the dictionary content unless it is referenced. */
    size_t const dictContentCost = (dictLoadMethod == ZSTD_dlm_byRef)
        ? 0
        : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)));
    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
         + dictContentCost;
}
3173
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
    /* Derive cParams from the level (no known src size), assuming the
     * dictionary content will be copied into the CDict. */
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
}
3179
ZSTD_sizeof_CDict(const ZSTD_CDict * cdict)3180 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
3181 {
3182 if (cdict==NULL) return 0; /* support sizeof on NULL */
3183 DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
3184 /* cdict may be in the workspace */
3185 return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
3186 + ZSTD_cwksp_sizeof(&cdict->workspace);
3187 }
3188
/*! ZSTD_initCDict_internal() :
 *  Fill an already-emplaced CDict : copy or reference the dictionary
 *  content, reserve the entropy workspace, reset the match state,
 *  then digest the dictionary content.
 * @return : 0 on success, or an error code (allocation / dict loading).
 *  Note : fixes corrupted `&params` expressions (were garbled as "¶ms"),
 *  which previously did not compile. */
static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_compressionParameters cParams)
{
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(cParams));
    cdict->matchState.cParams = cParams;
    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
        /* reference mode (or empty dict) : no copy, caller keeps ownership */
        cdict->dictContent = dictBuffer;
    } else {
        /* copy mode : duplicate dictionary content inside the workspace */
        void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
        RETURN_ERROR_IF(!internalBuffer, memory_allocation);
        cdict->dictContent = internalBuffer;
        memcpy(internalBuffer, dictBuffer, dictSize);
    }
    cdict->dictContentSize = dictSize;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);


    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    FORWARD_IF_ERROR(ZSTD_reset_matchState(
        &cdict->matchState,
        &cdict->workspace,
        &cParams,
        ZSTDcrp_makeClean,
        ZSTDirp_reset,
        ZSTD_resetTarget_CDict));
    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is <= 8 bytes.
     */
    {   ZSTD_CCtx_params params;
        memset(&params, 0, sizeof(params));   /* was garbled "¶ms" : fixed */
        params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
        params.fParams.contentSizeFlag = 1;
        params.cParams = cParams;
        {   size_t const dictID = ZSTD_compress_insertDictionary(
                    &cdict->cBlockState, &cdict->matchState, &cdict->workspace,
                    &params, cdict->dictContent, cdict->dictContentSize,   /* was garbled "¶ms" : fixed */
                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
            FORWARD_IF_ERROR(dictID);
            assert(dictID <= (size_t)(U32)-1);
            cdict->dictID = (U32)dictID;
        }
    }

    return 0;
}
3241
ZSTD_createCDict_advanced(const void * dictBuffer,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_compressionParameters cParams,ZSTD_customMem customMem)3242 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
3243 ZSTD_dictLoadMethod_e dictLoadMethod,
3244 ZSTD_dictContentType_e dictContentType,
3245 ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
3246 {
3247 DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
3248 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
3249
3250 { size_t const workspaceSize =
3251 ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
3252 ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
3253 ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
3254 (dictLoadMethod == ZSTD_dlm_byRef ? 0
3255 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
3256 void* const workspace = ZSTD_malloc(workspaceSize, customMem);
3257 ZSTD_cwksp ws;
3258 ZSTD_CDict* cdict;
3259
3260 if (!workspace) {
3261 ZSTD_free(workspace, customMem);
3262 return NULL;
3263 }
3264
3265 ZSTD_cwksp_init(&ws, workspace, workspaceSize);
3266
3267 cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
3268 assert(cdict != NULL);
3269 ZSTD_cwksp_move(&cdict->workspace, &ws);
3270 cdict->customMem = customMem;
3271 cdict->compressionLevel = 0; /* signals advanced API usage */
3272
3273 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3274 dictBuffer, dictSize,
3275 dictLoadMethod, dictContentType,
3276 cParams) )) {
3277 ZSTD_freeCDict(cdict);
3278 return NULL;
3279 }
3280
3281 return cdict;
3282 }
3283 }
3284
ZSTD_createCDict(const void * dict,size_t dictSize,int compressionLevel)3285 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
3286 {
3287 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3288 ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
3289 ZSTD_dlm_byCopy, ZSTD_dct_auto,
3290 cParams, ZSTD_defaultCMem);
3291 if (cdict)
3292 cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
3293 return cdict;
3294 }
3295
ZSTD_createCDict_byReference(const void * dict,size_t dictSize,int compressionLevel)3296 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
3297 {
3298 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3299 return ZSTD_createCDict_advanced(dict, dictSize,
3300 ZSTD_dlm_byRef, ZSTD_dct_auto,
3301 cParams, ZSTD_defaultCMem);
3302 }
3303
ZSTD_freeCDict(ZSTD_CDict * cdict)3304 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
3305 {
3306 if (cdict==NULL) return 0; /* support free on NULL */
3307 { ZSTD_customMem const cMem = cdict->customMem;
3308 int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
3309 ZSTD_cwksp_free(&cdict->workspace, cMem);
3310 if (!cdictInWorkspace) {
3311 ZSTD_free(cdict, cMem);
3312 }
3313 return 0;
3314 }
3315 }
3316
3317 /*! ZSTD_initStaticCDict_advanced() :
3318 * Generate a digested dictionary in provided memory area.
3319 * workspace: The memory area to emplace the dictionary into.
 *  workspace: The memory area to emplace the dictionary into.
 *             The provided pointer must be 8-byte aligned.
3321 * It must outlive dictionary usage.
3322 * workspaceSize: Use ZSTD_estimateCDictSize()
3323 * to determine how large workspace must be.
3324 * cParams : use ZSTD_getCParams() to transform a compression level
 *  into its relevant cParams.
3326 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
3327 * Note : there is no corresponding "free" function.
3328 * Since workspace was allocated externally, it must be freed externally.
3329 */
const ZSTD_CDict* ZSTD_initStaticCDict(
                                 void* workspace, size_t workspaceSize,
                           const void* dict, size_t dictSize,
                                 ZSTD_dictLoadMethod_e dictLoadMethod,
                                 ZSTD_dictContentType_e dictContentType,
                                 ZSTD_compressionParameters cParams)
{
    /* Emplace a fully-initialized CDict inside caller-provided memory.
     * Returns NULL on any failure (misalignment, workspace too small,
     * or dictionary loading error). The returned pointer aliases
     * `workspace`; there is no matching free function. */
    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
                            + matchStateSize;
    ZSTD_CDict* cdict;

    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */

    {
        /* reserve the CDict struct itself at the head of the workspace;
         * the reservation fails (NULL) if the workspace cannot hold it */
        ZSTD_cwksp ws;
        ZSTD_cwksp_init(&ws, workspace, workspaceSize);
        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        if (cdict == NULL) return NULL;
        ZSTD_cwksp_move(&cdict->workspace, &ws);
    }

    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
    /* overall size check happens after emplacing the struct : safe, since
     * nothing beyond sizeof(ZSTD_CDict) has been written yet */
    if (workspaceSize < neededSize) return NULL;

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                              dict, dictSize,
                                              dictLoadMethod, dictContentType,
                                              cParams) ))
        return NULL;

    return cdict;
}
3367
ZSTD_getCParamsFromCDict(const ZSTD_CDict * cdict)3368 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
3369 {
3370 assert(cdict != NULL);
3371 return cdict->matchState.cParams;
3372 }
3373
3374 /* ZSTD_compressBegin_usingCDict_advanced() :
3375 * cdict must be != NULL */
ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx * const cctx,const ZSTD_CDict * const cdict,ZSTD_frameParameters const fParams,unsigned long long const pledgedSrcSize)3376 size_t ZSTD_compressBegin_usingCDict_advanced(
3377 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
3378 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
3379 {
3380 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
3381 RETURN_ERROR_IF(cdict==NULL, dictionary_wrong);
3382 { ZSTD_CCtx_params params = cctx->requestedParams;
3383 params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
3384 || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
3385 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
3386 || cdict->compressionLevel == 0 )
3387 && (params.attachDictPref != ZSTD_dictForceLoad) ?
3388 ZSTD_getCParamsFromCDict(cdict)
3389 : ZSTD_getCParams(cdict->compressionLevel,
3390 pledgedSrcSize,
3391 cdict->dictContentSize);
3392 /* Increase window log to fit the entire dictionary and source if the
3393 * source size is known. Limit the increase to 19, which is the
3394 * window log for compression level 1 with the largest source size.
3395 */
3396 if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
3397 U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
3398 U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
3399 params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
3400 }
3401 params.fParams = fParams;
3402 return ZSTD_compressBegin_internal(cctx,
3403 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
3404 cdict,
3405 ¶ms, pledgedSrcSize,
3406 ZSTDb_not_buffered);
3407 }
3408 }
3409
3410 /* ZSTD_compressBegin_usingCDict() :
3411 * pledgedSrcSize=0 means "unknown"
3412 * if pledgedSrcSize>0, it will enable contentSizeFlag */
ZSTD_compressBegin_usingCDict(ZSTD_CCtx * cctx,const ZSTD_CDict * cdict)3413 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
3414 {
3415 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
3416 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
3417 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
3418 }
3419
size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
    /* Initialize from CDict (this also validates cdict != NULL),
     * then compress the whole input as a single frame. */
    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize));
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
3428
3429 /*! ZSTD_compress_usingCDict() :
3430 * Compression using a digested Dictionary.
3431 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
3432 * Note that compression parameters are decided at CDict creation time
3433 * while frame parameters are hardcoded */
size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict)
{
    /* Hardcoded frame parameters : content size written, no checksum,
     * dictID kept. */
    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
}
3442
3443
3444
3445 /* ******************************************************************
3446 * Streaming
3447 ********************************************************************/
3448
ZSTD_createCStream(void)3449 ZSTD_CStream* ZSTD_createCStream(void)
3450 {
3451 DEBUGLOG(3, "ZSTD_createCStream");
3452 return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
3453 }
3454
ZSTD_initStaticCStream(void * workspace,size_t workspaceSize)3455 ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
3456 {
3457 return ZSTD_initStaticCCtx(workspace, workspaceSize);
3458 }
3459
ZSTD_createCStream_advanced(ZSTD_customMem customMem)3460 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
3461 { /* CStream and CCtx are now same object */
3462 return ZSTD_createCCtx_advanced(customMem);
3463 }
3464
ZSTD_freeCStream(ZSTD_CStream * zcs)3465 size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
3466 {
3467 return ZSTD_freeCCtx(zcs); /* same object */
3468 }
3469
3470
3471
3472 /*====== Initialization ======*/
3473
ZSTD_CStreamInSize(void)3474 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; }
3475
ZSTD_CStreamOutSize(void)3476 size_t ZSTD_CStreamOutSize(void)
3477 {
3478 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
3479 }
3480
/* ZSTD_resetCStream_internal() :
 * Finalize parameters and (re)start a buffered streaming compression
 * session. Either `dict` or `cdict` may be set, never both.
 * Note : fixes corrupted `&params` expressions (were garbled as "¶ms"),
 * which previously did not compile. */
static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
                    const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
                    const ZSTD_CDict* const cdict,
                    ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_resetCStream_internal");
    /* Finalize the compression parameters */
    params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);   /* was garbled "¶ms" : fixed */
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */

    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                                                  dict, dictSize, dictContentType, ZSTD_dtlm_fast,
                                                  cdict,
                                                  &params, pledgedSrcSize,   /* was garbled "¶ms" : fixed */
                                                  ZSTDb_buffered) );

    cctx->inToCompress = 0;
    cctx->inBuffPos = 0;
    cctx->inBuffTarget = cctx->blockSize
                       + (cctx->blockSize == pledgedSrcSize);   /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
    cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
    cctx->streamStage = zcss_load;
    cctx->frameEnded = 0;
    return 0;   /* ready to go */
}
3508
3509 /* ZSTD_resetCStream():
3510 * pledgedSrcSize == 0 means "unknown" */
ZSTD_resetCStream(ZSTD_CStream * zcs,unsigned long long pss)3511 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
3512 {
3513 /* temporary : 0 interpreted as "unknown" during transition period.
3514 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
3515 * 0 will be interpreted as "empty" in the future.
3516 */
3517 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
3518 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
3519 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3520 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3521 return 0;
3522 }
3523
3524 /*! ZSTD_initCStream_internal() :
3525 * Note : for lib/compress only. Used by zstdmt_compress.c.
3526 * Assumption 1 : params are valid
3527 * Assumption 2 : either dict, or cdict, is defined, not both */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
                    const ZSTD_CCtx_params* params,
                    unsigned long long pledgedSrcSize)
{
    /* For lib/compress only (used by zstdmt_compress.c).
     * Assumes params are valid, and that at most one of dict / cdict is set. */
    DEBUGLOG(4, "ZSTD_initCStream_internal");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    zcs->requestedParams = *params;
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    if (dict != NULL) {
        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
    } else {
        /* Dictionary is cleared if !cdict */
        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
    }
    return 0;
}
3547
3548 /* ZSTD_initCStream_usingCDict_advanced() :
3549 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream * zcs,const ZSTD_CDict * cdict,ZSTD_frameParameters fParams,unsigned long long pledgedSrcSize)3550 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
3551 const ZSTD_CDict* cdict,
3552 ZSTD_frameParameters fParams,
3553 unsigned long long pledgedSrcSize)
3554 {
3555 DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
3556 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3557 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3558 zcs->requestedParams.fParams = fParams;
3559 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
3560 return 0;
3561 }
3562
3563 /* note : cdict must outlive compression session */
ZSTD_initCStream_usingCDict(ZSTD_CStream * zcs,const ZSTD_CDict * cdict)3564 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
3565 {
3566 DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
3567 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3568 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
3569 return 0;
3570 }
3571
3572
3573 /* ZSTD_initCStream_advanced() :
3574 * pledgedSrcSize must be exact.
3575 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
3576 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                 const void* dict, size_t dictSize,
                                 ZSTD_parameters params, unsigned long long pss)
{
    /* for compatibility with older programs relying on this behavior.
     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
     * This line will be removed in the future.
     */
    U64 const effectiveSrcSize =
        (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_advanced");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, effectiveSrcSize) );
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
    /* dict loaded with default ZSTD_dct_auto and ZSTD_dlm_byCopy semantics */
    zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params);
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
    return 0;
}
3594
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    /* Start a streaming session at the given level, loading (copying)
     * the provided raw dictionary. */
    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
    return 0;
}
3603
ZSTD_initCStream_srcSize(ZSTD_CStream * zcs,int compressionLevel,unsigned long long pss)3604 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
3605 {
3606 /* temporary : 0 interpreted as "unknown" during transition period.
3607 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
3608 * 0 will be interpreted as "empty" in the future.
3609 */
3610 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
3611 DEBUGLOG(4, "ZSTD_initCStream_srcSize");
3612 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3613 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) );
3614 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
3615 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3616 return 0;
3617 }
3618
ZSTD_initCStream(ZSTD_CStream * zcs,int compressionLevel)3619 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
3620 {
3621 DEBUGLOG(4, "ZSTD_initCStream");
3622 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3623 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) );
3624 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
3625 return 0;
3626 }
3627
3628 /*====== Compression ======*/
3629
ZSTD_nextInputSizeHint(const ZSTD_CCtx * cctx)3630 static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
3631 {
3632 size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
3633 if (hintInSize==0) hintInSize = cctx->blockSize;
3634 return hintInSize;
3635 }
3636
static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
                             const void* src, size_t srcSize)
{
    /* Copy as much of src as fits into dst.
     * @return : number of bytes actually copied. */
    size_t const copied = (dstCapacity < srcSize) ? dstCapacity : srcSize;
    if (copied > 0) memcpy(dst, src, copied);
    return copied;
}
3644
3645 /** ZSTD_compressStream_generic():
3646 * internal function for all *compressStream*() variants
3647 * non-static, because can be called from zstdmt_compress.c
3648 * @return : hint size for next input */
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                                          ZSTD_outBuffer* output,
                                          ZSTD_inBuffer* input,
                                          ZSTD_EndDirective const flushMode)
{
    /* Streaming state machine : alternates between loading input into the
     * internal buffer (zcss_load), compressing it, and flushing compressed
     * data to the caller's output buffer (zcss_flush).
     * Advances input->pos and output->pos.
     * @return : 0 when the frame is complete, otherwise a hint for the
     *           next input size (see ZSTD_nextInputSizeHint). */
    const char* const istart = (const char*)input->src;
    const char* const iend = istart + input->size;
    const char* ip = istart + input->pos;
    char* const ostart = (char*)output->dst;
    char* const oend = ostart + output->size;
    char* op = ostart + output->pos;
    U32 someMoreWork = 1;

    /* check expectations */
    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
    assert(zcs->inBuff != NULL);
    assert(zcs->inBuffSize > 0);
    assert(zcs->outBuff != NULL);
    assert(zcs->outBuffSize > 0);
    assert(output->pos <= output->size);
    assert(input->pos <= input->size);

    while (someMoreWork) {
        switch(zcs->streamStage)
        {
        case zcss_init:
            /* session was never initialized : hard error */
            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");

        case zcss_load:
            if ( (flushMode == ZSTD_e_end)
              && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip))  /* enough dstCapacity */
              && (zcs->inBuffPos == 0) ) {
                /* shortcut to compression pass directly into output buffer */
                size_t const cSize = ZSTD_compressEnd(zcs,
                                                op, oend-op, ip, iend-ip);
                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
                FORWARD_IF_ERROR(cSize);
                ip = iend;
                op += cSize;
                zcs->frameEnded = 1;
                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                someMoreWork = 0; break;
            }
            /* complete loading into inBuffer */
            {   size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
                size_t const loaded = ZSTD_limitCopy(
                                        zcs->inBuff + zcs->inBuffPos, toLoad,
                                        ip, iend-ip);
                zcs->inBuffPos += loaded;
                ip += loaded;
                if ( (flushMode == ZSTD_e_continue)
                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
                    /* not enough input to fill full block : stop here */
                    someMoreWork = 0; break;
                }
                if ( (flushMode == ZSTD_e_flush)
                  && (zcs->inBuffPos == zcs->inToCompress) ) {
                    /* empty */
                    someMoreWork = 0; break;
                }
            }
            /* compress current block (note : this stage cannot be stopped in the middle) */
            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
            {   void* cDst;
                size_t cSize;
                size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
                size_t oSize = oend-op;
                unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
                if (oSize >= ZSTD_compressBound(iSize))
                    cDst = op;   /* compress into output buffer, to skip flush stage */
                else
                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
                cSize = lastBlock ?
                        ZSTD_compressEnd(zcs, cDst, oSize,
                                    zcs->inBuff + zcs->inToCompress, iSize) :
                        ZSTD_compressContinue(zcs, cDst, oSize,
                                    zcs->inBuff + zcs->inToCompress, iSize);
                FORWARD_IF_ERROR(cSize);
                zcs->frameEnded = lastBlock;
                /* prepare next block */
                zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
                if (zcs->inBuffTarget > zcs->inBuffSize)
                    /* next block would overflow the input buffer : wrap around */
                    zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
                DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
                         (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
                if (!lastBlock)
                    assert(zcs->inBuffTarget <= zcs->inBuffSize);
                zcs->inToCompress = zcs->inBuffPos;
                if (cDst == op) {  /* no need to flush */
                    op += cSize;
                    if (zcs->frameEnded) {
                        DEBUGLOG(5, "Frame completed directly in outBuffer");
                        someMoreWork = 0;
                        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                    }
                    break;
                }
                zcs->outBuffContentSize = cSize;
                zcs->outBuffFlushedSize = 0;
                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
            }
	    /* fall-through */
        case zcss_flush:
            DEBUGLOG(5, "flush stage");
            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
                            (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
                op += flushed;
                zcs->outBuffFlushedSize += flushed;
                if (toFlush!=flushed) {
                    /* flush not fully completed, presumably because dst is too small */
                    assert(op==oend);
                    someMoreWork = 0;
                    break;
                }
                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
                if (zcs->frameEnded) {
                    DEBUGLOG(5, "Frame completed on flush");
                    someMoreWork = 0;
                    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                    break;
                }
                zcs->streamStage = zcss_load;
                break;
            }

        default: /* impossible */
            assert(0);
        }
    }

    input->pos = ip - istart;
    output->pos = op - ostart;
    if (zcs->frameEnded) return 0;
    return ZSTD_nextInputSizeHint(zcs);
}
3787
ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx * cctx)3788 static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
3789 {
3790 #ifdef ZSTD_MULTITHREAD
3791 if (cctx->appliedParams.nbWorkers >= 1) {
3792 assert(cctx->mtctx != NULL);
3793 return ZSTDMT_nextInputSizeHint(cctx->mtctx);
3794 }
3795 #endif
3796 return ZSTD_nextInputSizeHint(cctx);
3797
3798 }
3799
ZSTD_compressStream(ZSTD_CStream * zcs,ZSTD_outBuffer * output,ZSTD_inBuffer * input)3800 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
3801 {
3802 FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) );
3803 return ZSTD_nextInputSizeHint_MTorST(zcs);
3804 }
3805
3806
/*! ZSTD_compressStream2() :
 *  Central streaming compression entry point (advanced API).
 *  Consumes data from @input, writes compressed bytes into @output,
 *  honoring the @endOp directive (continue / flush / end).
 *  On the first call of a session (streamStage == zcss_init), performs a
 *  "transparent" initialization: resolves parameters, consumes any one-shot
 *  prefix dictionary, and selects single- vs multi-threaded mode.
 * @return : an estimation of the amount of data still buffered and left to
 *           flush (0 when fully flushed), or an error code testable with
 *           ZSTD_isError(). */
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                             ZSTD_outBuffer* output,
                             ZSTD_inBuffer* input,
                             ZSTD_EndDirective endOp)
{
    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
    /* check conditions : positions must not exceed buffer sizes */
    RETURN_ERROR_IF(output->pos > output->size, GENERIC);
    RETURN_ERROR_IF(input->pos > input->size, GENERIC);
    assert(cctx!=NULL);

    /* transparent initialization stage */
    if (cctx->streamStage == zcss_init) {
        ZSTD_CCtx_params params = cctx->requestedParams;
        /* snapshot the prefix dictionary BEFORE clearing it below :
         * it is consumed by exactly one compression session */
        ZSTD_prefixDict const prefixDict = cctx->prefixDict;
        FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */
        memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
        assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
        DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
        /* single-shot (ZSTD_e_end on first call) : the whole input is known,
         * so pledgedSrcSize can be fixed up automatically */
        if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);


#ifdef ZSTD_MULTITHREAD
        if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
            params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
        }
        if (params.nbWorkers > 0) {
            /* mt context creation (lazy : reused across sessions) */
            if (cctx->mtctx == NULL) {
                DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
                            params.nbWorkers);
                cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
                RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation);
            }
            /* mt compression */
            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
            FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
                        cctx->mtctx,
                        prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent,
                        cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
            cctx->streamStage = zcss_load;
            cctx->appliedParams.nbWorkers = params.nbWorkers;
        } else
#endif
        /* single-thread path */
        {   FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
                            prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
                            cctx->cdict,
                            params, cctx->pledgedSrcSizePlusOne-1) );
            assert(cctx->streamStage == zcss_load);
            assert(cctx->appliedParams.nbWorkers == 0);
    }   }
    /* end of transparent initialization stage */

    /* compression stage */
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        /* flush/end directives must make maximum forward progress :
         * loop until flushed, done, or out of output space */
        int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
        size_t flushMin;
        assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */);
        if (cctx->cParamsChanged) {
            ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
            cctx->cParamsChanged = 0;
        }
        do {
            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
            if ( ZSTD_isError(flushMin)
              || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
                /* reset session so the context can start a new frame (also on error) */
                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
            }
            FORWARD_IF_ERROR(flushMin);
        } while (forceMaxProgress && flushMin != 0 && output->pos < output->size);
        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
        /* Either we don't require maximum forward progress, we've finished the
         * flush, or we are out of output space.
         */
        assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size);
        return flushMin;
    }
#endif
    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) );
    DEBUGLOG(5, "completed ZSTD_compressStream2");
    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
}
3892
/*! ZSTD_compressStream2_simpleArgs() :
 *  Thin adapter : wraps raw buffer/position arguments into
 *  ZSTD_outBuffer / ZSTD_inBuffer and forwards to ZSTD_compressStream2().
 *  Updated positions are written back through @dstPos / @srcPos,
 *  even when an error is returned. */
size_t ZSTD_compressStream2_simpleArgs (
                ZSTD_CCtx* cctx,
                void* dst, size_t dstCapacity, size_t* dstPos,
          const void* src, size_t srcSize, size_t* srcPos,
                ZSTD_EndDirective endOp)
{
    ZSTD_outBuffer output;
    ZSTD_inBuffer  input;
    size_t result;
    output.dst = dst;  output.size = dstCapacity;  output.pos = *dstPos;
    input.src  = src;  input.size  = srcSize;      input.pos  = *srcPos;
    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
    result = ZSTD_compressStream2(cctx, &output, &input, endOp);
    *dstPos = output.pos;
    *srcPos = input.pos;
    return result;
}
3907
/*! ZSTD_compress2() :
 *  One-shot compression using the advanced-parameter API.
 *  Resets the session, then compresses the whole input with ZSTD_e_end.
 * @return : compressed size written into dst, or an error code
 *           (dstSize_tooSmall if dstCapacity is insufficient). */
size_t ZSTD_compress2(ZSTD_CCtx* cctx,
                      void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
{
    size_t oPos = 0;
    size_t iPos = 0;
    size_t result;
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
    result = ZSTD_compressStream2_simpleArgs(cctx,
                                             dst, dstCapacity, &oPos,
                                             src, srcSize, &iPos,
                                             ZSTD_e_end);
    FORWARD_IF_ERROR(result);
    if (result != 0) {
        /* non-zero means the frame is not finished : dst ran out of space */
        assert(oPos == dstCapacity);
        RETURN_ERROR(dstSize_tooSmall);
    }
    assert(iPos == srcSize); /* all input is expected consumed */
    return oPos;
}
3928
3929 /*====== Finalize ======*/
3930
3931 /*! ZSTD_flushStream() :
3932 * @return : amount of data remaining to flush */
ZSTD_flushStream(ZSTD_CStream * zcs,ZSTD_outBuffer * output)3933 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3934 {
3935 ZSTD_inBuffer input = { NULL, 0, 0 };
3936 return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
3937 }
3938
3939
ZSTD_endStream(ZSTD_CStream * zcs,ZSTD_outBuffer * output)3940 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3941 {
3942 ZSTD_inBuffer input = { NULL, 0, 0 };
3943 size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
3944 FORWARD_IF_ERROR( remainingToFlush );
3945 if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
3946 /* single thread mode : attempt to calculate remaining to flush more precisely */
3947 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
3948 size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
3949 size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
3950 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
3951 return toFlush;
3952 }
3953 }
3954
3955
3956 /*-===== Pre-defined compression levels =====-*/
3957
#define ZSTD_MAX_CLEVEL     22
/*! ZSTD_maxCLevel() :
 * @return : the maximum compression level supported by this build */
int ZSTD_maxCLevel(void)
{
    return ZSTD_MAX_CLEVEL;
}
ZSTD_minCLevel(void)3960 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
3961
/* Default compression parameters, indexed as [tableID][compressionLevel].
 * tableID selects one of 4 source-size classes (chosen in ZSTD_getCParams()).
 * Column legend : W=windowLog, C=chainLog, H=hashLog, S=searchLog,
 * L=minMatch, TL/T=targetLength, strat=ZSTD_strategy.
 * Row 0 of each table is the baseline for negative ("fast") levels. */
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{   /* "default" - for any srcSize > 256 KB */
    /* W,  C,  H,  S,  L, TL, strat */
    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
},
{   /* for srcSize <= 256 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 128 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 16 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
},
};
4068
4069 /*! ZSTD_getCParams() :
4070 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
4071 * Size values are optional, provide 0 if not known or unused */
ZSTD_getCParams(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize)4072 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
4073 {
4074 size_t const addedSize = srcSizeHint ? 0 : 500;
4075 U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */
4076 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
4077 int row = compressionLevel;
4078 DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel);
4079 if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
4080 if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
4081 if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
4082 { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
4083 if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
4084 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */
4085 }
4086 }
4087
4088 /*! ZSTD_getParams() :
4089 * same idea as ZSTD_getCParams()
4090 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
4091 * Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_getParams(int compressionLevel,unsigned long long srcSizeHint,size_t dictSize)4092 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
4093 ZSTD_parameters params;
4094 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
4095 DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
4096 memset(¶ms, 0, sizeof(params));
4097 params.cParams = cParams;
4098 params.fParams.contentSizeFlag = 1;
4099 return params;
4100 }
4101