1a0483764SConrad Meyer /*
2*5ff13fbcSAllan Jude * Copyright (c) Yann Collet, Facebook, Inc.
3a0483764SConrad Meyer * All rights reserved.
4a0483764SConrad Meyer *
5a0483764SConrad Meyer * This source code is licensed under both the BSD-style license (found in the
6a0483764SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7a0483764SConrad Meyer * in the COPYING file in the root directory of this source tree).
8a0483764SConrad Meyer * You may select, at your option, one of the above-listed licenses.
9a0483764SConrad Meyer */
10a0483764SConrad Meyer
11a0483764SConrad Meyer /* zstd_decompress_block :
12a0483764SConrad Meyer * this module takes care of decompressing _compressed_ block */
13a0483764SConrad Meyer
14a0483764SConrad Meyer /*-*******************************************************
15a0483764SConrad Meyer * Dependencies
16a0483764SConrad Meyer *********************************************************/
17f7cd7fe5SConrad Meyer #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
1837f1f268SConrad Meyer #include "../common/compiler.h" /* prefetch */
1937f1f268SConrad Meyer #include "../common/cpu.h" /* bmi2 */
2037f1f268SConrad Meyer #include "../common/mem.h" /* low level memory routines */
21a0483764SConrad Meyer #define FSE_STATIC_LINKING_ONLY
2237f1f268SConrad Meyer #include "../common/fse.h"
23a0483764SConrad Meyer #define HUF_STATIC_LINKING_ONLY
2437f1f268SConrad Meyer #include "../common/huf.h"
2537f1f268SConrad Meyer #include "../common/zstd_internal.h"
26a0483764SConrad Meyer #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27a0483764SConrad Meyer #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28a0483764SConrad Meyer #include "zstd_decompress_block.h"
29a0483764SConrad Meyer
30a0483764SConrad Meyer /*_*******************************************************
31a0483764SConrad Meyer * Macros
32a0483764SConrad Meyer **********************************************************/
33a0483764SConrad Meyer
34a0483764SConrad Meyer /* These two optional macros force the use one way or another of the two
35a0483764SConrad Meyer * ZSTD_decompressSequences implementations. You can't force in both directions
36a0483764SConrad Meyer * at the same time.
37a0483764SConrad Meyer */
38a0483764SConrad Meyer #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39a0483764SConrad Meyer defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40a0483764SConrad Meyer #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41a0483764SConrad Meyer #endif
42a0483764SConrad Meyer
43a0483764SConrad Meyer
44a0483764SConrad Meyer /*_*******************************************************
45a0483764SConrad Meyer * Memory operations
46a0483764SConrad Meyer **********************************************************/
/* Copies exactly 4 bytes from `src` to `dst` (fixed-size helper for small moves). */
static void ZSTD_copy4(void* dst, const void* src)
{
    ZSTD_memcpy(dst, src, 4);
}
48a0483764SConrad Meyer
49a0483764SConrad Meyer
50a0483764SConrad Meyer /*-*************************************************************
51a0483764SConrad Meyer * Block decoding
52a0483764SConrad Meyer ***************************************************************/
53a0483764SConrad Meyer
54a0483764SConrad Meyer /*! ZSTD_getcBlockSize() :
55a0483764SConrad Meyer * Provides the size of compressed block from block header `src` */
ZSTD_getcBlockSize(const void * src,size_t srcSize,blockProperties_t * bpPtr)56a0483764SConrad Meyer size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57a0483764SConrad Meyer blockProperties_t* bpPtr)
58a0483764SConrad Meyer {
5937f1f268SConrad Meyer RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
602b9c00cbSConrad Meyer
61a0483764SConrad Meyer { U32 const cBlockHeader = MEM_readLE24(src);
62a0483764SConrad Meyer U32 const cSize = cBlockHeader >> 3;
63a0483764SConrad Meyer bpPtr->lastBlock = cBlockHeader & 1;
64a0483764SConrad Meyer bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
65a0483764SConrad Meyer bpPtr->origSize = cSize; /* only useful for RLE */
66a0483764SConrad Meyer if (bpPtr->blockType == bt_rle) return 1;
6737f1f268SConrad Meyer RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68a0483764SConrad Meyer return cSize;
69a0483764SConrad Meyer }
70a0483764SConrad Meyer }
71a0483764SConrad Meyer
72*5ff13fbcSAllan Jude /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
ZSTD_allocateLiteralsBuffer(ZSTD_DCtx * dctx,void * const dst,const size_t dstCapacity,const size_t litSize,const streaming_operation streaming,const size_t expectedWriteSize,const unsigned splitImmediately)73*5ff13fbcSAllan Jude static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
74*5ff13fbcSAllan Jude const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
75*5ff13fbcSAllan Jude {
76*5ff13fbcSAllan Jude if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
77*5ff13fbcSAllan Jude {
78*5ff13fbcSAllan Jude /* room for litbuffer to fit without read faulting */
79*5ff13fbcSAllan Jude dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
80*5ff13fbcSAllan Jude dctx->litBufferEnd = dctx->litBuffer + litSize;
81*5ff13fbcSAllan Jude dctx->litBufferLocation = ZSTD_in_dst;
82*5ff13fbcSAllan Jude }
83*5ff13fbcSAllan Jude else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
84*5ff13fbcSAllan Jude {
85*5ff13fbcSAllan Jude /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
86*5ff13fbcSAllan Jude if (splitImmediately) {
87*5ff13fbcSAllan Jude /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
88*5ff13fbcSAllan Jude dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
89*5ff13fbcSAllan Jude dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90*5ff13fbcSAllan Jude }
91*5ff13fbcSAllan Jude else {
92*5ff13fbcSAllan Jude /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
93*5ff13fbcSAllan Jude dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94*5ff13fbcSAllan Jude dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95*5ff13fbcSAllan Jude }
96*5ff13fbcSAllan Jude dctx->litBufferLocation = ZSTD_split;
97*5ff13fbcSAllan Jude }
98*5ff13fbcSAllan Jude else
99*5ff13fbcSAllan Jude {
100*5ff13fbcSAllan Jude /* fits entirely within litExtraBuffer, so no split is necessary */
101*5ff13fbcSAllan Jude dctx->litBuffer = dctx->litExtraBuffer;
102*5ff13fbcSAllan Jude dctx->litBufferEnd = dctx->litBuffer + litSize;
103*5ff13fbcSAllan Jude dctx->litBufferLocation = ZSTD_not_in_dst;
104*5ff13fbcSAllan Jude }
105*5ff13fbcSAllan Jude }
106a0483764SConrad Meyer
107a0483764SConrad Meyer /* Hidden declaration for fullbench */
108a0483764SConrad Meyer size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
109*5ff13fbcSAllan Jude const void* src, size_t srcSize,
110*5ff13fbcSAllan Jude void* dst, size_t dstCapacity, const streaming_operation streaming);
111a0483764SConrad Meyer /*! ZSTD_decodeLiteralsBlock() :
112*5ff13fbcSAllan Jude * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
113*5ff13fbcSAllan Jude * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
114*5ff13fbcSAllan Jude * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
115*5ff13fbcSAllan Jude * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
116*5ff13fbcSAllan Jude *
117a0483764SConrad Meyer * @return : nb of bytes read from src (< srcSize )
118a0483764SConrad Meyer * note : symbol not declared but exposed for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
                          void* dst, size_t dstCapacity, const streaming_operation streaming)
{
    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");

    {   const BYTE* const istart = (const BYTE*) src;
        /* low 2 bits of the first byte select the literals encoding mode */
        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);

        switch(litEncType)
        {
        case set_repeat:
            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
            /* repeat mode requires a previously loaded Huffman table */
            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
            ZSTD_FALLTHROUGH;

        case set_compressed:
            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
            {   size_t lhSize, litSize, litCSize;
                U32 singleStream=0;
                U32 const lhlCode = (istart[0] >> 2) & 3;   /* literals-header size format */
                U32 const lhc = MEM_readLE32(istart);
                size_t hufSuccess;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                /* decode the literals section header : sizes of regenerated
                 * (litSize) and compressed (litCSize) literals */
                switch(lhlCode)
                {
                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    /* 2 - 2 - 10 - 10 */
                    singleStream = !lhlCode;
                    lhSize = 3;
                    litSize  = (lhc >> 4) & 0x3FF;
                    litCSize = (lhc >> 14) & 0x3FF;
                    break;
                case 2:
                    /* 2 - 2 - 14 - 14 */
                    lhSize = 4;
                    litSize  = (lhc >> 4) & 0x3FFF;
                    litCSize = lhc >> 18;
                    break;
                case 3:
                    /* 2 - 2 - 18 - 18 */
                    lhSize = 5;
                    litSize  = (lhc >> 4) & 0x3FFFF;
                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                    break;
                }
                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
                /* splitImmediately==0 : in split mode, decode first, shift tail after */
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);

                /* prefetch huffman table if cold */
                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
                }

                if (litEncType==set_repeat) {
                    /* reuse the existing DTable (dctx->HUFptr) from the previous block / dict */
                    if (singleStream) {
                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                    } else {
                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                    }
                } else {
                    /* set_compressed : a fresh Huffman table is read from the stream */
                    if (singleStream) {
#if defined(HUF_FORCE_DECOMPRESS_X2)
                        hufSuccess = HUF_decompress1X_DCtx_wksp(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace));
#else
                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
#endif
                    } else {
                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
                    }
                }
                if (dctx->litBufferLocation == ZSTD_split)
                {
                    /* move the tail into litExtraBuffer, then shift the dst-resident part
                     * forward so the two pieces stay contiguous from the decoder's view */
                    ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
                    dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
                    dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
                }

                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");

                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                dctx->litEntropy = 1;   /* a valid table now exists for future set_repeat blocks */
                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
                return litCSize + lhSize;
            }

        case set_basic:
            /* raw literals : stored uncompressed right after the header */
            {   size_t litSize, lhSize;
                U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    break;
                }

                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                /* splitImmediately==1 : copy directly into the final split layout */
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
                    if (dctx->litBufferLocation == ZSTD_split)
                    {
                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
                        ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                    }
                    else
                    {
                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
                    }
                    dctx->litPtr = dctx->litBuffer;
                    dctx->litSize = litSize;
                    return lhSize+litSize;
                }
                /* direct reference into compressed stream */
                dctx->litPtr = istart+lhSize;
                dctx->litSize = litSize;
                dctx->litBufferEnd = dctx->litPtr + litSize;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                return lhSize+litSize;
            }

        case set_rle:
            /* RLE literals : one byte repeated litSize times */
            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t litSize, lhSize;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                    break;
                }
                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                if (dctx->litBufferLocation == ZSTD_split)
                {
                    /* fill both halves of the split buffer with the repeated byte */
                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
                }
                else
                {
                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
                }
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                return lhSize+1;   /* header + the single RLE byte */
            }
        default:
            RETURN_ERROR(corruption_detected, "impossible");
        }
    }
}
312a0483764SConrad Meyer
313a0483764SConrad Meyer /* Default FSE distribution tables.
314a0483764SConrad Meyer * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
315f7cd7fe5SConrad Meyer * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
316a0483764SConrad Meyer * They were generated programmatically with following method :
317a0483764SConrad Meyer * - start from default distributions, present in /lib/common/zstd_internal.h
318a0483764SConrad Meyer * - generate tables normally, using ZSTD_buildFSETable()
319a0483764SConrad Meyer * - printout the content of tables
 * - prettify output, report below, test with fuzzer to ensure it's correct */
321a0483764SConrad Meyer
322a0483764SConrad Meyer /* Default FSE distribution table for Literal Lengths */
323a0483764SConrad Meyer static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
324a0483764SConrad Meyer { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
325a0483764SConrad Meyer /* nextState, nbAddBits, nbBits, baseVal */
326a0483764SConrad Meyer { 0, 0, 4, 0}, { 16, 0, 4, 0},
327a0483764SConrad Meyer { 32, 0, 5, 1}, { 0, 0, 5, 3},
328a0483764SConrad Meyer { 0, 0, 5, 4}, { 0, 0, 5, 6},
329a0483764SConrad Meyer { 0, 0, 5, 7}, { 0, 0, 5, 9},
330a0483764SConrad Meyer { 0, 0, 5, 10}, { 0, 0, 5, 12},
331a0483764SConrad Meyer { 0, 0, 6, 14}, { 0, 1, 5, 16},
332a0483764SConrad Meyer { 0, 1, 5, 20}, { 0, 1, 5, 22},
333a0483764SConrad Meyer { 0, 2, 5, 28}, { 0, 3, 5, 32},
334a0483764SConrad Meyer { 0, 4, 5, 48}, { 32, 6, 5, 64},
335a0483764SConrad Meyer { 0, 7, 5, 128}, { 0, 8, 6, 256},
336a0483764SConrad Meyer { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
337a0483764SConrad Meyer { 32, 0, 4, 0}, { 0, 0, 4, 1},
338a0483764SConrad Meyer { 0, 0, 5, 2}, { 32, 0, 5, 4},
339a0483764SConrad Meyer { 0, 0, 5, 5}, { 32, 0, 5, 7},
340a0483764SConrad Meyer { 0, 0, 5, 8}, { 32, 0, 5, 10},
341a0483764SConrad Meyer { 0, 0, 5, 11}, { 0, 0, 6, 13},
342a0483764SConrad Meyer { 32, 1, 5, 16}, { 0, 1, 5, 18},
343a0483764SConrad Meyer { 32, 1, 5, 22}, { 0, 2, 5, 24},
344a0483764SConrad Meyer { 32, 3, 5, 32}, { 0, 3, 5, 40},
345a0483764SConrad Meyer { 0, 6, 4, 64}, { 16, 6, 4, 64},
346a0483764SConrad Meyer { 32, 7, 5, 128}, { 0, 9, 6, 512},
347a0483764SConrad Meyer { 0, 11, 6, 2048}, { 48, 0, 4, 0},
348a0483764SConrad Meyer { 16, 0, 4, 1}, { 32, 0, 5, 2},
349a0483764SConrad Meyer { 32, 0, 5, 3}, { 32, 0, 5, 5},
350a0483764SConrad Meyer { 32, 0, 5, 6}, { 32, 0, 5, 8},
351a0483764SConrad Meyer { 32, 0, 5, 9}, { 32, 0, 5, 11},
352a0483764SConrad Meyer { 32, 0, 5, 12}, { 0, 0, 6, 15},
353a0483764SConrad Meyer { 32, 1, 5, 18}, { 32, 1, 5, 20},
354a0483764SConrad Meyer { 32, 2, 5, 24}, { 32, 2, 5, 28},
355a0483764SConrad Meyer { 32, 3, 5, 40}, { 32, 4, 5, 48},
356a0483764SConrad Meyer { 0, 16, 6,65536}, { 0, 15, 6,32768},
357a0483764SConrad Meyer { 0, 14, 6,16384}, { 0, 13, 6, 8192},
358a0483764SConrad Meyer }; /* LL_defaultDTable */
359a0483764SConrad Meyer
360a0483764SConrad Meyer /* Default FSE distribution table for Offset Codes */
361a0483764SConrad Meyer static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
362a0483764SConrad Meyer { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
363a0483764SConrad Meyer /* nextState, nbAddBits, nbBits, baseVal */
364a0483764SConrad Meyer { 0, 0, 5, 0}, { 0, 6, 4, 61},
365a0483764SConrad Meyer { 0, 9, 5, 509}, { 0, 15, 5,32765},
366a0483764SConrad Meyer { 0, 21, 5,2097149}, { 0, 3, 5, 5},
367a0483764SConrad Meyer { 0, 7, 4, 125}, { 0, 12, 5, 4093},
368a0483764SConrad Meyer { 0, 18, 5,262141}, { 0, 23, 5,8388605},
369a0483764SConrad Meyer { 0, 5, 5, 29}, { 0, 8, 4, 253},
370a0483764SConrad Meyer { 0, 14, 5,16381}, { 0, 20, 5,1048573},
371a0483764SConrad Meyer { 0, 2, 5, 1}, { 16, 7, 4, 125},
372a0483764SConrad Meyer { 0, 11, 5, 2045}, { 0, 17, 5,131069},
373a0483764SConrad Meyer { 0, 22, 5,4194301}, { 0, 4, 5, 13},
374a0483764SConrad Meyer { 16, 8, 4, 253}, { 0, 13, 5, 8189},
375a0483764SConrad Meyer { 0, 19, 5,524285}, { 0, 1, 5, 1},
376a0483764SConrad Meyer { 16, 6, 4, 61}, { 0, 10, 5, 1021},
377a0483764SConrad Meyer { 0, 16, 5,65533}, { 0, 28, 5,268435453},
378a0483764SConrad Meyer { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
379a0483764SConrad Meyer { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
380a0483764SConrad Meyer }; /* OF_defaultDTable */
381a0483764SConrad Meyer
382a0483764SConrad Meyer
383a0483764SConrad Meyer /* Default FSE distribution table for Match Lengths */
384a0483764SConrad Meyer static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
385a0483764SConrad Meyer { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
386a0483764SConrad Meyer /* nextState, nbAddBits, nbBits, baseVal */
387a0483764SConrad Meyer { 0, 0, 6, 3}, { 0, 0, 4, 4},
388a0483764SConrad Meyer { 32, 0, 5, 5}, { 0, 0, 5, 6},
389a0483764SConrad Meyer { 0, 0, 5, 8}, { 0, 0, 5, 9},
390a0483764SConrad Meyer { 0, 0, 5, 11}, { 0, 0, 6, 13},
391a0483764SConrad Meyer { 0, 0, 6, 16}, { 0, 0, 6, 19},
392a0483764SConrad Meyer { 0, 0, 6, 22}, { 0, 0, 6, 25},
393a0483764SConrad Meyer { 0, 0, 6, 28}, { 0, 0, 6, 31},
394a0483764SConrad Meyer { 0, 0, 6, 34}, { 0, 1, 6, 37},
395a0483764SConrad Meyer { 0, 1, 6, 41}, { 0, 2, 6, 47},
396a0483764SConrad Meyer { 0, 3, 6, 59}, { 0, 4, 6, 83},
397a0483764SConrad Meyer { 0, 7, 6, 131}, { 0, 9, 6, 515},
398a0483764SConrad Meyer { 16, 0, 4, 4}, { 0, 0, 4, 5},
399a0483764SConrad Meyer { 32, 0, 5, 6}, { 0, 0, 5, 7},
400a0483764SConrad Meyer { 32, 0, 5, 9}, { 0, 0, 5, 10},
401a0483764SConrad Meyer { 0, 0, 6, 12}, { 0, 0, 6, 15},
402a0483764SConrad Meyer { 0, 0, 6, 18}, { 0, 0, 6, 21},
403a0483764SConrad Meyer { 0, 0, 6, 24}, { 0, 0, 6, 27},
404a0483764SConrad Meyer { 0, 0, 6, 30}, { 0, 0, 6, 33},
405a0483764SConrad Meyer { 0, 1, 6, 35}, { 0, 1, 6, 39},
406a0483764SConrad Meyer { 0, 2, 6, 43}, { 0, 3, 6, 51},
407a0483764SConrad Meyer { 0, 4, 6, 67}, { 0, 5, 6, 99},
408a0483764SConrad Meyer { 0, 8, 6, 259}, { 32, 0, 4, 4},
409a0483764SConrad Meyer { 48, 0, 4, 4}, { 16, 0, 4, 5},
410a0483764SConrad Meyer { 32, 0, 5, 7}, { 32, 0, 5, 8},
411a0483764SConrad Meyer { 32, 0, 5, 10}, { 32, 0, 5, 11},
412a0483764SConrad Meyer { 0, 0, 6, 14}, { 0, 0, 6, 17},
413a0483764SConrad Meyer { 0, 0, 6, 20}, { 0, 0, 6, 23},
414a0483764SConrad Meyer { 0, 0, 6, 26}, { 0, 0, 6, 29},
415a0483764SConrad Meyer { 0, 0, 6, 32}, { 0, 16, 6,65539},
416a0483764SConrad Meyer { 0, 15, 6,32771}, { 0, 14, 6,16387},
417a0483764SConrad Meyer { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
418a0483764SConrad Meyer { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
419a0483764SConrad Meyer }; /* ML_defaultDTable */
420a0483764SConrad Meyer
421a0483764SConrad Meyer
ZSTD_buildSeqTable_rle(ZSTD_seqSymbol * dt,U32 baseValue,U8 nbAddBits)422*5ff13fbcSAllan Jude static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
423a0483764SConrad Meyer {
424a0483764SConrad Meyer void* ptr = dt;
425a0483764SConrad Meyer ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
426a0483764SConrad Meyer ZSTD_seqSymbol* const cell = dt + 1;
427a0483764SConrad Meyer
428a0483764SConrad Meyer DTableH->tableLog = 0;
429a0483764SConrad Meyer DTableH->fastMode = 0;
430a0483764SConrad Meyer
431a0483764SConrad Meyer cell->nbBits = 0;
432a0483764SConrad Meyer cell->nextState = 0;
433a0483764SConrad Meyer assert(nbAddBits < 255);
434*5ff13fbcSAllan Jude cell->nbAdditionalBits = nbAddBits;
435a0483764SConrad Meyer cell->baseValue = baseValue;
436a0483764SConrad Meyer }
437a0483764SConrad Meyer
438a0483764SConrad Meyer
439a0483764SConrad Meyer /* ZSTD_buildFSETable() :
440a0483764SConrad Meyer * generate FSE decoding table for one symbol (ll, ml or off)
441a0483764SConrad Meyer * cannot fail if input is valid =>
442a0483764SConrad Meyer * all inputs are presumed validated at this stage */
443f7cd7fe5SConrad Meyer FORCE_INLINE_TEMPLATE
ZSTD_buildFSETable_body(ZSTD_seqSymbol * dt,const short * normalizedCounter,unsigned maxSymbolValue,const U32 * baseValue,const U8 * nbAdditionalBits,unsigned tableLog,void * wksp,size_t wkspSize)444f7cd7fe5SConrad Meyer void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
445a0483764SConrad Meyer const short* normalizedCounter, unsigned maxSymbolValue,
446*5ff13fbcSAllan Jude const U32* baseValue, const U8* nbAdditionalBits,
447f7cd7fe5SConrad Meyer unsigned tableLog, void* wksp, size_t wkspSize)
448a0483764SConrad Meyer {
449a0483764SConrad Meyer ZSTD_seqSymbol* const tableDecode = dt+1;
450a0483764SConrad Meyer U32 const maxSV1 = maxSymbolValue + 1;
451a0483764SConrad Meyer U32 const tableSize = 1 << tableLog;
452f7cd7fe5SConrad Meyer
453f7cd7fe5SConrad Meyer U16* symbolNext = (U16*)wksp;
454f7cd7fe5SConrad Meyer BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
455a0483764SConrad Meyer U32 highThreshold = tableSize - 1;
456a0483764SConrad Meyer
457f7cd7fe5SConrad Meyer
458a0483764SConrad Meyer /* Sanity Checks */
459a0483764SConrad Meyer assert(maxSymbolValue <= MaxSeq);
460a0483764SConrad Meyer assert(tableLog <= MaxFSELog);
461f7cd7fe5SConrad Meyer assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
462f7cd7fe5SConrad Meyer (void)wkspSize;
463a0483764SConrad Meyer /* Init, lay down lowprob symbols */
464a0483764SConrad Meyer { ZSTD_seqSymbol_header DTableH;
465a0483764SConrad Meyer DTableH.tableLog = tableLog;
466a0483764SConrad Meyer DTableH.fastMode = 1;
467a0483764SConrad Meyer { S16 const largeLimit= (S16)(1 << (tableLog-1));
468a0483764SConrad Meyer U32 s;
469a0483764SConrad Meyer for (s=0; s<maxSV1; s++) {
470a0483764SConrad Meyer if (normalizedCounter[s]==-1) {
471a0483764SConrad Meyer tableDecode[highThreshold--].baseValue = s;
472a0483764SConrad Meyer symbolNext[s] = 1;
473a0483764SConrad Meyer } else {
474a0483764SConrad Meyer if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
4759cbefe25SConrad Meyer assert(normalizedCounter[s]>=0);
4769cbefe25SConrad Meyer symbolNext[s] = (U16)normalizedCounter[s];
477a0483764SConrad Meyer } } }
478f7cd7fe5SConrad Meyer ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
479a0483764SConrad Meyer }
480a0483764SConrad Meyer
481a0483764SConrad Meyer /* Spread symbols */
482f7cd7fe5SConrad Meyer assert(tableSize <= 512);
483f7cd7fe5SConrad Meyer /* Specialized symbol spreading for the case when there are
484f7cd7fe5SConrad Meyer * no low probability (-1 count) symbols. When compressing
485f7cd7fe5SConrad Meyer * small blocks we avoid low probability symbols to hit this
486f7cd7fe5SConrad Meyer * case, since header decoding speed matters more.
487f7cd7fe5SConrad Meyer */
488f7cd7fe5SConrad Meyer if (highThreshold == tableSize - 1) {
489f7cd7fe5SConrad Meyer size_t const tableMask = tableSize-1;
490f7cd7fe5SConrad Meyer size_t const step = FSE_TABLESTEP(tableSize);
491f7cd7fe5SConrad Meyer /* First lay down the symbols in order.
492f7cd7fe5SConrad Meyer * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
493f7cd7fe5SConrad Meyer * misses since small blocks generally have small table logs, so nearly
494f7cd7fe5SConrad Meyer * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
495f7cd7fe5SConrad Meyer * our buffer to handle the over-write.
496f7cd7fe5SConrad Meyer */
497f7cd7fe5SConrad Meyer {
498f7cd7fe5SConrad Meyer U64 const add = 0x0101010101010101ull;
499f7cd7fe5SConrad Meyer size_t pos = 0;
500f7cd7fe5SConrad Meyer U64 sv = 0;
501f7cd7fe5SConrad Meyer U32 s;
502f7cd7fe5SConrad Meyer for (s=0; s<maxSV1; ++s, sv += add) {
503f7cd7fe5SConrad Meyer int i;
504f7cd7fe5SConrad Meyer int const n = normalizedCounter[s];
505f7cd7fe5SConrad Meyer MEM_write64(spread + pos, sv);
506f7cd7fe5SConrad Meyer for (i = 8; i < n; i += 8) {
507f7cd7fe5SConrad Meyer MEM_write64(spread + pos + i, sv);
508f7cd7fe5SConrad Meyer }
509f7cd7fe5SConrad Meyer pos += n;
510f7cd7fe5SConrad Meyer }
511f7cd7fe5SConrad Meyer }
512f7cd7fe5SConrad Meyer /* Now we spread those positions across the table.
     * The benefit of doing it in two stages is that we avoid the
514f7cd7fe5SConrad Meyer * variable size inner loop, which caused lots of branch misses.
515f7cd7fe5SConrad Meyer * Now we can run through all the positions without any branch misses.
     * We unroll the loop twice, since that is what empirically worked best.
517f7cd7fe5SConrad Meyer */
518f7cd7fe5SConrad Meyer {
519f7cd7fe5SConrad Meyer size_t position = 0;
520f7cd7fe5SConrad Meyer size_t s;
521f7cd7fe5SConrad Meyer size_t const unroll = 2;
522f7cd7fe5SConrad Meyer assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
523f7cd7fe5SConrad Meyer for (s = 0; s < (size_t)tableSize; s += unroll) {
524f7cd7fe5SConrad Meyer size_t u;
525f7cd7fe5SConrad Meyer for (u = 0; u < unroll; ++u) {
526f7cd7fe5SConrad Meyer size_t const uPosition = (position + (u * step)) & tableMask;
527f7cd7fe5SConrad Meyer tableDecode[uPosition].baseValue = spread[s + u];
528f7cd7fe5SConrad Meyer }
529f7cd7fe5SConrad Meyer position = (position + (unroll * step)) & tableMask;
530f7cd7fe5SConrad Meyer }
531f7cd7fe5SConrad Meyer assert(position == 0);
532f7cd7fe5SConrad Meyer }
533f7cd7fe5SConrad Meyer } else {
534f7cd7fe5SConrad Meyer U32 const tableMask = tableSize-1;
535a0483764SConrad Meyer U32 const step = FSE_TABLESTEP(tableSize);
536a0483764SConrad Meyer U32 s, position = 0;
537a0483764SConrad Meyer for (s=0; s<maxSV1; s++) {
538a0483764SConrad Meyer int i;
539f7cd7fe5SConrad Meyer int const n = normalizedCounter[s];
540f7cd7fe5SConrad Meyer for (i=0; i<n; i++) {
541a0483764SConrad Meyer tableDecode[position].baseValue = s;
542a0483764SConrad Meyer position = (position + step) & tableMask;
543a0483764SConrad Meyer while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
544a0483764SConrad Meyer } }
545a0483764SConrad Meyer assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
546a0483764SConrad Meyer }
547a0483764SConrad Meyer
548a0483764SConrad Meyer /* Build Decoding table */
549f7cd7fe5SConrad Meyer {
550f7cd7fe5SConrad Meyer U32 u;
551a0483764SConrad Meyer for (u=0; u<tableSize; u++) {
552a0483764SConrad Meyer U32 const symbol = tableDecode[u].baseValue;
553a0483764SConrad Meyer U32 const nextState = symbolNext[symbol]++;
554a0483764SConrad Meyer tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
555a0483764SConrad Meyer tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
556a0483764SConrad Meyer assert(nbAdditionalBits[symbol] < 255);
557*5ff13fbcSAllan Jude tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
558a0483764SConrad Meyer tableDecode[u].baseValue = baseValue[symbol];
559f7cd7fe5SConrad Meyer }
560f7cd7fe5SConrad Meyer }
561f7cd7fe5SConrad Meyer }
562f7cd7fe5SConrad Meyer
/* Avoids the FORCE_INLINE of the _body() function.
 * Plain (non-BMI2) instantiation of ZSTD_buildFSETable_body(), selected at
 * runtime by ZSTD_buildFSETable() when BMI2 dispatch is unavailable or off. */
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
572f7cd7fe5SConrad Meyer
#if DYNAMIC_BMI2
/* BMI2-targeted instantiation of ZSTD_buildFSETable_body() : compiled with
 * BMI2 code generation (BMI2_TARGET_ATTRIBUTE) and only called when the
 * running CPU reports BMI2 support (see ZSTD_buildFSETable() dispatch). */
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
#endif
583f7cd7fe5SConrad Meyer
/* ZSTD_buildFSETable() :
 * Public entry point for building a sequence-decoding FSE table.
 * Dispatches to the BMI2-compiled body when the build supports dynamic
 * BMI2 selection (DYNAMIC_BMI2) and the caller reports BMI2 support;
 * otherwise falls through to the default body.
 * @bmi2 : nonzero when the running CPU supports BMI2 (ignored on builds
 *         without DYNAMIC_BMI2). */
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
{
    (void)bmi2;  /* silences unused-parameter warning when DYNAMIC_BMI2 is disabled */
#if DYNAMIC_BMI2
    if (bmi2) {
        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
                                     baseValue, nbAdditionalBits, tableLog,
                                     wksp, wkspSize);
        return;
    }
#endif
    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
                                    baseValue, nbAdditionalBits, tableLog,
                                    wksp, wkspSize);
}
600a0483764SConrad Meyer
601a0483764SConrad Meyer
602a0483764SConrad Meyer /*! ZSTD_buildSeqTable() :
603a0483764SConrad Meyer * @return : nb bytes read from src,
604a0483764SConrad Meyer * or an error code if it fails */
ZSTD_buildSeqTable(ZSTD_seqSymbol * DTableSpace,const ZSTD_seqSymbol ** DTablePtr,symbolEncodingType_e type,unsigned max,U32 maxLog,const void * src,size_t srcSize,const U32 * baseValue,const U8 * nbAdditionalBits,const ZSTD_seqSymbol * defaultTable,U32 flagRepeatTable,int ddictIsCold,int nbSeq,U32 * wksp,size_t wkspSize,int bmi2)605a0483764SConrad Meyer static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
606a0483764SConrad Meyer symbolEncodingType_e type, unsigned max, U32 maxLog,
607a0483764SConrad Meyer const void* src, size_t srcSize,
608*5ff13fbcSAllan Jude const U32* baseValue, const U8* nbAdditionalBits,
609a0483764SConrad Meyer const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
610f7cd7fe5SConrad Meyer int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
611f7cd7fe5SConrad Meyer int bmi2)
612a0483764SConrad Meyer {
613a0483764SConrad Meyer switch(type)
614a0483764SConrad Meyer {
615a0483764SConrad Meyer case set_rle :
61637f1f268SConrad Meyer RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
61737f1f268SConrad Meyer RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
618a0483764SConrad Meyer { U32 const symbol = *(const BYTE*)src;
619a0483764SConrad Meyer U32 const baseline = baseValue[symbol];
620*5ff13fbcSAllan Jude U8 const nbBits = nbAdditionalBits[symbol];
621a0483764SConrad Meyer ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
622a0483764SConrad Meyer }
623a0483764SConrad Meyer *DTablePtr = DTableSpace;
624a0483764SConrad Meyer return 1;
625a0483764SConrad Meyer case set_basic :
626a0483764SConrad Meyer *DTablePtr = defaultTable;
627a0483764SConrad Meyer return 0;
628a0483764SConrad Meyer case set_repeat:
62937f1f268SConrad Meyer RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
630a0483764SConrad Meyer /* prefetch FSE table if used */
631a0483764SConrad Meyer if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
632a0483764SConrad Meyer const void* const pStart = *DTablePtr;
633a0483764SConrad Meyer size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
634a0483764SConrad Meyer PREFETCH_AREA(pStart, pSize);
635a0483764SConrad Meyer }
636a0483764SConrad Meyer return 0;
637a0483764SConrad Meyer case set_compressed :
638a0483764SConrad Meyer { unsigned tableLog;
639a0483764SConrad Meyer S16 norm[MaxSeq+1];
640a0483764SConrad Meyer size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
64137f1f268SConrad Meyer RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
64237f1f268SConrad Meyer RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
643f7cd7fe5SConrad Meyer ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
644a0483764SConrad Meyer *DTablePtr = DTableSpace;
645a0483764SConrad Meyer return headerSize;
646a0483764SConrad Meyer }
6472b9c00cbSConrad Meyer default :
648a0483764SConrad Meyer assert(0);
6492b9c00cbSConrad Meyer RETURN_ERROR(GENERIC, "impossible");
650a0483764SConrad Meyer }
651a0483764SConrad Meyer }
652a0483764SConrad Meyer
/* ZSTD_decodeSeqHeaders() :
 * Decodes the header of the sequences section : the variable-length
 * sequence count, then the byte holding the three symbol encoding types,
 * then builds/selects the LL, OF and ML FSE decoding tables accordingly.
 * @return : number of bytes consumed from src, or an error code. */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                             const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    int nbSeq;
    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

    /* check */
    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");

    /* SeqHead : sequence count, 1 to 3 bytes */
    nbSeq = *ip++;
    if (nbSeq == 0) {
        /* empty sequences section : must be exactly 1 byte */
        *nbSeqPtr = 0;
        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
        return 1;
    }
    if (nbSeq == 0xFF) {
        /* 3-byte form : 0xFF marker followed by little-endian 16-bit count */
        RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
        nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
        ip += 2;
    } else if (nbSeq > 0x7F) {
        /* 2-byte form */
        RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
        nbSeq = ((nbSeq-0x80)<<8) + *ip++;
    }
    *nbSeqPtr = nbSeq;

    /* FSE table descriptors */
    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
        ip++;

        /* Build DTables : literal lengths, then offsets, then match lengths */
        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                      LLtype, MaxLL, LLFSELog,
                                                      ip, iend-ip,
                                                      LL_base, LL_bits,
                                                      LL_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += llhSize;
        }
        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                      OFtype, MaxOff, OffFSELog,
                                                      ip, iend-ip,
                                                      OF_base, OF_bits,
                                                      OF_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += ofhSize;
        }
        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                      MLtype, MaxML, MLFSELog,
                                                      ip, iend-ip,
                                                      ML_base, ML_bits,
                                                      ML_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += mlhSize;
        }
    }

    return (size_t)(ip - istart);
}
731a0483764SConrad Meyer
732a0483764SConrad Meyer
/* One decoded sequence : copy litLength literal bytes, then copy
 * matchLength bytes from `offset` bytes back in the output (see
 * ZSTD_execSequence(), where match = oLitEnd - offset). */
typedef struct {
    size_t litLength;    /* number of literal bytes to copy first */
    size_t matchLength;  /* number of match bytes to copy afterwards */
    size_t offset;       /* match distance, measured back from the end of the literals */
} seq_t;
738a0483764SConrad Meyer
/* One FSE decoder : current state value plus the table it indexes into. */
typedef struct {
    size_t state;                 /* current FSE state */
    const ZSTD_seqSymbol* table;  /* decoding table for this symbol category */
} ZSTD_fseState;
743a0483764SConrad Meyer
/* Full state for decoding the sequences section : one backward bitstream
 * reader and three interleaved FSE states (literal lengths, offsets,
 * match lengths), plus the recent-offsets history. */
typedef struct {
    BIT_DStream_t DStream;        /* bitstream read state */
    ZSTD_fseState stateLL;        /* literal-length decoder */
    ZSTD_fseState stateOffb;      /* offset-code decoder */
    ZSTD_fseState stateML;        /* match-length decoder */
    size_t prevOffset[ZSTD_REP_NUM];  /* recent offsets, used by repeat-offset codes */
} seqState_t;
751a0483764SConrad Meyer
/*! ZSTD_overlapCopy8() :
 * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
 * If the offset is < 8 then the offset is spread to at least 8 bytes.
 *
 * Precondition: *ip <= *op
 * Postcondition: *op - *ip >= 8
 */
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
    assert(*ip <= *op);
    if (offset < 8) {
        /* close range match, overlap */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[offset];
        /* Copy the first 4 bytes one at a time : src and dst overlap when
         * offset < 8, so a wider copy could read bytes not yet written. */
        (*op)[0] = (*ip)[0];
        (*op)[1] = (*ip)[1];
        (*op)[2] = (*ip)[2];
        (*op)[3] = (*ip)[3];
        /* Adjust *ip (add dec32table[offset], copy 4, subtract dec64table[offset])
         * so that after the final +8 below, *op - *ip >= 8 holds — checked by
         * the postcondition assert at the end. */
        *ip += dec32table[offset];
        ZSTD_copy4(*op+4, *ip);
        *ip -= sub2;
    } else {
        /* No close overlap : a single 8-byte copy is safe. */
        ZSTD_copy8(*op, *ip);
    }
    *ip += 8;
    *op += 8;
    assert(*op - *ip >= 8);
}
780a0483764SConrad Meyer
/*! ZSTD_safecopy() :
 * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
 * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
 * This function is only called in the uncommon case where the sequence is near the end of the block. It
 * should be fast for a single long sequence, but can be slow for several short sequences.
 *
 * @param ovtype controls the overlap detection
 *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
 *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
 *           The src buffer must be before the dst buffer.
 */
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
    ptrdiff_t const diff = op - ip;   /* >= 0 in the overlap case (asserted below) */
    BYTE* const oend = op + length;

    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));

    if (length < 8) {
        /* Handle short lengths. */
        while (op < oend) *op++ = *ip++;
        return;
    }
    if (ovtype == ZSTD_overlap_src_before_dst) {
        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
        assert(length >= 8);
        ZSTD_overlapCopy8(&op, &ip, diff);
        length -= 8;
        assert(op - ip >= 8);
        assert(op <= oend);
    }

    if (oend <= oend_w) {
        /* No risk of overwrite. */
        ZSTD_wildcopy(op, ip, length, ovtype);
        return;
    }
    if (op <= oend_w) {
        /* Wildcopy until we get close to the end. */
        assert(oend > oend_w);
        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
        /* NOTE: ip must be advanced before op — both updates use the
         * pre-advance value of op.  After these two lines op == oend_w. */
        ip += oend_w - op;
        op += oend_w - op;
    }
    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}
8289cbefe25SConrad Meyer
/* ZSTD_safecopyDstBeforeSrc():
 * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
 * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
    ptrdiff_t const diff = op - ip;   /* negative when dst precedes src */
    BYTE* const oend = op + length;

    if (length < 8 || diff > -8) {
        /* Handle short lengths, close overlaps, and dst not before src. */
        while (op < oend) *op++ = *ip++;
        return;
    }

    if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
        /* Bulk copy is safe : dst trails src by more than a vector width, and
         * stopping WILDCOPY_OVERLENGTH short of oend leaves room for the
         * wildcopy's over-length writes.
         * NOTE: ip must be advanced before op — both expressions use the
         * pre-advance value of op. */
        ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
        ip += oend - WILDCOPY_OVERLENGTH - op;
        op += oend - WILDCOPY_OVERLENGTH - op;
    }

    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}
851*5ff13fbcSAllan Jude
/* ZSTD_execSequenceEnd():
 * This version handles cases that are near the end of the output buffer. It requires
 * more careful checks to make sure there is no overflow. By separating out these hard
 * and unlikely cases, we can speed up the common cases.
 *
 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
 */
FORCE_NOINLINE
size_t ZSTD_execSequenceEnd(BYTE* op,
    BYTE* const oend, seq_t sequence,
    const BYTE** litPtr, const BYTE* const litLimit,
    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;  /* wildcopy limit passed to ZSTD_safecopy() */

    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);       /* no pointer wraparound */
    assert(oLitEnd < op + sequenceLength);  /* implies matchLength > 0 */

    /* copy literals */
    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix : match starts in the external dictionary */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart - match);  /* remap into the dictionary buffer */
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        { size_t const length1 = dictEnd - match;
          ZSTD_memmove(oLitEnd, match, length1);
          op = oLitEnd + length1;
          sequence.matchLength -= length1;  /* remainder copied from the prefix below */
          match = prefixStart;
    }   }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}
903*5ff13fbcSAllan Jude
/* ZSTD_execSequenceEndSplitLitBuffer():
 * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
 * Same end-of-buffer handling as ZSTD_execSequenceEnd(), plus a check that
 * the output cursor does not run into the (split) literal buffer.
 */
FORCE_NOINLINE
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
    const BYTE** litPtr, const BYTE* const litLimit,
    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;


    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);       /* no pointer wraparound */
    assert(oLitEnd < op + sequenceLength);  /* implies matchLength > 0 */

    /* copy literals */
    RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
    ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix : match starts in the external dictionary */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart - match);  /* remap into the dictionary buffer */
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        { size_t const length1 = dictEnd - match;
          ZSTD_memmove(oLitEnd, match, length1);
          op = oLitEnd + length1;
          sequence.matchLength -= length1;  /* remainder copied from the prefix below */
          match = prefixStart;
    }   }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}
951a0483764SConrad Meyer
/* ZSTD_execSequence() :
 * Fast path for executing one decoded sequence : copies the literals, then
 * the match, using over-length wildcopies. All near-end-of-buffer and
 * out-of-bounds situations are delegated to ZSTD_execSequenceEnd().
 * @return : sequenceLength (litLength + matchLength), or an error code. */
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
    BYTE* const oend, seq_t sequence,
    const BYTE** litPtr, const BYTE* const litLimit,
    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     * - Read beyond end of literals
     * - Match end is within WILDCOPY_OVERLIMIT of oend
     * - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
            iLitEnd > litLimit ||
            oMatchEnd > oend_w ||
            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);  /* remap into the dictionary buffer */
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        { size_t const length1 = dictEnd - match;
          ZSTD_memmove(oLitEnd, match, length1);
          op = oLitEnd + length1;
          sequence.matchLength -= length1;  /* remainder copied from the prefix below */
          match = prefixStart;
    }   }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}
1044*5ff13fbcSAllan Jude
/* ZSTD_execSequenceSplitLitBuffer() :
 * Execute one decoded sequence (copy litLength literals, then matchLength
 * match bytes) for the case where the literal buffer is split between dst
 * and litExtraBuffer. oend_w bounds the region where wildcopy overwrites
 * are allowed (callers pass it so that wildcopies cannot clobber literals
 * still pending in dst — see the call site in the decode loop).
 * @return : sequenceLength (litLength + matchLength), or an error code. */
HINT_INLINE
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
    const BYTE** litPtr, const BYTE* const litLimit,
    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     * - Read beyond end of literals
     * - Match end is within WILDCOPY_OVERLIMIT of oend
     * - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
            iLitEnd > litLimit ||
            oMatchEnd > oend_w ||
            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            /* match fully inside extDict : single memmove */
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}
1135a0483764SConrad Meyer
1136*5ff13fbcSAllan Jude
1137a0483764SConrad Meyer static void
ZSTD_initFseState(ZSTD_fseState * DStatePtr,BIT_DStream_t * bitD,const ZSTD_seqSymbol * dt)1138a0483764SConrad Meyer ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1139a0483764SConrad Meyer {
1140a0483764SConrad Meyer const void* ptr = dt;
1141a0483764SConrad Meyer const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1142a0483764SConrad Meyer DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1143a0483764SConrad Meyer DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1144a0483764SConrad Meyer (U32)DStatePtr->state, DTableH->tableLog);
1145a0483764SConrad Meyer BIT_reloadDStream(bitD);
1146a0483764SConrad Meyer DStatePtr->table = dt + 1;
1147a0483764SConrad Meyer }
1148a0483764SConrad Meyer
1149a0483764SConrad Meyer FORCE_INLINE_TEMPLATE void
ZSTD_updateFseStateWithDInfo(ZSTD_fseState * DStatePtr,BIT_DStream_t * bitD,U16 nextState,U32 nbBits)1150*5ff13fbcSAllan Jude ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1151a0483764SConrad Meyer {
1152a0483764SConrad Meyer size_t const lowBits = BIT_readBits(bitD, nbBits);
1153*5ff13fbcSAllan Jude DStatePtr->state = nextState + lowBits;
115437f1f268SConrad Meyer }
115537f1f268SConrad Meyer
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bytes we read
 * after reloading when we are decoding long offsets.
 */
#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
        : 0)

/* Selects the regular decoding path, or the long-offset path used when an
 * offset may need more bits than a 32-bit accumulator holds in one read. */
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1167a0483764SConrad Meyer
/* ZSTD_decodeSequence() :
 * Decode the next (litLength, matchLength, offset) triple from the three
 * FSE states and the bitstream in seqState, update the repeat-offset
 * history (prevOffset[]), and advance all three FSE states.
 * @return : the decoded sequence. */
FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{
    seq_t seq;
    /* Current table entry for each of the three interleaved FSE streams. */
    const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
    const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
    const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
    seq.matchLength = mlDInfo->baseValue;
    seq.litLength = llDInfo->baseValue;
    {   U32 const ofBase = ofDInfo->baseValue;
        BYTE const llBits = llDInfo->nbAdditionalBits;
        BYTE const mlBits = mlDInfo->nbAdditionalBits;
        BYTE const ofBits = ofDInfo->nbAdditionalBits;
        BYTE const totalBits = llBits+mlBits+ofBits;

        /* Fetch everything needed for the state updates up front, so the
         * table entries are not re-read after the bitstream operations. */
        U16 const llNext = llDInfo->nextState;
        U16 const mlNext = mlDInfo->nextState;
        U16 const ofNext = ofDInfo->nextState;
        U32 const llnbBits = llDInfo->nbBits;
        U32 const mlnbBits = mlDInfo->nbBits;
        U32 const ofnbBits = ofDInfo->nbBits;
        /*
         * As gcc has better branch and block analyzers, sometimes it is only
         * valuable to mark likelyness for clang, it gives around 3-4% of
         * performance.
         */

        /* sequence */
        {   size_t offset;
#if defined(__clang__)
            if (LIKELY(ofBits > 1)) {
#else
            if (ofBits > 1) {
#endif
                /* Regular offset: read ofBits extra bits and rotate the
                 * repeat-offset history. */
                ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
                ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
                assert(ofBits <= MaxOff);
                if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
                    /* 32-bit long-offset path: split the read around a reload
                     * because the accumulator cannot hold ofBits at once. */
                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
                    offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                    BIT_reloadDStream(&seqState->DStream);
                    if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
                } else {
                    offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
                    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
                }
                seqState->prevOffset[2] = seqState->prevOffset[1];
                seqState->prevOffset[1] = seqState->prevOffset[0];
                seqState->prevOffset[0] = offset;
            } else {
                /* ofBits <= 1 : repeat-offset code. ll0 shifts the repcode
                 * index when the literal length is 0 (per the zstd format). */
                U32 const ll0 = (llDInfo->baseValue == 0);
                if (LIKELY((ofBits == 0))) {
                    offset = seqState->prevOffset[ll0];
                    seqState->prevOffset[1] = seqState->prevOffset[!ll0];
                    seqState->prevOffset[0] = offset;
                } else {
                    offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
                    {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                        if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                        seqState->prevOffset[1] = seqState->prevOffset[0];
                        seqState->prevOffset[0] = offset = temp;
            }   }   }
            seq.offset = offset;
        }

#if defined(__clang__)
        if (UNLIKELY(mlBits > 0))
#else
        if (mlBits > 0)
#endif
            seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);

        if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
            BIT_reloadDStream(&seqState->DStream);
        if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
            BIT_reloadDStream(&seqState->DStream);
        /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
        ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);

#if defined(__clang__)
        if (UNLIKELY(llBits > 0))
#else
        if (llBits > 0)
#endif
            seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);

        if (MEM_32bits())
            BIT_reloadDStream(&seqState->DStream);

        DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                    (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);

        /* Advance the three FSE states for the next sequence. */
        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
    }

    return seq;
}
1270a0483764SConrad Meyer
127137f1f268SConrad Meyer #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1272f7cd7fe5SConrad Meyer MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
127337f1f268SConrad Meyer {
127437f1f268SConrad Meyer size_t const windowSize = dctx->fParams.windowSize;
127537f1f268SConrad Meyer /* No dictionary used. */
127637f1f268SConrad Meyer if (dctx->dictContentEndForFuzzing == NULL) return 0;
127737f1f268SConrad Meyer /* Dictionary is our prefix. */
127837f1f268SConrad Meyer if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
127937f1f268SConrad Meyer /* Dictionary is not our ext-dict. */
128037f1f268SConrad Meyer if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
128137f1f268SConrad Meyer /* Dictionary is not within our window size. */
128237f1f268SConrad Meyer if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
128337f1f268SConrad Meyer /* Dictionary is active. */
128437f1f268SConrad Meyer return 1;
128537f1f268SConrad Meyer }
128637f1f268SConrad Meyer
/* Fuzzing helper : asserts that a decoded sequence fits in the remaining
 * output and that its offset stays within the window (plus the dictionary,
 * when one is active). Compiles to a no-op when DEBUGLEVEL < 1. */
MEM_STATIC void ZSTD_assertValidSequence(
        ZSTD_DCtx const* dctx,
        BYTE const* op, BYTE const* oend,
        seq_t const seq,
        BYTE const* prefixStart, BYTE const* virtualStart)
{
#if DEBUGLEVEL >= 1
    size_t const windowSize = dctx->fParams.windowSize;
    size_t const sequenceSize = seq.litLength + seq.matchLength;
    BYTE const* const oLitEnd = op + seq.litLength;
    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
    assert(op <= oend);
    assert((size_t)(oend - op) >= sequenceSize);
    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
        /* Offset must be within the dictionary. */
        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
        assert(seq.offset <= windowSize + dictSize);
    } else {
        /* Offset must be within our window. */
        assert(seq.offset <= windowSize);
    }
#else
    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
#endif
}
131537f1f268SConrad Meyer #endif
131637f1f268SConrad Meyer
131737f1f268SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1318*5ff13fbcSAllan Jude
1319*5ff13fbcSAllan Jude
/* ZSTD_decompressSequences_bodySplitLitBuffer() :
 * Decode all sequences of a block when the literal buffer is split between
 * dst and litExtraBuffer. Runs in three phases:
 *   1) decode while literals come from the dst-resident portion,
 *   2) migrate leftover literals and switch to litExtraBuffer,
 *   3) decode the remaining sequences, then flush the last literal segment.
 * @return : number of bytes written into dst, or an error code. */
FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                               const ZSTD_longOffset_e isLongOffset,
                               const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* litBufferEnd = dctx->litBufferEnd;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
    (void)frame;

    /* Regen sequences */
    if (nbSeq) {
        seqState_t seqState;
        dctx->fseEntropy = 1;
        /* Seed repeat-offset history from the previous block. */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
        assert(dst != NULL);

        ZSTD_STATIC_ASSERT(
                BIT_DStream_unfinished < BIT_DStream_completed &&
                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                BIT_DStream_completed < BIT_DStream_overflow);

        /* decompress without overrunning litPtr begins */
        {
            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            /* Align the decompression loop to 32 + 16 bytes.
             *
             * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
             * speed swings based on the alignment of the decompression loop. This
             * performance swing is caused by parts of the decompression loop falling
             * out of the DSB. The entire decompression loop should fit in the DSB,
             * when it can't we get much worse performance. You can measure if you've
             * hit the good case or the bad case with this perf command for some
             * compressed file test.zst:
             *
             *     perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
             *               -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
             *
             * If you see most cycles served out of the MITE you've hit the bad case.
             * If you see most cycles served out of the DSB you've hit the good case.
             * If it is pretty even then you may be in an okay case.
             *
             * This issue has been reproduced on the following CPUs:
             * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
             *             Use Instruments->Counters to get DSB/MITE cycles.
             *             I never got performance swings, but I was able to
             *             go from the good case of mostly DSB to half of the
             *             cycles served from MITE.
             * - Coffeelake: Intel i9-9900k
             * - Coffeelake: Intel i7-9700k
             *
             * I haven't been able to reproduce the instability or DSB misses on any
             * of the following CPUS:
             * - Haswell
             * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
             * - Skylake
             *
             * Alignment is done for each of the three major decompression loops:
             *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
             *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
             *   - ZSTD_decompressSequences_body
             * Alignment choices are made to minimize large swings on bad cases and influence on performance
             * from changes external to this code, rather than to overoptimize on the current commit.
             *
             * If you are seeing performance instability this script can help test.
             * It tests on 4 commits in zstd where I saw performance change.
             *
             *     https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
             */
#if defined(__GNUC__) && defined(__x86_64__)
            __asm__(".p2align 6");
# if __GNUC__ >= 7
            /* good for gcc-7, gcc-9, and gcc-11 */
            __asm__("nop");
            __asm__(".p2align 5");
            __asm__("nop");
            __asm__(".p2align 4");
#  if __GNUC__ == 8 || __GNUC__ == 10
            /* good for gcc-8 and gcc-10 */
            __asm__("nop");
            __asm__(".p2align 3");
#  endif
# endif
#endif

            /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
                /* oend_w keeps wildcopies from overwriting literals still pending in dst */
                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                    return oneSeqSize;
                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                op += oneSeqSize;
                if (UNLIKELY(!--nbSeq))
                    break;
                BIT_reloadDStream(&(seqState.DStream));
                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            }

            /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
            if (nbSeq > 0) {
                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
                if (leftoverLit)
                {
                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                    sequence.litLength -= leftoverLit;
                    op += leftoverLit;
                }
                litPtr = dctx->litExtraBuffer;
                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                /* Finish the sequence that straddled the split, now reading
                 * its remaining literals from litExtraBuffer. */
                {
                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                    assert(!ZSTD_isError(oneSeqSize));
                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                        return oneSeqSize;
                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                    op += oneSeqSize;
                    if (--nbSeq)
                        BIT_reloadDStream(&(seqState.DStream));
                }
            }
        }

        if (nbSeq > 0) /* there is remaining lit from extra buffer */
        {

#if defined(__GNUC__) && defined(__x86_64__)
            __asm__(".p2align 6");
            __asm__("nop");
# if __GNUC__ != 7
            /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
            __asm__(".p2align 4");
            __asm__("nop");
            __asm__(".p2align 3");
# elif __GNUC__ >= 11
            /* NOTE(review): this branch is unreachable — the preceding
             * `__GNUC__ != 7` case matches first for any __GNUC__ >= 11;
             * kept as-is to match upstream. */
            __asm__(".p2align 3");
# else
            __asm__(".p2align 5");
            __asm__("nop");
            __asm__(".p2align 3");
# endif
#endif

            for (; ; ) {
                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                    return oneSeqSize;
                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                op += oneSeqSize;
                if (UNLIKELY(!--nbSeq))
                    break;
                BIT_reloadDStream(&(seqState.DStream));
            }
        }

        /* check if reached exact end */
        DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
    {
        size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
        if (op != NULL) {
            /* memmove : literals may already live inside dst */
            ZSTD_memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
        litPtr = dctx->litExtraBuffer;
        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
        dctx->litBufferLocation = ZSTD_not_in_dst;
    }
    {   size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memcpy(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }

    return op-ostart;
}
1536*5ff13fbcSAllan Jude
1537*5ff13fbcSAllan Jude FORCE_INLINE_TEMPLATE size_t
1538*5ff13fbcSAllan Jude DONT_VECTORIZE
1539*5ff13fbcSAllan Jude ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1540*5ff13fbcSAllan Jude void* dst, size_t maxDstSize,
1541*5ff13fbcSAllan Jude const void* seqStart, size_t seqSize, int nbSeq,
1542*5ff13fbcSAllan Jude const ZSTD_longOffset_e isLongOffset,
1543*5ff13fbcSAllan Jude const int frame)
1544*5ff13fbcSAllan Jude {
1545*5ff13fbcSAllan Jude const BYTE* ip = (const BYTE*)seqStart;
1546*5ff13fbcSAllan Jude const BYTE* const iend = ip + seqSize;
1547*5ff13fbcSAllan Jude BYTE* const ostart = (BYTE*)dst;
1548*5ff13fbcSAllan Jude BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
1549*5ff13fbcSAllan Jude BYTE* op = ostart;
1550*5ff13fbcSAllan Jude const BYTE* litPtr = dctx->litPtr;
1551*5ff13fbcSAllan Jude const BYTE* const litEnd = litPtr + dctx->litSize;
1552*5ff13fbcSAllan Jude const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1553*5ff13fbcSAllan Jude const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1554*5ff13fbcSAllan Jude const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1555*5ff13fbcSAllan Jude DEBUGLOG(5, "ZSTD_decompressSequences_body");
1556*5ff13fbcSAllan Jude (void)frame;
1557*5ff13fbcSAllan Jude
1558*5ff13fbcSAllan Jude /* Regen sequences */
1559*5ff13fbcSAllan Jude if (nbSeq) {
1560*5ff13fbcSAllan Jude seqState_t seqState;
1561*5ff13fbcSAllan Jude dctx->fseEntropy = 1;
1562*5ff13fbcSAllan Jude { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1563*5ff13fbcSAllan Jude RETURN_ERROR_IF(
1564*5ff13fbcSAllan Jude ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1565*5ff13fbcSAllan Jude corruption_detected, "");
1566*5ff13fbcSAllan Jude ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1567*5ff13fbcSAllan Jude ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1568*5ff13fbcSAllan Jude ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1569*5ff13fbcSAllan Jude assert(dst != NULL);
1570*5ff13fbcSAllan Jude
1571*5ff13fbcSAllan Jude ZSTD_STATIC_ASSERT(
1572*5ff13fbcSAllan Jude BIT_DStream_unfinished < BIT_DStream_completed &&
1573*5ff13fbcSAllan Jude BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1574*5ff13fbcSAllan Jude BIT_DStream_completed < BIT_DStream_overflow);
1575*5ff13fbcSAllan Jude
1576*5ff13fbcSAllan Jude #if defined(__GNUC__) && defined(__x86_64__)
1577*5ff13fbcSAllan Jude __asm__(".p2align 6");
1578*5ff13fbcSAllan Jude __asm__("nop");
1579*5ff13fbcSAllan Jude # if __GNUC__ >= 7
1580*5ff13fbcSAllan Jude __asm__(".p2align 5");
1581*5ff13fbcSAllan Jude __asm__("nop");
1582*5ff13fbcSAllan Jude __asm__(".p2align 3");
1583*5ff13fbcSAllan Jude # else
1584*5ff13fbcSAllan Jude __asm__(".p2align 4");
1585*5ff13fbcSAllan Jude __asm__("nop");
1586*5ff13fbcSAllan Jude __asm__(".p2align 3");
1587*5ff13fbcSAllan Jude # endif
1588*5ff13fbcSAllan Jude #endif
1589*5ff13fbcSAllan Jude
1590*5ff13fbcSAllan Jude for ( ; ; ) {
1591*5ff13fbcSAllan Jude seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1592a0483764SConrad Meyer size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
159337f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
159437f1f268SConrad Meyer assert(!ZSTD_isError(oneSeqSize));
159537f1f268SConrad Meyer if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
159637f1f268SConrad Meyer #endif
1597*5ff13fbcSAllan Jude if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1598*5ff13fbcSAllan Jude return oneSeqSize;
1599a0483764SConrad Meyer DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1600f7cd7fe5SConrad Meyer op += oneSeqSize;
1601*5ff13fbcSAllan Jude if (UNLIKELY(!--nbSeq))
1602f7cd7fe5SConrad Meyer break;
1603*5ff13fbcSAllan Jude BIT_reloadDStream(&(seqState.DStream));
160437f1f268SConrad Meyer }
1605a0483764SConrad Meyer
1606a0483764SConrad Meyer /* check if reached exact end */
1607a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
160837f1f268SConrad Meyer RETURN_ERROR_IF(nbSeq, corruption_detected, "");
160937f1f268SConrad Meyer RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1610a0483764SConrad Meyer /* save reps for next block */
1611a0483764SConrad Meyer { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1612a0483764SConrad Meyer }
1613a0483764SConrad Meyer
1614a0483764SConrad Meyer /* last literal segment */
1615a0483764SConrad Meyer { size_t const lastLLSize = litEnd - litPtr;
161637f1f268SConrad Meyer RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
161737f1f268SConrad Meyer if (op != NULL) {
1618f7cd7fe5SConrad Meyer ZSTD_memcpy(op, litPtr, lastLLSize);
1619a0483764SConrad Meyer op += lastLLSize;
1620a0483764SConrad Meyer }
162137f1f268SConrad Meyer }
1622a0483764SConrad Meyer
1623a0483764SConrad Meyer return op-ostart;
1624a0483764SConrad Meyer }
1625a0483764SConrad Meyer
1626a0483764SConrad Meyer static size_t
1627a0483764SConrad Meyer ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1628a0483764SConrad Meyer void* dst, size_t maxDstSize,
1629a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq,
163037f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset,
163137f1f268SConrad Meyer const int frame)
1632a0483764SConrad Meyer {
163337f1f268SConrad Meyer return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1634a0483764SConrad Meyer }
1635*5ff13fbcSAllan Jude
1636*5ff13fbcSAllan Jude static size_t
1637*5ff13fbcSAllan Jude ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1638*5ff13fbcSAllan Jude void* dst, size_t maxDstSize,
1639*5ff13fbcSAllan Jude const void* seqStart, size_t seqSize, int nbSeq,
1640*5ff13fbcSAllan Jude const ZSTD_longOffset_e isLongOffset,
1641*5ff13fbcSAllan Jude const int frame)
1642*5ff13fbcSAllan Jude {
1643*5ff13fbcSAllan Jude return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1644*5ff13fbcSAllan Jude }
1645a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1646a0483764SConrad Meyer
1647a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1648*5ff13fbcSAllan Jude
1649*5ff13fbcSAllan Jude FORCE_INLINE_TEMPLATE size_t
1650*5ff13fbcSAllan Jude ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1651*5ff13fbcSAllan Jude const BYTE* const prefixStart, const BYTE* const dictEnd)
1652*5ff13fbcSAllan Jude {
1653*5ff13fbcSAllan Jude prefetchPos += sequence.litLength;
1654*5ff13fbcSAllan Jude { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1655*5ff13fbcSAllan Jude const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1656*5ff13fbcSAllan Jude * No consequence though : memory address is only used for prefetching, not for dereferencing */
1657*5ff13fbcSAllan Jude PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1658*5ff13fbcSAllan Jude }
1659*5ff13fbcSAllan Jude return prefetchPos + sequence.matchLength;
1660*5ff13fbcSAllan Jude }
1661*5ff13fbcSAllan Jude
1662*5ff13fbcSAllan Jude /* This decoding function employs prefetching
1663*5ff13fbcSAllan Jude * to reduce latency impact of cache misses.
1664*5ff13fbcSAllan Jude * It's generally employed when block contains a significant portion of long-distance matches
1665*5ff13fbcSAllan Jude * or when coupled with a "cold" dictionary */
1666a0483764SConrad Meyer FORCE_INLINE_TEMPLATE size_t
1667a0483764SConrad Meyer ZSTD_decompressSequencesLong_body(
1668a0483764SConrad Meyer ZSTD_DCtx* dctx,
1669a0483764SConrad Meyer void* dst, size_t maxDstSize,
1670a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq,
167137f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset,
167237f1f268SConrad Meyer const int frame)
1673a0483764SConrad Meyer {
1674a0483764SConrad Meyer const BYTE* ip = (const BYTE*)seqStart;
1675a0483764SConrad Meyer const BYTE* const iend = ip + seqSize;
1676*5ff13fbcSAllan Jude BYTE* const ostart = (BYTE*)dst;
1677*5ff13fbcSAllan Jude BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
1678a0483764SConrad Meyer BYTE* op = ostart;
1679a0483764SConrad Meyer const BYTE* litPtr = dctx->litPtr;
1680*5ff13fbcSAllan Jude const BYTE* litBufferEnd = dctx->litBufferEnd;
1681a0483764SConrad Meyer const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1682a0483764SConrad Meyer const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1683a0483764SConrad Meyer const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
168437f1f268SConrad Meyer (void)frame;
1685a0483764SConrad Meyer
1686a0483764SConrad Meyer /* Regen sequences */
1687a0483764SConrad Meyer if (nbSeq) {
1688*5ff13fbcSAllan Jude #define STORED_SEQS 8
1689a0483764SConrad Meyer #define STORED_SEQS_MASK (STORED_SEQS-1)
1690*5ff13fbcSAllan Jude #define ADVANCED_SEQS STORED_SEQS
1691a0483764SConrad Meyer seq_t sequences[STORED_SEQS];
1692a0483764SConrad Meyer int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1693a0483764SConrad Meyer seqState_t seqState;
1694a0483764SConrad Meyer int seqNb;
1695*5ff13fbcSAllan Jude size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1696*5ff13fbcSAllan Jude
1697a0483764SConrad Meyer dctx->fseEntropy = 1;
1698a0483764SConrad Meyer { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
169937f1f268SConrad Meyer assert(dst != NULL);
1700a0483764SConrad Meyer assert(iend >= ip);
17012b9c00cbSConrad Meyer RETURN_ERROR_IF(
17022b9c00cbSConrad Meyer ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
170337f1f268SConrad Meyer corruption_detected, "");
1704a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1705a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1706a0483764SConrad Meyer ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1707a0483764SConrad Meyer
1708a0483764SConrad Meyer /* prepare in advance */
1709a0483764SConrad Meyer for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1710*5ff13fbcSAllan Jude seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1711*5ff13fbcSAllan Jude prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1712*5ff13fbcSAllan Jude sequences[seqNb] = sequence;
1713a0483764SConrad Meyer }
171437f1f268SConrad Meyer RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1715a0483764SConrad Meyer
1716*5ff13fbcSAllan Jude /* decompress without stomping litBuffer */
1717a0483764SConrad Meyer for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
1718*5ff13fbcSAllan Jude seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1719*5ff13fbcSAllan Jude size_t oneSeqSize;
1720*5ff13fbcSAllan Jude
1721*5ff13fbcSAllan Jude if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
1722*5ff13fbcSAllan Jude {
1723*5ff13fbcSAllan Jude /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1724*5ff13fbcSAllan Jude const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1725*5ff13fbcSAllan Jude if (leftoverLit)
1726*5ff13fbcSAllan Jude {
1727*5ff13fbcSAllan Jude RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1728*5ff13fbcSAllan Jude ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1729*5ff13fbcSAllan Jude sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1730*5ff13fbcSAllan Jude op += leftoverLit;
1731*5ff13fbcSAllan Jude }
1732*5ff13fbcSAllan Jude litPtr = dctx->litExtraBuffer;
1733*5ff13fbcSAllan Jude litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1734*5ff13fbcSAllan Jude dctx->litBufferLocation = ZSTD_not_in_dst;
1735*5ff13fbcSAllan Jude oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
173637f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
173737f1f268SConrad Meyer assert(!ZSTD_isError(oneSeqSize));
173837f1f268SConrad Meyer if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
173937f1f268SConrad Meyer #endif
1740a0483764SConrad Meyer if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1741*5ff13fbcSAllan Jude
1742*5ff13fbcSAllan Jude prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1743a0483764SConrad Meyer sequences[seqNb & STORED_SEQS_MASK] = sequence;
1744a0483764SConrad Meyer op += oneSeqSize;
1745a0483764SConrad Meyer }
1746*5ff13fbcSAllan Jude else
1747*5ff13fbcSAllan Jude {
1748*5ff13fbcSAllan Jude /* lit buffer is either wholly contained in first or second split, or not split at all*/
1749*5ff13fbcSAllan Jude oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1750*5ff13fbcSAllan Jude ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1751*5ff13fbcSAllan Jude ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1752*5ff13fbcSAllan Jude #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1753*5ff13fbcSAllan Jude assert(!ZSTD_isError(oneSeqSize));
1754*5ff13fbcSAllan Jude if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1755*5ff13fbcSAllan Jude #endif
1756*5ff13fbcSAllan Jude if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1757*5ff13fbcSAllan Jude
1758*5ff13fbcSAllan Jude prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1759*5ff13fbcSAllan Jude sequences[seqNb & STORED_SEQS_MASK] = sequence;
1760*5ff13fbcSAllan Jude op += oneSeqSize;
1761*5ff13fbcSAllan Jude }
1762*5ff13fbcSAllan Jude }
176337f1f268SConrad Meyer RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1764a0483764SConrad Meyer
1765a0483764SConrad Meyer /* finish queue */
1766a0483764SConrad Meyer seqNb -= seqAdvance;
1767a0483764SConrad Meyer for ( ; seqNb<nbSeq ; seqNb++) {
1768*5ff13fbcSAllan Jude seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1769*5ff13fbcSAllan Jude if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
1770*5ff13fbcSAllan Jude {
1771*5ff13fbcSAllan Jude const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1772*5ff13fbcSAllan Jude if (leftoverLit)
1773*5ff13fbcSAllan Jude {
1774*5ff13fbcSAllan Jude RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1775*5ff13fbcSAllan Jude ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1776*5ff13fbcSAllan Jude sequence->litLength -= leftoverLit;
1777*5ff13fbcSAllan Jude op += leftoverLit;
1778*5ff13fbcSAllan Jude }
1779*5ff13fbcSAllan Jude litPtr = dctx->litExtraBuffer;
1780*5ff13fbcSAllan Jude litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1781*5ff13fbcSAllan Jude dctx->litBufferLocation = ZSTD_not_in_dst;
1782*5ff13fbcSAllan Jude {
1783*5ff13fbcSAllan Jude size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
178437f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
178537f1f268SConrad Meyer assert(!ZSTD_isError(oneSeqSize));
178637f1f268SConrad Meyer if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
178737f1f268SConrad Meyer #endif
1788a0483764SConrad Meyer if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1789a0483764SConrad Meyer op += oneSeqSize;
1790a0483764SConrad Meyer }
1791*5ff13fbcSAllan Jude }
1792*5ff13fbcSAllan Jude else
1793*5ff13fbcSAllan Jude {
1794*5ff13fbcSAllan Jude size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1795*5ff13fbcSAllan Jude ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1796*5ff13fbcSAllan Jude ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1797*5ff13fbcSAllan Jude #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1798*5ff13fbcSAllan Jude assert(!ZSTD_isError(oneSeqSize));
1799*5ff13fbcSAllan Jude if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1800*5ff13fbcSAllan Jude #endif
1801*5ff13fbcSAllan Jude if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1802*5ff13fbcSAllan Jude op += oneSeqSize;
1803*5ff13fbcSAllan Jude }
1804*5ff13fbcSAllan Jude }
1805a0483764SConrad Meyer
1806a0483764SConrad Meyer /* save reps for next block */
1807a0483764SConrad Meyer { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1808a0483764SConrad Meyer }
1809a0483764SConrad Meyer
1810a0483764SConrad Meyer /* last literal segment */
1811*5ff13fbcSAllan Jude if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
1812*5ff13fbcSAllan Jude {
1813*5ff13fbcSAllan Jude size_t const lastLLSize = litBufferEnd - litPtr;
181437f1f268SConrad Meyer RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
181537f1f268SConrad Meyer if (op != NULL) {
1816*5ff13fbcSAllan Jude ZSTD_memmove(op, litPtr, lastLLSize);
1817*5ff13fbcSAllan Jude op += lastLLSize;
1818*5ff13fbcSAllan Jude }
1819*5ff13fbcSAllan Jude litPtr = dctx->litExtraBuffer;
1820*5ff13fbcSAllan Jude litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1821*5ff13fbcSAllan Jude }
1822*5ff13fbcSAllan Jude { size_t const lastLLSize = litBufferEnd - litPtr;
1823*5ff13fbcSAllan Jude RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1824*5ff13fbcSAllan Jude if (op != NULL) {
1825*5ff13fbcSAllan Jude ZSTD_memmove(op, litPtr, lastLLSize);
1826a0483764SConrad Meyer op += lastLLSize;
1827a0483764SConrad Meyer }
182837f1f268SConrad Meyer }
1829a0483764SConrad Meyer
1830a0483764SConrad Meyer return op-ostart;
1831a0483764SConrad Meyer }
1832a0483764SConrad Meyer
1833a0483764SConrad Meyer static size_t
1834a0483764SConrad Meyer ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1835a0483764SConrad Meyer void* dst, size_t maxDstSize,
1836a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq,
183737f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset,
183837f1f268SConrad Meyer const int frame)
1839a0483764SConrad Meyer {
184037f1f268SConrad Meyer return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1841a0483764SConrad Meyer }
1842a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1843a0483764SConrad Meyer
1844a0483764SConrad Meyer
1845a0483764SConrad Meyer
1846a0483764SConrad Meyer #if DYNAMIC_BMI2
1847a0483764SConrad Meyer
1848a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1849*5ff13fbcSAllan Jude static BMI2_TARGET_ATTRIBUTE size_t
18504d3f1eafSConrad Meyer DONT_VECTORIZE
1851a0483764SConrad Meyer ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1852a0483764SConrad Meyer void* dst, size_t maxDstSize,
1853a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq,
185437f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset,
185537f1f268SConrad Meyer const int frame)
1856a0483764SConrad Meyer {
185737f1f268SConrad Meyer return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1858a0483764SConrad Meyer }
1859*5ff13fbcSAllan Jude static BMI2_TARGET_ATTRIBUTE size_t
1860*5ff13fbcSAllan Jude DONT_VECTORIZE
1861*5ff13fbcSAllan Jude ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1862*5ff13fbcSAllan Jude void* dst, size_t maxDstSize,
1863*5ff13fbcSAllan Jude const void* seqStart, size_t seqSize, int nbSeq,
1864*5ff13fbcSAllan Jude const ZSTD_longOffset_e isLongOffset,
1865*5ff13fbcSAllan Jude const int frame)
1866*5ff13fbcSAllan Jude {
1867*5ff13fbcSAllan Jude return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1868*5ff13fbcSAllan Jude }
1869a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1870a0483764SConrad Meyer
1871a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1872*5ff13fbcSAllan Jude static BMI2_TARGET_ATTRIBUTE size_t
1873a0483764SConrad Meyer ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1874a0483764SConrad Meyer void* dst, size_t maxDstSize,
1875a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq,
187637f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset,
187737f1f268SConrad Meyer const int frame)
1878a0483764SConrad Meyer {
187937f1f268SConrad Meyer return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1880a0483764SConrad Meyer }
1881a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1882a0483764SConrad Meyer
1883a0483764SConrad Meyer #endif /* DYNAMIC_BMI2 */
1884a0483764SConrad Meyer
1885a0483764SConrad Meyer typedef size_t (*ZSTD_decompressSequences_t)(
1886a0483764SConrad Meyer ZSTD_DCtx* dctx,
1887a0483764SConrad Meyer void* dst, size_t maxDstSize,
1888a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq,
188937f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset,
189037f1f268SConrad Meyer const int frame);
1891a0483764SConrad Meyer
1892a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1893a0483764SConrad Meyer static size_t
1894a0483764SConrad Meyer ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1895a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq,
189637f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset,
189737f1f268SConrad Meyer const int frame)
1898a0483764SConrad Meyer {
1899a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressSequences");
1900a0483764SConrad Meyer #if DYNAMIC_BMI2
1901*5ff13fbcSAllan Jude if (ZSTD_DCtx_get_bmi2(dctx)) {
190237f1f268SConrad Meyer return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1903a0483764SConrad Meyer }
1904a0483764SConrad Meyer #endif
190537f1f268SConrad Meyer return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1906a0483764SConrad Meyer }
1907*5ff13fbcSAllan Jude static size_t
1908*5ff13fbcSAllan Jude ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1909*5ff13fbcSAllan Jude const void* seqStart, size_t seqSize, int nbSeq,
1910*5ff13fbcSAllan Jude const ZSTD_longOffset_e isLongOffset,
1911*5ff13fbcSAllan Jude const int frame)
1912*5ff13fbcSAllan Jude {
1913*5ff13fbcSAllan Jude DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1914*5ff13fbcSAllan Jude #if DYNAMIC_BMI2
1915*5ff13fbcSAllan Jude if (ZSTD_DCtx_get_bmi2(dctx)) {
1916*5ff13fbcSAllan Jude return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1917*5ff13fbcSAllan Jude }
1918*5ff13fbcSAllan Jude #endif
1919*5ff13fbcSAllan Jude return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1920*5ff13fbcSAllan Jude }
1921a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1922a0483764SConrad Meyer
1923a0483764SConrad Meyer
1924a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1925a0483764SConrad Meyer /* ZSTD_decompressSequencesLong() :
1926a0483764SConrad Meyer * decompression function triggered when a minimum share of offsets is considered "long",
1927a0483764SConrad Meyer * aka out of cache.
19282b9c00cbSConrad Meyer * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1929a0483764SConrad Meyer * This function will try to mitigate main memory latency through the use of prefetching */
1930a0483764SConrad Meyer static size_t
1931a0483764SConrad Meyer ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1932a0483764SConrad Meyer void* dst, size_t maxDstSize,
1933a0483764SConrad Meyer const void* seqStart, size_t seqSize, int nbSeq,
193437f1f268SConrad Meyer const ZSTD_longOffset_e isLongOffset,
193537f1f268SConrad Meyer const int frame)
1936a0483764SConrad Meyer {
1937a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1938a0483764SConrad Meyer #if DYNAMIC_BMI2
1939*5ff13fbcSAllan Jude if (ZSTD_DCtx_get_bmi2(dctx)) {
194037f1f268SConrad Meyer return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1941a0483764SConrad Meyer }
1942a0483764SConrad Meyer #endif
194337f1f268SConrad Meyer return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1944a0483764SConrad Meyer }
1945a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1946a0483764SConrad Meyer
1947a0483764SConrad Meyer
1948a0483764SConrad Meyer
1949a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1950a0483764SConrad Meyer !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1951a0483764SConrad Meyer /* ZSTD_getLongOffsetsShare() :
1952a0483764SConrad Meyer * condition : offTable must be valid
1953a0483764SConrad Meyer * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1954a0483764SConrad Meyer * compared to maximum possible of (1<<OffFSELog) */
1955a0483764SConrad Meyer static unsigned
1956a0483764SConrad Meyer ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1957a0483764SConrad Meyer {
1958a0483764SConrad Meyer const void* ptr = offTable;
1959a0483764SConrad Meyer U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1960a0483764SConrad Meyer const ZSTD_seqSymbol* table = offTable + 1;
1961a0483764SConrad Meyer U32 const max = 1 << tableLog;
1962a0483764SConrad Meyer U32 u, total = 0;
1963a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1964a0483764SConrad Meyer
1965a0483764SConrad Meyer assert(max <= (1 << OffFSELog)); /* max not too large */
1966a0483764SConrad Meyer for (u=0; u<max; u++) {
1967a0483764SConrad Meyer if (table[u].nbAdditionalBits > 22) total += 1;
1968a0483764SConrad Meyer }
1969a0483764SConrad Meyer
1970a0483764SConrad Meyer assert(tableLog <= OffFSELog);
1971a0483764SConrad Meyer total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1972a0483764SConrad Meyer
1973a0483764SConrad Meyer return total;
1974a0483764SConrad Meyer }
1975a0483764SConrad Meyer #endif
1976a0483764SConrad Meyer
1977a0483764SConrad Meyer size_t
1978a0483764SConrad Meyer ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1979a0483764SConrad Meyer void* dst, size_t dstCapacity,
1980*5ff13fbcSAllan Jude const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
1981a0483764SConrad Meyer { /* blockType == blockCompressed */
1982a0483764SConrad Meyer const BYTE* ip = (const BYTE*)src;
1983a0483764SConrad Meyer /* isLongOffset must be true if there are long offsets.
1984a0483764SConrad Meyer * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1985a0483764SConrad Meyer * We don't expect that to be the case in 64-bit mode.
1986a0483764SConrad Meyer * In block mode, window size is not known, so we have to be conservative.
1987a0483764SConrad Meyer * (note: but it could be evaluated from current-lowLimit)
1988a0483764SConrad Meyer */
1989a0483764SConrad Meyer ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1990a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1991a0483764SConrad Meyer
199237f1f268SConrad Meyer RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1993a0483764SConrad Meyer
1994a0483764SConrad Meyer /* Decode literals section */
1995*5ff13fbcSAllan Jude { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
1996a0483764SConrad Meyer DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1997a0483764SConrad Meyer if (ZSTD_isError(litCSize)) return litCSize;
1998a0483764SConrad Meyer ip += litCSize;
1999a0483764SConrad Meyer srcSize -= litCSize;
2000a0483764SConrad Meyer }
2001a0483764SConrad Meyer
2002a0483764SConrad Meyer /* Build Decoding Tables */
2003a0483764SConrad Meyer {
2004a0483764SConrad Meyer /* These macros control at build-time which decompressor implementation
2005a0483764SConrad Meyer * we use. If neither is defined, we do some inspection and dispatch at
2006a0483764SConrad Meyer * runtime.
2007a0483764SConrad Meyer */
2008a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2009a0483764SConrad Meyer !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2010a0483764SConrad Meyer int usePrefetchDecoder = dctx->ddictIsCold;
2011a0483764SConrad Meyer #endif
2012a0483764SConrad Meyer int nbSeq;
2013a0483764SConrad Meyer size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
2014a0483764SConrad Meyer if (ZSTD_isError(seqHSize)) return seqHSize;
2015a0483764SConrad Meyer ip += seqHSize;
2016a0483764SConrad Meyer srcSize -= seqHSize;
2017a0483764SConrad Meyer
201837f1f268SConrad Meyer RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
201937f1f268SConrad Meyer
2020a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2021a0483764SConrad Meyer !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2022a0483764SConrad Meyer if ( !usePrefetchDecoder
2023a0483764SConrad Meyer && (!frame || (dctx->fParams.windowSize > (1<<24)))
2024a0483764SConrad Meyer && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
2025a0483764SConrad Meyer U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
2026a0483764SConrad Meyer U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2027a0483764SConrad Meyer usePrefetchDecoder = (shareLongOffsets >= minShare);
2028a0483764SConrad Meyer }
2029a0483764SConrad Meyer #endif
2030a0483764SConrad Meyer
2031a0483764SConrad Meyer dctx->ddictIsCold = 0;
2032a0483764SConrad Meyer
2033a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2034a0483764SConrad Meyer !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2035a0483764SConrad Meyer if (usePrefetchDecoder)
2036a0483764SConrad Meyer #endif
2037a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
203837f1f268SConrad Meyer return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2039a0483764SConrad Meyer #endif
2040a0483764SConrad Meyer
2041a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2042a0483764SConrad Meyer /* else */
2043*5ff13fbcSAllan Jude if (dctx->litBufferLocation == ZSTD_split)
2044*5ff13fbcSAllan Jude return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2045*5ff13fbcSAllan Jude else
204637f1f268SConrad Meyer return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2047a0483764SConrad Meyer #endif
2048a0483764SConrad Meyer }
2049a0483764SConrad Meyer }
2050a0483764SConrad Meyer
2051a0483764SConrad Meyer
2052*5ff13fbcSAllan Jude void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
205337f1f268SConrad Meyer {
2054*5ff13fbcSAllan Jude if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
205537f1f268SConrad Meyer dctx->dictEnd = dctx->previousDstEnd;
205637f1f268SConrad Meyer dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
205737f1f268SConrad Meyer dctx->prefixStart = dst;
205837f1f268SConrad Meyer dctx->previousDstEnd = dst;
205937f1f268SConrad Meyer }
206037f1f268SConrad Meyer }
206137f1f268SConrad Meyer
206237f1f268SConrad Meyer
2063a0483764SConrad Meyer size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2064a0483764SConrad Meyer void* dst, size_t dstCapacity,
2065a0483764SConrad Meyer const void* src, size_t srcSize)
2066a0483764SConrad Meyer {
2067a0483764SConrad Meyer size_t dSize;
2068*5ff13fbcSAllan Jude ZSTD_checkContinuity(dctx, dst, dstCapacity);
2069*5ff13fbcSAllan Jude dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
2070a0483764SConrad Meyer dctx->previousDstEnd = (char*)dst + dSize;
2071a0483764SConrad Meyer return dSize;
2072a0483764SConrad Meyer }
2073