1a0483764SConrad Meyer /*
2*5ff13fbcSAllan Jude  * Copyright (c) Yann Collet, Facebook, Inc.
3a0483764SConrad Meyer  * All rights reserved.
4a0483764SConrad Meyer  *
5a0483764SConrad Meyer  * This source code is licensed under both the BSD-style license (found in the
6a0483764SConrad Meyer  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7a0483764SConrad Meyer  * in the COPYING file in the root directory of this source tree).
8a0483764SConrad Meyer  * You may select, at your option, one of the above-listed licenses.
9a0483764SConrad Meyer  */
10a0483764SConrad Meyer 
11a0483764SConrad Meyer /* zstd_decompress_block :
12a0483764SConrad Meyer  * this module takes care of decompressing _compressed_ block */
13a0483764SConrad Meyer 
14a0483764SConrad Meyer /*-*******************************************************
15a0483764SConrad Meyer *  Dependencies
16a0483764SConrad Meyer *********************************************************/
17f7cd7fe5SConrad Meyer #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
1837f1f268SConrad Meyer #include "../common/compiler.h"    /* prefetch */
1937f1f268SConrad Meyer #include "../common/cpu.h"         /* bmi2 */
2037f1f268SConrad Meyer #include "../common/mem.h"         /* low level memory routines */
21a0483764SConrad Meyer #define FSE_STATIC_LINKING_ONLY
2237f1f268SConrad Meyer #include "../common/fse.h"
23a0483764SConrad Meyer #define HUF_STATIC_LINKING_ONLY
2437f1f268SConrad Meyer #include "../common/huf.h"
2537f1f268SConrad Meyer #include "../common/zstd_internal.h"
26a0483764SConrad Meyer #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
27a0483764SConrad Meyer #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
28a0483764SConrad Meyer #include "zstd_decompress_block.h"
29a0483764SConrad Meyer 
30a0483764SConrad Meyer /*_*******************************************************
31a0483764SConrad Meyer *  Macros
32a0483764SConrad Meyer **********************************************************/
33a0483764SConrad Meyer 
/* These two optional macros force the use of one or the other of the two
 * ZSTD_decompressSequences implementations. They are mutually exclusive:
 * defining both at the same time is rejected at compile time.
 */
38a0483764SConrad Meyer #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39a0483764SConrad Meyer     defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40a0483764SConrad Meyer #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41a0483764SConrad Meyer #endif
42a0483764SConrad Meyer 
43a0483764SConrad Meyer 
44a0483764SConrad Meyer /*_*******************************************************
45a0483764SConrad Meyer *  Memory operations
46a0483764SConrad Meyer **********************************************************/
/* ZSTD_copy4() :
 * Copies exactly 4 bytes from `src` to `dst` through ZSTD_memcpy. */
static void ZSTD_copy4(void* dst, const void* src)
{
    ZSTD_memcpy(dst, src, 4);
}
48a0483764SConrad Meyer 
49a0483764SConrad Meyer 
50a0483764SConrad Meyer /*-*************************************************************
51a0483764SConrad Meyer  *   Block decoding
52a0483764SConrad Meyer  ***************************************************************/
53a0483764SConrad Meyer 
54a0483764SConrad Meyer /*! ZSTD_getcBlockSize() :
55a0483764SConrad Meyer  *  Provides the size of compressed block from block header `src` */
ZSTD_getcBlockSize(const void * src,size_t srcSize,blockProperties_t * bpPtr)56a0483764SConrad Meyer size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57a0483764SConrad Meyer                           blockProperties_t* bpPtr)
58a0483764SConrad Meyer {
5937f1f268SConrad Meyer     RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
602b9c00cbSConrad Meyer 
61a0483764SConrad Meyer     {   U32 const cBlockHeader = MEM_readLE24(src);
62a0483764SConrad Meyer         U32 const cSize = cBlockHeader >> 3;
63a0483764SConrad Meyer         bpPtr->lastBlock = cBlockHeader & 1;
64a0483764SConrad Meyer         bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
65a0483764SConrad Meyer         bpPtr->origSize = cSize;   /* only useful for RLE */
66a0483764SConrad Meyer         if (bpPtr->blockType == bt_rle) return 1;
6737f1f268SConrad Meyer         RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68a0483764SConrad Meyer         return cSize;
69a0483764SConrad Meyer     }
70a0483764SConrad Meyer }
71a0483764SConrad Meyer 
72*5ff13fbcSAllan Jude /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
ZSTD_allocateLiteralsBuffer(ZSTD_DCtx * dctx,void * const dst,const size_t dstCapacity,const size_t litSize,const streaming_operation streaming,const size_t expectedWriteSize,const unsigned splitImmediately)73*5ff13fbcSAllan Jude static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
74*5ff13fbcSAllan Jude     const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
75*5ff13fbcSAllan Jude {
76*5ff13fbcSAllan Jude     if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
77*5ff13fbcSAllan Jude     {
78*5ff13fbcSAllan Jude         /* room for litbuffer to fit without read faulting */
79*5ff13fbcSAllan Jude         dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
80*5ff13fbcSAllan Jude         dctx->litBufferEnd = dctx->litBuffer + litSize;
81*5ff13fbcSAllan Jude         dctx->litBufferLocation = ZSTD_in_dst;
82*5ff13fbcSAllan Jude     }
83*5ff13fbcSAllan Jude     else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
84*5ff13fbcSAllan Jude     {
85*5ff13fbcSAllan Jude         /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
86*5ff13fbcSAllan Jude         if (splitImmediately) {
87*5ff13fbcSAllan Jude             /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
88*5ff13fbcSAllan Jude             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
89*5ff13fbcSAllan Jude             dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90*5ff13fbcSAllan Jude         }
91*5ff13fbcSAllan Jude         else {
92*5ff13fbcSAllan Jude             /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
93*5ff13fbcSAllan Jude             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94*5ff13fbcSAllan Jude             dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95*5ff13fbcSAllan Jude         }
96*5ff13fbcSAllan Jude         dctx->litBufferLocation = ZSTD_split;
97*5ff13fbcSAllan Jude     }
98*5ff13fbcSAllan Jude     else
99*5ff13fbcSAllan Jude     {
100*5ff13fbcSAllan Jude         /* fits entirely within litExtraBuffer, so no split is necessary */
101*5ff13fbcSAllan Jude         dctx->litBuffer = dctx->litExtraBuffer;
102*5ff13fbcSAllan Jude         dctx->litBufferEnd = dctx->litBuffer + litSize;
103*5ff13fbcSAllan Jude         dctx->litBufferLocation = ZSTD_not_in_dst;
104*5ff13fbcSAllan Jude     }
105*5ff13fbcSAllan Jude }
106a0483764SConrad Meyer 
107a0483764SConrad Meyer /* Hidden declaration for fullbench */
108a0483764SConrad Meyer size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
109*5ff13fbcSAllan Jude                           const void* src, size_t srcSize,
110*5ff13fbcSAllan Jude                           void* dst, size_t dstCapacity, const streaming_operation streaming);
111a0483764SConrad Meyer /*! ZSTD_decodeLiteralsBlock() :
112*5ff13fbcSAllan Jude  * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
113*5ff13fbcSAllan Jude  * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
114*5ff13fbcSAllan Jude  * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
115*5ff13fbcSAllan Jude  * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
116*5ff13fbcSAllan Jude  *
117a0483764SConrad Meyer  * @return : nb of bytes read from src (< srcSize )
118a0483764SConrad Meyer  *  note : symbol not declared but exposed for fullbench */
ZSTD_decodeLiteralsBlock(ZSTD_DCtx * dctx,const void * src,size_t srcSize,void * dst,size_t dstCapacity,const streaming_operation streaming)119a0483764SConrad Meyer size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
120*5ff13fbcSAllan Jude                           const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
121*5ff13fbcSAllan Jude                           void* dst, size_t dstCapacity, const streaming_operation streaming)
122a0483764SConrad Meyer {
1239cbefe25SConrad Meyer     DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
12437f1f268SConrad Meyer     RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
125a0483764SConrad Meyer 
126a0483764SConrad Meyer     {   const BYTE* const istart = (const BYTE*) src;
127a0483764SConrad Meyer         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
128a0483764SConrad Meyer 
129a0483764SConrad Meyer         switch(litEncType)
130a0483764SConrad Meyer         {
131a0483764SConrad Meyer         case set_repeat:
1329cbefe25SConrad Meyer             DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
13337f1f268SConrad Meyer             RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
134*5ff13fbcSAllan Jude             ZSTD_FALLTHROUGH;
135a0483764SConrad Meyer 
136a0483764SConrad Meyer         case set_compressed:
1372b9c00cbSConrad Meyer             RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
138a0483764SConrad Meyer             {   size_t lhSize, litSize, litCSize;
139a0483764SConrad Meyer                 U32 singleStream=0;
140a0483764SConrad Meyer                 U32 const lhlCode = (istart[0] >> 2) & 3;
141a0483764SConrad Meyer                 U32 const lhc = MEM_readLE32(istart);
142a0483764SConrad Meyer                 size_t hufSuccess;
143*5ff13fbcSAllan Jude                 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
144a0483764SConrad Meyer                 switch(lhlCode)
145a0483764SConrad Meyer                 {
146a0483764SConrad Meyer                 case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
147a0483764SConrad Meyer                     /* 2 - 2 - 10 - 10 */
148a0483764SConrad Meyer                     singleStream = !lhlCode;
149a0483764SConrad Meyer                     lhSize = 3;
150a0483764SConrad Meyer                     litSize  = (lhc >> 4) & 0x3FF;
151a0483764SConrad Meyer                     litCSize = (lhc >> 14) & 0x3FF;
152a0483764SConrad Meyer                     break;
153a0483764SConrad Meyer                 case 2:
154a0483764SConrad Meyer                     /* 2 - 2 - 14 - 14 */
155a0483764SConrad Meyer                     lhSize = 4;
156a0483764SConrad Meyer                     litSize  = (lhc >> 4) & 0x3FFF;
157a0483764SConrad Meyer                     litCSize = lhc >> 18;
158a0483764SConrad Meyer                     break;
159a0483764SConrad Meyer                 case 3:
160a0483764SConrad Meyer                     /* 2 - 2 - 18 - 18 */
161a0483764SConrad Meyer                     lhSize = 5;
162a0483764SConrad Meyer                     litSize  = (lhc >> 4) & 0x3FFFF;
1639cbefe25SConrad Meyer                     litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
164a0483764SConrad Meyer                     break;
165a0483764SConrad Meyer                 }
166*5ff13fbcSAllan Jude                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
16737f1f268SConrad Meyer                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
16837f1f268SConrad Meyer                 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
169*5ff13fbcSAllan Jude                 RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
170*5ff13fbcSAllan Jude                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
171a0483764SConrad Meyer 
172a0483764SConrad Meyer                 /* prefetch huffman table if cold */
173a0483764SConrad Meyer                 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
174a0483764SConrad Meyer                     PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
175a0483764SConrad Meyer                 }
176a0483764SConrad Meyer 
177a0483764SConrad Meyer                 if (litEncType==set_repeat) {
178a0483764SConrad Meyer                     if (singleStream) {
179a0483764SConrad Meyer                         hufSuccess = HUF_decompress1X_usingDTable_bmi2(
180a0483764SConrad Meyer                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
181*5ff13fbcSAllan Jude                             dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
182a0483764SConrad Meyer                     } else {
183a0483764SConrad Meyer                         hufSuccess = HUF_decompress4X_usingDTable_bmi2(
184a0483764SConrad Meyer                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
185*5ff13fbcSAllan Jude                             dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
186a0483764SConrad Meyer                     }
187a0483764SConrad Meyer                 } else {
188a0483764SConrad Meyer                     if (singleStream) {
189a0483764SConrad Meyer #if defined(HUF_FORCE_DECOMPRESS_X2)
190a0483764SConrad Meyer                         hufSuccess = HUF_decompress1X_DCtx_wksp(
191a0483764SConrad Meyer                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
192a0483764SConrad Meyer                             istart+lhSize, litCSize, dctx->workspace,
193a0483764SConrad Meyer                             sizeof(dctx->workspace));
194a0483764SConrad Meyer #else
195a0483764SConrad Meyer                         hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
196a0483764SConrad Meyer                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
197a0483764SConrad Meyer                             istart+lhSize, litCSize, dctx->workspace,
198*5ff13fbcSAllan Jude                             sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
199a0483764SConrad Meyer #endif
200a0483764SConrad Meyer                     } else {
201a0483764SConrad Meyer                         hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
202a0483764SConrad Meyer                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
203a0483764SConrad Meyer                             istart+lhSize, litCSize, dctx->workspace,
204*5ff13fbcSAllan Jude                             sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
205a0483764SConrad Meyer                     }
206a0483764SConrad Meyer                 }
207*5ff13fbcSAllan Jude                 if (dctx->litBufferLocation == ZSTD_split)
208*5ff13fbcSAllan Jude                 {
209*5ff13fbcSAllan Jude                     ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
210*5ff13fbcSAllan Jude                     ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
211*5ff13fbcSAllan Jude                     dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
212*5ff13fbcSAllan Jude                     dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
213*5ff13fbcSAllan Jude                 }
214a0483764SConrad Meyer 
21537f1f268SConrad Meyer                 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
216a0483764SConrad Meyer 
217a0483764SConrad Meyer                 dctx->litPtr = dctx->litBuffer;
218a0483764SConrad Meyer                 dctx->litSize = litSize;
219a0483764SConrad Meyer                 dctx->litEntropy = 1;
220a0483764SConrad Meyer                 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
221a0483764SConrad Meyer                 return litCSize + lhSize;
222a0483764SConrad Meyer             }
223a0483764SConrad Meyer 
224a0483764SConrad Meyer         case set_basic:
225a0483764SConrad Meyer             {   size_t litSize, lhSize;
226a0483764SConrad Meyer                 U32 const lhlCode = ((istart[0]) >> 2) & 3;
227*5ff13fbcSAllan Jude                 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
228a0483764SConrad Meyer                 switch(lhlCode)
229a0483764SConrad Meyer                 {
230a0483764SConrad Meyer                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
231a0483764SConrad Meyer                     lhSize = 1;
232a0483764SConrad Meyer                     litSize = istart[0] >> 3;
233a0483764SConrad Meyer                     break;
234a0483764SConrad Meyer                 case 1:
235a0483764SConrad Meyer                     lhSize = 2;
236a0483764SConrad Meyer                     litSize = MEM_readLE16(istart) >> 4;
237a0483764SConrad Meyer                     break;
238a0483764SConrad Meyer                 case 3:
239a0483764SConrad Meyer                     lhSize = 3;
240a0483764SConrad Meyer                     litSize = MEM_readLE24(istart) >> 4;
241a0483764SConrad Meyer                     break;
242a0483764SConrad Meyer                 }
243a0483764SConrad Meyer 
244*5ff13fbcSAllan Jude                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
245*5ff13fbcSAllan Jude                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
246*5ff13fbcSAllan Jude                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
247a0483764SConrad Meyer                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
24837f1f268SConrad Meyer                     RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
249*5ff13fbcSAllan Jude                     if (dctx->litBufferLocation == ZSTD_split)
250*5ff13fbcSAllan Jude                     {
251*5ff13fbcSAllan Jude                         ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
252*5ff13fbcSAllan Jude                         ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
253*5ff13fbcSAllan Jude                     }
254*5ff13fbcSAllan Jude                     else
255*5ff13fbcSAllan Jude                     {
256f7cd7fe5SConrad Meyer                         ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
257*5ff13fbcSAllan Jude                     }
258a0483764SConrad Meyer                     dctx->litPtr = dctx->litBuffer;
259a0483764SConrad Meyer                     dctx->litSize = litSize;
260a0483764SConrad Meyer                     return lhSize+litSize;
261a0483764SConrad Meyer                 }
262a0483764SConrad Meyer                 /* direct reference into compressed stream */
263a0483764SConrad Meyer                 dctx->litPtr = istart+lhSize;
264a0483764SConrad Meyer                 dctx->litSize = litSize;
265*5ff13fbcSAllan Jude                 dctx->litBufferEnd = dctx->litPtr + litSize;
266*5ff13fbcSAllan Jude                 dctx->litBufferLocation = ZSTD_not_in_dst;
267a0483764SConrad Meyer                 return lhSize+litSize;
268a0483764SConrad Meyer             }
269a0483764SConrad Meyer 
270a0483764SConrad Meyer         case set_rle:
271a0483764SConrad Meyer             {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
272a0483764SConrad Meyer                 size_t litSize, lhSize;
273*5ff13fbcSAllan Jude                 size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
274a0483764SConrad Meyer                 switch(lhlCode)
275a0483764SConrad Meyer                 {
276a0483764SConrad Meyer                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
277a0483764SConrad Meyer                     lhSize = 1;
278a0483764SConrad Meyer                     litSize = istart[0] >> 3;
279a0483764SConrad Meyer                     break;
280a0483764SConrad Meyer                 case 1:
281a0483764SConrad Meyer                     lhSize = 2;
282a0483764SConrad Meyer                     litSize = MEM_readLE16(istart) >> 4;
283a0483764SConrad Meyer                     break;
284a0483764SConrad Meyer                 case 3:
285a0483764SConrad Meyer                     lhSize = 3;
286a0483764SConrad Meyer                     litSize = MEM_readLE24(istart) >> 4;
2872b9c00cbSConrad Meyer                     RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
288a0483764SConrad Meyer                     break;
289a0483764SConrad Meyer                 }
290*5ff13fbcSAllan Jude                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
29137f1f268SConrad Meyer                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
292*5ff13fbcSAllan Jude                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
293*5ff13fbcSAllan Jude                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
294*5ff13fbcSAllan Jude                 if (dctx->litBufferLocation == ZSTD_split)
295*5ff13fbcSAllan Jude                 {
296*5ff13fbcSAllan Jude                     ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
297*5ff13fbcSAllan Jude                     ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
298*5ff13fbcSAllan Jude                 }
299*5ff13fbcSAllan Jude                 else
300*5ff13fbcSAllan Jude                 {
301*5ff13fbcSAllan Jude                     ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
302*5ff13fbcSAllan Jude                 }
303a0483764SConrad Meyer                 dctx->litPtr = dctx->litBuffer;
304a0483764SConrad Meyer                 dctx->litSize = litSize;
305a0483764SConrad Meyer                 return lhSize+1;
306a0483764SConrad Meyer             }
307a0483764SConrad Meyer         default:
3082b9c00cbSConrad Meyer             RETURN_ERROR(corruption_detected, "impossible");
309a0483764SConrad Meyer         }
310a0483764SConrad Meyer     }
311a0483764SConrad Meyer }
312a0483764SConrad Meyer 
/* Default FSE distribution tables.
 * These are pre-calculated FSE decoding tables using the default distributions as defined in the specification :
 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
 * They were generated programmatically with the following method :
 * - start from the default distributions, present in /lib/common/zstd_internal.h
 * - generate the tables normally, using ZSTD_buildFSETable()
 * - print out the content of the tables
 * - prettify the output, report it below, and test with the fuzzer to ensure it's correct */
321a0483764SConrad Meyer 
322a0483764SConrad Meyer /* Default FSE distribution table for Literal Lengths */
323a0483764SConrad Meyer static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
324a0483764SConrad Meyer      {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
325a0483764SConrad Meyer      /* nextState, nbAddBits, nbBits, baseVal */
326a0483764SConrad Meyer      {  0,  0,  4,    0},  { 16,  0,  4,    0},
327a0483764SConrad Meyer      { 32,  0,  5,    1},  {  0,  0,  5,    3},
328a0483764SConrad Meyer      {  0,  0,  5,    4},  {  0,  0,  5,    6},
329a0483764SConrad Meyer      {  0,  0,  5,    7},  {  0,  0,  5,    9},
330a0483764SConrad Meyer      {  0,  0,  5,   10},  {  0,  0,  5,   12},
331a0483764SConrad Meyer      {  0,  0,  6,   14},  {  0,  1,  5,   16},
332a0483764SConrad Meyer      {  0,  1,  5,   20},  {  0,  1,  5,   22},
333a0483764SConrad Meyer      {  0,  2,  5,   28},  {  0,  3,  5,   32},
334a0483764SConrad Meyer      {  0,  4,  5,   48},  { 32,  6,  5,   64},
335a0483764SConrad Meyer      {  0,  7,  5,  128},  {  0,  8,  6,  256},
336a0483764SConrad Meyer      {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
337a0483764SConrad Meyer      { 32,  0,  4,    0},  {  0,  0,  4,    1},
338a0483764SConrad Meyer      {  0,  0,  5,    2},  { 32,  0,  5,    4},
339a0483764SConrad Meyer      {  0,  0,  5,    5},  { 32,  0,  5,    7},
340a0483764SConrad Meyer      {  0,  0,  5,    8},  { 32,  0,  5,   10},
341a0483764SConrad Meyer      {  0,  0,  5,   11},  {  0,  0,  6,   13},
342a0483764SConrad Meyer      { 32,  1,  5,   16},  {  0,  1,  5,   18},
343a0483764SConrad Meyer      { 32,  1,  5,   22},  {  0,  2,  5,   24},
344a0483764SConrad Meyer      { 32,  3,  5,   32},  {  0,  3,  5,   40},
345a0483764SConrad Meyer      {  0,  6,  4,   64},  { 16,  6,  4,   64},
346a0483764SConrad Meyer      { 32,  7,  5,  128},  {  0,  9,  6,  512},
347a0483764SConrad Meyer      {  0, 11,  6, 2048},  { 48,  0,  4,    0},
348a0483764SConrad Meyer      { 16,  0,  4,    1},  { 32,  0,  5,    2},
349a0483764SConrad Meyer      { 32,  0,  5,    3},  { 32,  0,  5,    5},
350a0483764SConrad Meyer      { 32,  0,  5,    6},  { 32,  0,  5,    8},
351a0483764SConrad Meyer      { 32,  0,  5,    9},  { 32,  0,  5,   11},
352a0483764SConrad Meyer      { 32,  0,  5,   12},  {  0,  0,  6,   15},
353a0483764SConrad Meyer      { 32,  1,  5,   18},  { 32,  1,  5,   20},
354a0483764SConrad Meyer      { 32,  2,  5,   24},  { 32,  2,  5,   28},
355a0483764SConrad Meyer      { 32,  3,  5,   40},  { 32,  4,  5,   48},
356a0483764SConrad Meyer      {  0, 16,  6,65536},  {  0, 15,  6,32768},
357a0483764SConrad Meyer      {  0, 14,  6,16384},  {  0, 13,  6, 8192},
358a0483764SConrad Meyer };   /* LL_defaultDTable */
359a0483764SConrad Meyer 
360a0483764SConrad Meyer /* Default FSE distribution table for Offset Codes */
361a0483764SConrad Meyer static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
362a0483764SConrad Meyer     {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
363a0483764SConrad Meyer     /* nextState, nbAddBits, nbBits, baseVal */
364a0483764SConrad Meyer     {  0,  0,  5,    0},     {  0,  6,  4,   61},
365a0483764SConrad Meyer     {  0,  9,  5,  509},     {  0, 15,  5,32765},
366a0483764SConrad Meyer     {  0, 21,  5,2097149},   {  0,  3,  5,    5},
367a0483764SConrad Meyer     {  0,  7,  4,  125},     {  0, 12,  5, 4093},
368a0483764SConrad Meyer     {  0, 18,  5,262141},    {  0, 23,  5,8388605},
369a0483764SConrad Meyer     {  0,  5,  5,   29},     {  0,  8,  4,  253},
370a0483764SConrad Meyer     {  0, 14,  5,16381},     {  0, 20,  5,1048573},
371a0483764SConrad Meyer     {  0,  2,  5,    1},     { 16,  7,  4,  125},
372a0483764SConrad Meyer     {  0, 11,  5, 2045},     {  0, 17,  5,131069},
373a0483764SConrad Meyer     {  0, 22,  5,4194301},   {  0,  4,  5,   13},
374a0483764SConrad Meyer     { 16,  8,  4,  253},     {  0, 13,  5, 8189},
375a0483764SConrad Meyer     {  0, 19,  5,524285},    {  0,  1,  5,    1},
376a0483764SConrad Meyer     { 16,  6,  4,   61},     {  0, 10,  5, 1021},
377a0483764SConrad Meyer     {  0, 16,  5,65533},     {  0, 28,  5,268435453},
378a0483764SConrad Meyer     {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
379a0483764SConrad Meyer     {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
380a0483764SConrad Meyer };   /* OF_defaultDTable */
381a0483764SConrad Meyer 
382a0483764SConrad Meyer 
383a0483764SConrad Meyer /* Default FSE distribution table for Match Lengths */
384a0483764SConrad Meyer static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
385a0483764SConrad Meyer     {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
386a0483764SConrad Meyer     /* nextState, nbAddBits, nbBits, baseVal */
387a0483764SConrad Meyer     {  0,  0,  6,    3},  {  0,  0,  4,    4},
388a0483764SConrad Meyer     { 32,  0,  5,    5},  {  0,  0,  5,    6},
389a0483764SConrad Meyer     {  0,  0,  5,    8},  {  0,  0,  5,    9},
390a0483764SConrad Meyer     {  0,  0,  5,   11},  {  0,  0,  6,   13},
391a0483764SConrad Meyer     {  0,  0,  6,   16},  {  0,  0,  6,   19},
392a0483764SConrad Meyer     {  0,  0,  6,   22},  {  0,  0,  6,   25},
393a0483764SConrad Meyer     {  0,  0,  6,   28},  {  0,  0,  6,   31},
394a0483764SConrad Meyer     {  0,  0,  6,   34},  {  0,  1,  6,   37},
395a0483764SConrad Meyer     {  0,  1,  6,   41},  {  0,  2,  6,   47},
396a0483764SConrad Meyer     {  0,  3,  6,   59},  {  0,  4,  6,   83},
397a0483764SConrad Meyer     {  0,  7,  6,  131},  {  0,  9,  6,  515},
398a0483764SConrad Meyer     { 16,  0,  4,    4},  {  0,  0,  4,    5},
399a0483764SConrad Meyer     { 32,  0,  5,    6},  {  0,  0,  5,    7},
400a0483764SConrad Meyer     { 32,  0,  5,    9},  {  0,  0,  5,   10},
401a0483764SConrad Meyer     {  0,  0,  6,   12},  {  0,  0,  6,   15},
402a0483764SConrad Meyer     {  0,  0,  6,   18},  {  0,  0,  6,   21},
403a0483764SConrad Meyer     {  0,  0,  6,   24},  {  0,  0,  6,   27},
404a0483764SConrad Meyer     {  0,  0,  6,   30},  {  0,  0,  6,   33},
405a0483764SConrad Meyer     {  0,  1,  6,   35},  {  0,  1,  6,   39},
406a0483764SConrad Meyer     {  0,  2,  6,   43},  {  0,  3,  6,   51},
407a0483764SConrad Meyer     {  0,  4,  6,   67},  {  0,  5,  6,   99},
408a0483764SConrad Meyer     {  0,  8,  6,  259},  { 32,  0,  4,    4},
409a0483764SConrad Meyer     { 48,  0,  4,    4},  { 16,  0,  4,    5},
410a0483764SConrad Meyer     { 32,  0,  5,    7},  { 32,  0,  5,    8},
411a0483764SConrad Meyer     { 32,  0,  5,   10},  { 32,  0,  5,   11},
412a0483764SConrad Meyer     {  0,  0,  6,   14},  {  0,  0,  6,   17},
413a0483764SConrad Meyer     {  0,  0,  6,   20},  {  0,  0,  6,   23},
414a0483764SConrad Meyer     {  0,  0,  6,   26},  {  0,  0,  6,   29},
415a0483764SConrad Meyer     {  0,  0,  6,   32},  {  0, 16,  6,65539},
416a0483764SConrad Meyer     {  0, 15,  6,32771},  {  0, 14,  6,16387},
417a0483764SConrad Meyer     {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
418a0483764SConrad Meyer     {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
419a0483764SConrad Meyer };   /* ML_defaultDTable */
420a0483764SConrad Meyer 
421a0483764SConrad Meyer 
/* ZSTD_buildSeqTable_rle() :
 * Fills `dt` as a decoding table holding a single cell :
 * - dt[0], viewed as a ZSTD_seqSymbol_header, gets tableLog = 0 and fastMode = 0
 * - dt[1] gets nbBits = 0, nextState = 0, plus the provided `baseValue` and `nbAddBits`
 * `dt` must therefore provide room for at least 2 ZSTD_seqSymbol entries. */
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
{
    void* const headerSlot = dt;   /* go through void* to reinterpret dt[0] as the header */
    ZSTD_seqSymbol_header* const header = (ZSTD_seqSymbol_header*)headerSlot;
    ZSTD_seqSymbol* const onlyCell = dt + 1;

    assert(nbAddBits < 255);

    header->fastMode = 0;
    header->tableLog = 0;

    onlyCell->baseValue = baseValue;
    onlyCell->nbAdditionalBits = nbAddBits;
    onlyCell->nextState = 0;
    onlyCell->nbBits = 0;
}
437a0483764SConrad Meyer 
438a0483764SConrad Meyer 
439a0483764SConrad Meyer /* ZSTD_buildFSETable() :
440a0483764SConrad Meyer  * generate FSE decoding table for one symbol (ll, ml or off)
441a0483764SConrad Meyer  * cannot fail if input is valid =>
442a0483764SConrad Meyer  * all inputs are presumed validated at this stage */
443f7cd7fe5SConrad Meyer FORCE_INLINE_TEMPLATE
ZSTD_buildFSETable_body(ZSTD_seqSymbol * dt,const short * normalizedCounter,unsigned maxSymbolValue,const U32 * baseValue,const U8 * nbAdditionalBits,unsigned tableLog,void * wksp,size_t wkspSize)444f7cd7fe5SConrad Meyer void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
445a0483764SConrad Meyer             const short* normalizedCounter, unsigned maxSymbolValue,
446*5ff13fbcSAllan Jude             const U32* baseValue, const U8* nbAdditionalBits,
447f7cd7fe5SConrad Meyer             unsigned tableLog, void* wksp, size_t wkspSize)
448a0483764SConrad Meyer {
449a0483764SConrad Meyer     ZSTD_seqSymbol* const tableDecode = dt+1;
450a0483764SConrad Meyer     U32 const maxSV1 = maxSymbolValue + 1;
451a0483764SConrad Meyer     U32 const tableSize = 1 << tableLog;
452f7cd7fe5SConrad Meyer 
453f7cd7fe5SConrad Meyer     U16* symbolNext = (U16*)wksp;
454f7cd7fe5SConrad Meyer     BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
455a0483764SConrad Meyer     U32 highThreshold = tableSize - 1;
456a0483764SConrad Meyer 
457f7cd7fe5SConrad Meyer 
458a0483764SConrad Meyer     /* Sanity Checks */
459a0483764SConrad Meyer     assert(maxSymbolValue <= MaxSeq);
460a0483764SConrad Meyer     assert(tableLog <= MaxFSELog);
461f7cd7fe5SConrad Meyer     assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
462f7cd7fe5SConrad Meyer     (void)wkspSize;
463a0483764SConrad Meyer     /* Init, lay down lowprob symbols */
464a0483764SConrad Meyer     {   ZSTD_seqSymbol_header DTableH;
465a0483764SConrad Meyer         DTableH.tableLog = tableLog;
466a0483764SConrad Meyer         DTableH.fastMode = 1;
467a0483764SConrad Meyer         {   S16 const largeLimit= (S16)(1 << (tableLog-1));
468a0483764SConrad Meyer             U32 s;
469a0483764SConrad Meyer             for (s=0; s<maxSV1; s++) {
470a0483764SConrad Meyer                 if (normalizedCounter[s]==-1) {
471a0483764SConrad Meyer                     tableDecode[highThreshold--].baseValue = s;
472a0483764SConrad Meyer                     symbolNext[s] = 1;
473a0483764SConrad Meyer                 } else {
474a0483764SConrad Meyer                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
4759cbefe25SConrad Meyer                     assert(normalizedCounter[s]>=0);
4769cbefe25SConrad Meyer                     symbolNext[s] = (U16)normalizedCounter[s];
477a0483764SConrad Meyer         }   }   }
478f7cd7fe5SConrad Meyer         ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
479a0483764SConrad Meyer     }
480a0483764SConrad Meyer 
481a0483764SConrad Meyer     /* Spread symbols */
482f7cd7fe5SConrad Meyer     assert(tableSize <= 512);
483f7cd7fe5SConrad Meyer     /* Specialized symbol spreading for the case when there are
484f7cd7fe5SConrad Meyer      * no low probability (-1 count) symbols. When compressing
485f7cd7fe5SConrad Meyer      * small blocks we avoid low probability symbols to hit this
486f7cd7fe5SConrad Meyer      * case, since header decoding speed matters more.
487f7cd7fe5SConrad Meyer      */
488f7cd7fe5SConrad Meyer     if (highThreshold == tableSize - 1) {
489f7cd7fe5SConrad Meyer         size_t const tableMask = tableSize-1;
490f7cd7fe5SConrad Meyer         size_t const step = FSE_TABLESTEP(tableSize);
491f7cd7fe5SConrad Meyer         /* First lay down the symbols in order.
492f7cd7fe5SConrad Meyer          * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
493f7cd7fe5SConrad Meyer          * misses since small blocks generally have small table logs, so nearly
494f7cd7fe5SConrad Meyer          * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
495f7cd7fe5SConrad Meyer          * our buffer to handle the over-write.
496f7cd7fe5SConrad Meyer          */
497f7cd7fe5SConrad Meyer         {
498f7cd7fe5SConrad Meyer             U64 const add = 0x0101010101010101ull;
499f7cd7fe5SConrad Meyer             size_t pos = 0;
500f7cd7fe5SConrad Meyer             U64 sv = 0;
501f7cd7fe5SConrad Meyer             U32 s;
502f7cd7fe5SConrad Meyer             for (s=0; s<maxSV1; ++s, sv += add) {
503f7cd7fe5SConrad Meyer                 int i;
504f7cd7fe5SConrad Meyer                 int const n = normalizedCounter[s];
505f7cd7fe5SConrad Meyer                 MEM_write64(spread + pos, sv);
506f7cd7fe5SConrad Meyer                 for (i = 8; i < n; i += 8) {
507f7cd7fe5SConrad Meyer                     MEM_write64(spread + pos + i, sv);
508f7cd7fe5SConrad Meyer                 }
509f7cd7fe5SConrad Meyer                 pos += n;
510f7cd7fe5SConrad Meyer             }
511f7cd7fe5SConrad Meyer         }
512f7cd7fe5SConrad Meyer         /* Now we spread those positions across the table.
513f7cd7fe5SConrad Meyer          * The benefit of doing it in two stages is that we avoid the the
514f7cd7fe5SConrad Meyer          * variable size inner loop, which caused lots of branch misses.
515f7cd7fe5SConrad Meyer          * Now we can run through all the positions without any branch misses.
516f7cd7fe5SConrad Meyer          * We unroll the loop twice, since that is what emperically worked best.
517f7cd7fe5SConrad Meyer          */
518f7cd7fe5SConrad Meyer         {
519f7cd7fe5SConrad Meyer             size_t position = 0;
520f7cd7fe5SConrad Meyer             size_t s;
521f7cd7fe5SConrad Meyer             size_t const unroll = 2;
522f7cd7fe5SConrad Meyer             assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
523f7cd7fe5SConrad Meyer             for (s = 0; s < (size_t)tableSize; s += unroll) {
524f7cd7fe5SConrad Meyer                 size_t u;
525f7cd7fe5SConrad Meyer                 for (u = 0; u < unroll; ++u) {
526f7cd7fe5SConrad Meyer                     size_t const uPosition = (position + (u * step)) & tableMask;
527f7cd7fe5SConrad Meyer                     tableDecode[uPosition].baseValue = spread[s + u];
528f7cd7fe5SConrad Meyer                 }
529f7cd7fe5SConrad Meyer                 position = (position + (unroll * step)) & tableMask;
530f7cd7fe5SConrad Meyer             }
531f7cd7fe5SConrad Meyer             assert(position == 0);
532f7cd7fe5SConrad Meyer         }
533f7cd7fe5SConrad Meyer     } else {
534f7cd7fe5SConrad Meyer         U32 const tableMask = tableSize-1;
535a0483764SConrad Meyer         U32 const step = FSE_TABLESTEP(tableSize);
536a0483764SConrad Meyer         U32 s, position = 0;
537a0483764SConrad Meyer         for (s=0; s<maxSV1; s++) {
538a0483764SConrad Meyer             int i;
539f7cd7fe5SConrad Meyer             int const n = normalizedCounter[s];
540f7cd7fe5SConrad Meyer             for (i=0; i<n; i++) {
541a0483764SConrad Meyer                 tableDecode[position].baseValue = s;
542a0483764SConrad Meyer                 position = (position + step) & tableMask;
543a0483764SConrad Meyer                 while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
544a0483764SConrad Meyer         }   }
545a0483764SConrad Meyer         assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
546a0483764SConrad Meyer     }
547a0483764SConrad Meyer 
548a0483764SConrad Meyer     /* Build Decoding table */
549f7cd7fe5SConrad Meyer     {
550f7cd7fe5SConrad Meyer         U32 u;
551a0483764SConrad Meyer         for (u=0; u<tableSize; u++) {
552a0483764SConrad Meyer             U32 const symbol = tableDecode[u].baseValue;
553a0483764SConrad Meyer             U32 const nextState = symbolNext[symbol]++;
554a0483764SConrad Meyer             tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
555a0483764SConrad Meyer             tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
556a0483764SConrad Meyer             assert(nbAdditionalBits[symbol] < 255);
557*5ff13fbcSAllan Jude             tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
558a0483764SConrad Meyer             tableDecode[u].baseValue = baseValue[symbol];
559f7cd7fe5SConrad Meyer         }
560f7cd7fe5SConrad Meyer     }
561f7cd7fe5SConrad Meyer }
562f7cd7fe5SConrad Meyer 
563f7cd7fe5SConrad Meyer /* Avoids the FORCE_INLINE of the _body() function. */
ZSTD_buildFSETable_body_default(ZSTD_seqSymbol * dt,const short * normalizedCounter,unsigned maxSymbolValue,const U32 * baseValue,const U8 * nbAdditionalBits,unsigned tableLog,void * wksp,size_t wkspSize)564f7cd7fe5SConrad Meyer static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
565f7cd7fe5SConrad Meyer             const short* normalizedCounter, unsigned maxSymbolValue,
566*5ff13fbcSAllan Jude             const U32* baseValue, const U8* nbAdditionalBits,
567f7cd7fe5SConrad Meyer             unsigned tableLog, void* wksp, size_t wkspSize)
568f7cd7fe5SConrad Meyer {
569f7cd7fe5SConrad Meyer     ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
570f7cd7fe5SConrad Meyer             baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
571f7cd7fe5SConrad Meyer }
572f7cd7fe5SConrad Meyer 
#if DYNAMIC_BMI2
/* ZSTD_buildFSETable_body_bmi2() :
 * same body as the default variant, but compiled under BMI2_TARGET_ATTRIBUTE.
 * Only exists when DYNAMIC_BMI2 is enabled. All arguments are forwarded unchanged. */
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt,
                            normalizedCounter, maxSymbolValue,
                            baseValue, nbAdditionalBits,
                            tableLog,
                            wksp, wkspSize);
}
#endif
583f7cd7fe5SConrad Meyer 
/* ZSTD_buildFSETable() :
 * build an FSE decoding table into `dt`, selecting the implementation at run time :
 * when DYNAMIC_BMI2 is enabled and `bmi2` is non-zero, the BMI2-targeted variant is used,
 * otherwise the default variant. `bmi2` is ignored when DYNAMIC_BMI2 is disabled.
 * All other arguments are forwarded unchanged to the selected variant. */
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2 != 0) {
        ZSTD_buildFSETable_body_bmi2(dt,
                                     normalizedCounter, maxSymbolValue,
                                     baseValue, nbAdditionalBits,
                                     tableLog, wksp, wkspSize);
        return;
    }
#else
    (void)bmi2;   /* no variant to choose from */
#endif
    ZSTD_buildFSETable_body_default(dt,
                                    normalizedCounter, maxSymbolValue,
                                    baseValue, nbAdditionalBits,
                                    tableLog, wksp, wkspSize);
}
600a0483764SConrad Meyer 
601a0483764SConrad Meyer 
602a0483764SConrad Meyer /*! ZSTD_buildSeqTable() :
603a0483764SConrad Meyer  * @return : nb bytes read from src,
604a0483764SConrad Meyer  *           or an error code if it fails */
ZSTD_buildSeqTable(ZSTD_seqSymbol * DTableSpace,const ZSTD_seqSymbol ** DTablePtr,symbolEncodingType_e type,unsigned max,U32 maxLog,const void * src,size_t srcSize,const U32 * baseValue,const U8 * nbAdditionalBits,const ZSTD_seqSymbol * defaultTable,U32 flagRepeatTable,int ddictIsCold,int nbSeq,U32 * wksp,size_t wkspSize,int bmi2)605a0483764SConrad Meyer static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
606a0483764SConrad Meyer                                  symbolEncodingType_e type, unsigned max, U32 maxLog,
607a0483764SConrad Meyer                                  const void* src, size_t srcSize,
608*5ff13fbcSAllan Jude                                  const U32* baseValue, const U8* nbAdditionalBits,
609a0483764SConrad Meyer                                  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
610f7cd7fe5SConrad Meyer                                  int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
611f7cd7fe5SConrad Meyer                                  int bmi2)
612a0483764SConrad Meyer {
613a0483764SConrad Meyer     switch(type)
614a0483764SConrad Meyer     {
615a0483764SConrad Meyer     case set_rle :
61637f1f268SConrad Meyer         RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
61737f1f268SConrad Meyer         RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
618a0483764SConrad Meyer         {   U32 const symbol = *(const BYTE*)src;
619a0483764SConrad Meyer             U32 const baseline = baseValue[symbol];
620*5ff13fbcSAllan Jude             U8 const nbBits = nbAdditionalBits[symbol];
621a0483764SConrad Meyer             ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
622a0483764SConrad Meyer         }
623a0483764SConrad Meyer         *DTablePtr = DTableSpace;
624a0483764SConrad Meyer         return 1;
625a0483764SConrad Meyer     case set_basic :
626a0483764SConrad Meyer         *DTablePtr = defaultTable;
627a0483764SConrad Meyer         return 0;
628a0483764SConrad Meyer     case set_repeat:
62937f1f268SConrad Meyer         RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
630a0483764SConrad Meyer         /* prefetch FSE table if used */
631a0483764SConrad Meyer         if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
632a0483764SConrad Meyer             const void* const pStart = *DTablePtr;
633a0483764SConrad Meyer             size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
634a0483764SConrad Meyer             PREFETCH_AREA(pStart, pSize);
635a0483764SConrad Meyer         }
636a0483764SConrad Meyer         return 0;
637a0483764SConrad Meyer     case set_compressed :
638a0483764SConrad Meyer         {   unsigned tableLog;
639a0483764SConrad Meyer             S16 norm[MaxSeq+1];
640a0483764SConrad Meyer             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
64137f1f268SConrad Meyer             RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
64237f1f268SConrad Meyer             RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
643f7cd7fe5SConrad Meyer             ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
644a0483764SConrad Meyer             *DTablePtr = DTableSpace;
645a0483764SConrad Meyer             return headerSize;
646a0483764SConrad Meyer         }
6472b9c00cbSConrad Meyer     default :
648a0483764SConrad Meyer         assert(0);
6492b9c00cbSConrad Meyer         RETURN_ERROR(GENERIC, "impossible");
650a0483764SConrad Meyer     }
651a0483764SConrad Meyer }
652a0483764SConrad Meyer 
/*! ZSTD_decodeSeqHeaders() :
 *  Parse the header of a sequences section : the number of sequences, the byte
 *  describing the three symbol encoding types, then whatever each type requires
 *  to select or build the LL, OF and ML decoding tables inside `dctx`.
 *
 *  Number of sequences, from the first byte b0 :
 *  - b0 == 0     : no sequence; the section must be exactly 1 byte long.
 *  - b0 <  0x80  : nbSeq = b0.
 *  - b0 == 0xFF  : nbSeq = (next 2 bytes, little-endian) + LONGNBSEQ.
 *  - otherwise   : nbSeq = ((b0 - 0x80) << 8) + next byte.
 *
 * @param nbSeqPtr : receives the decoded number of sequences.
 * @return : nb bytes consumed from src, or an error code
 *           (srcSize_wrong when src is too short, corruption_detected when a table cannot be built).
 *
 *  Bounds checks compare against the remaining size (iend - ip) instead of forming
 *  `ip + k` : a pointer beyond one-past-the-end of src is not valid to compute,
 *  and may wrap around on 32-bit address spaces. */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                             const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    int nbSeq;
    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

    /* check */
    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");

    /* SeqHead */
    nbSeq = *ip++;
    if (!nbSeq) {
        *nbSeqPtr=0;
        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
        return 1;
    }
    if (nbSeq > 0x7F) {
        if (nbSeq == 0xFF) {
            /* need 2 more bytes */
            RETURN_ERROR_IF((size_t)(iend - ip) < 2, srcSize_wrong, "");
            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
            ip+=2;
        } else {
            /* need 1 more byte */
            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
        }
    }
    *nbSeqPtr = nbSeq;

    /* FSE table descriptors */
    RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
    {   /* encoding types are packed in one byte : LL in bits 7-6, OF in bits 5-4, ML in bits 3-2 */
        symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
        ip++;

        /* Build DTables : each call reports how many bytes it consumed from ip */
        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                      LLtype, MaxLL, LLFSELog,
                                                      ip, iend-ip,
                                                      LL_base, LL_bits,
                                                      LL_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += llhSize;
        }

        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                      OFtype, MaxOff, OffFSELog,
                                                      ip, iend-ip,
                                                      OF_base, OF_bits,
                                                      OF_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += ofhSize;
        }

        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                      MLtype, MaxML, MLFSELog,
                                                      ip, iend-ip,
                                                      ML_base, ML_bits,
                                                      ML_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += mlhSize;
        }
    }

    return ip-istart;
}
731a0483764SConrad Meyer 
732a0483764SConrad Meyer 
733a0483764SConrad Meyer typedef struct {
734a0483764SConrad Meyer     size_t litLength;
735a0483764SConrad Meyer     size_t matchLength;
736a0483764SConrad Meyer     size_t offset;
737a0483764SConrad Meyer } seq_t;
738a0483764SConrad Meyer 
739a0483764SConrad Meyer typedef struct {
740a0483764SConrad Meyer     size_t state;
741a0483764SConrad Meyer     const ZSTD_seqSymbol* table;
742a0483764SConrad Meyer } ZSTD_fseState;
743a0483764SConrad Meyer 
744a0483764SConrad Meyer typedef struct {
745a0483764SConrad Meyer     BIT_DStream_t DStream;
746a0483764SConrad Meyer     ZSTD_fseState stateLL;
747a0483764SConrad Meyer     ZSTD_fseState stateOffb;
748a0483764SConrad Meyer     ZSTD_fseState stateML;
749a0483764SConrad Meyer     size_t prevOffset[ZSTD_REP_NUM];
750a0483764SConrad Meyer } seqState_t;
751a0483764SConrad Meyer 
7529cbefe25SConrad Meyer /*! ZSTD_overlapCopy8() :
7539cbefe25SConrad Meyer  *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
7549cbefe25SConrad Meyer  *  If the offset is < 8 then the offset is spread to at least 8 bytes.
7559cbefe25SConrad Meyer  *
7569cbefe25SConrad Meyer  *  Precondition: *ip <= *op
7579cbefe25SConrad Meyer  *  Postcondition: *op - *op >= 8
7589cbefe25SConrad Meyer  */
ZSTD_overlapCopy8(BYTE ** op,BYTE const ** ip,size_t offset)75937f1f268SConrad Meyer HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
7609cbefe25SConrad Meyer     assert(*ip <= *op);
7619cbefe25SConrad Meyer     if (offset < 8) {
7629cbefe25SConrad Meyer         /* close range match, overlap */
7639cbefe25SConrad Meyer         static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
7649cbefe25SConrad Meyer         static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
7659cbefe25SConrad Meyer         int const sub2 = dec64table[offset];
7669cbefe25SConrad Meyer         (*op)[0] = (*ip)[0];
7679cbefe25SConrad Meyer         (*op)[1] = (*ip)[1];
7689cbefe25SConrad Meyer         (*op)[2] = (*ip)[2];
7699cbefe25SConrad Meyer         (*op)[3] = (*ip)[3];
7709cbefe25SConrad Meyer         *ip += dec32table[offset];
7719cbefe25SConrad Meyer         ZSTD_copy4(*op+4, *ip);
7729cbefe25SConrad Meyer         *ip -= sub2;
7739cbefe25SConrad Meyer     } else {
7749cbefe25SConrad Meyer         ZSTD_copy8(*op, *ip);
7759cbefe25SConrad Meyer     }
7769cbefe25SConrad Meyer     *ip += 8;
7779cbefe25SConrad Meyer     *op += 8;
7789cbefe25SConrad Meyer     assert(*op - *ip >= 8);
7799cbefe25SConrad Meyer }
780a0483764SConrad Meyer 
7819cbefe25SConrad Meyer /*! ZSTD_safecopy() :
7829cbefe25SConrad Meyer  *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
7839cbefe25SConrad Meyer  *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
7849cbefe25SConrad Meyer  *  This function is only called in the uncommon case where the sequence is near the end of the block. It
7859cbefe25SConrad Meyer  *  should be fast for a single long sequence, but can be slow for several short sequences.
7869cbefe25SConrad Meyer  *
7879cbefe25SConrad Meyer  *  @param ovtype controls the overlap detection
7889cbefe25SConrad Meyer  *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
7899cbefe25SConrad Meyer  *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
7909cbefe25SConrad Meyer  *           The src buffer must be before the dst buffer.
7919cbefe25SConrad Meyer  */
ZSTD_safecopy(BYTE * op,const BYTE * const oend_w,BYTE const * ip,ptrdiff_t length,ZSTD_overlap_e ovtype)792*5ff13fbcSAllan Jude static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
7939cbefe25SConrad Meyer     ptrdiff_t const diff = op - ip;
7949cbefe25SConrad Meyer     BYTE* const oend = op + length;
7959cbefe25SConrad Meyer 
7969cbefe25SConrad Meyer     assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
7979cbefe25SConrad Meyer            (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
7989cbefe25SConrad Meyer 
7999cbefe25SConrad Meyer     if (length < 8) {
8009cbefe25SConrad Meyer         /* Handle short lengths. */
8019cbefe25SConrad Meyer         while (op < oend) *op++ = *ip++;
8029cbefe25SConrad Meyer         return;
8039cbefe25SConrad Meyer     }
8049cbefe25SConrad Meyer     if (ovtype == ZSTD_overlap_src_before_dst) {
8059cbefe25SConrad Meyer         /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
8069cbefe25SConrad Meyer         assert(length >= 8);
8079cbefe25SConrad Meyer         ZSTD_overlapCopy8(&op, &ip, diff);
808*5ff13fbcSAllan Jude         length -= 8;
8099cbefe25SConrad Meyer         assert(op - ip >= 8);
8109cbefe25SConrad Meyer         assert(op <= oend);
8119cbefe25SConrad Meyer     }
8129cbefe25SConrad Meyer 
8139cbefe25SConrad Meyer     if (oend <= oend_w) {
8149cbefe25SConrad Meyer         /* No risk of overwrite. */
8159cbefe25SConrad Meyer         ZSTD_wildcopy(op, ip, length, ovtype);
8169cbefe25SConrad Meyer         return;
8179cbefe25SConrad Meyer     }
8189cbefe25SConrad Meyer     if (op <= oend_w) {
8199cbefe25SConrad Meyer         /* Wildcopy until we get close to the end. */
8209cbefe25SConrad Meyer         assert(oend > oend_w);
8219cbefe25SConrad Meyer         ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
8229cbefe25SConrad Meyer         ip += oend_w - op;
823*5ff13fbcSAllan Jude         op += oend_w - op;
8249cbefe25SConrad Meyer     }
8259cbefe25SConrad Meyer     /* Handle the leftovers. */
8269cbefe25SConrad Meyer     while (op < oend) *op++ = *ip++;
8279cbefe25SConrad Meyer }
8289cbefe25SConrad Meyer 
829*5ff13fbcSAllan Jude /* ZSTD_safecopyDstBeforeSrc():
830*5ff13fbcSAllan Jude  * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
831*5ff13fbcSAllan Jude  * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
ZSTD_safecopyDstBeforeSrc(BYTE * op,BYTE const * ip,ptrdiff_t length)832*5ff13fbcSAllan Jude static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
833*5ff13fbcSAllan Jude     ptrdiff_t const diff = op - ip;
834*5ff13fbcSAllan Jude     BYTE* const oend = op + length;
835*5ff13fbcSAllan Jude 
836*5ff13fbcSAllan Jude     if (length < 8 || diff > -8) {
837*5ff13fbcSAllan Jude         /* Handle short lengths, close overlaps, and dst not before src. */
838*5ff13fbcSAllan Jude         while (op < oend) *op++ = *ip++;
839*5ff13fbcSAllan Jude         return;
840*5ff13fbcSAllan Jude     }
841*5ff13fbcSAllan Jude 
842*5ff13fbcSAllan Jude     if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
843*5ff13fbcSAllan Jude         ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
844*5ff13fbcSAllan Jude         ip += oend - WILDCOPY_OVERLENGTH - op;
845*5ff13fbcSAllan Jude         op += oend - WILDCOPY_OVERLENGTH - op;
846*5ff13fbcSAllan Jude     }
847*5ff13fbcSAllan Jude 
848*5ff13fbcSAllan Jude     /* Handle the leftovers. */
849*5ff13fbcSAllan Jude     while (op < oend) *op++ = *ip++;
850*5ff13fbcSAllan Jude }
851*5ff13fbcSAllan Jude 
8529cbefe25SConrad Meyer /* ZSTD_execSequenceEnd():
8539cbefe25SConrad Meyer  * This version handles cases that are near the end of the output buffer. It requires
8549cbefe25SConrad Meyer  * more careful checks to make sure there is no overflow. By separating out these hard
8559cbefe25SConrad Meyer  * and unlikely cases, we can speed up the common cases.
8569cbefe25SConrad Meyer  *
8579cbefe25SConrad Meyer  * NOTE: This function needs to be fast for a single long sequence, but doesn't need
8589cbefe25SConrad Meyer  * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
8599cbefe25SConrad Meyer  */
860a0483764SConrad Meyer FORCE_NOINLINE
ZSTD_execSequenceEnd(BYTE * op,BYTE * const oend,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,const BYTE * const prefixStart,const BYTE * const virtualStart,const BYTE * const dictEnd)8619cbefe25SConrad Meyer size_t ZSTD_execSequenceEnd(BYTE* op,
862a0483764SConrad Meyer     BYTE* const oend, seq_t sequence,
863a0483764SConrad Meyer     const BYTE** litPtr, const BYTE* const litLimit,
8649cbefe25SConrad Meyer     const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
865a0483764SConrad Meyer {
866a0483764SConrad Meyer     BYTE* const oLitEnd = op + sequence.litLength;
867a0483764SConrad Meyer     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
868a0483764SConrad Meyer     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
869a0483764SConrad Meyer     const BYTE* match = oLitEnd - sequence.offset;
8709cbefe25SConrad Meyer     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
871a0483764SConrad Meyer 
87237f1f268SConrad Meyer     /* bounds checks : careful of address space overflow in 32-bit mode */
87337f1f268SConrad Meyer     RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
87437f1f268SConrad Meyer     RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
87537f1f268SConrad Meyer     assert(op < op + sequenceLength);
87637f1f268SConrad Meyer     assert(oLitEnd < op + sequenceLength);
877a0483764SConrad Meyer 
878a0483764SConrad Meyer     /* copy literals */
8799cbefe25SConrad Meyer     ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
8809cbefe25SConrad Meyer     op = oLitEnd;
8819cbefe25SConrad Meyer     *litPtr = iLitEnd;
882a0483764SConrad Meyer 
883a0483764SConrad Meyer     /* copy Match */
8849cbefe25SConrad Meyer     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
885a0483764SConrad Meyer         /* offset beyond prefix */
88637f1f268SConrad Meyer         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
8879cbefe25SConrad Meyer         match = dictEnd - (prefixStart - match);
888a0483764SConrad Meyer         if (match + sequence.matchLength <= dictEnd) {
889f7cd7fe5SConrad Meyer             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
890a0483764SConrad Meyer             return sequenceLength;
891a0483764SConrad Meyer         }
892a0483764SConrad Meyer         /* span extDict & currentPrefixSegment */
893a0483764SConrad Meyer         {   size_t const length1 = dictEnd - match;
894f7cd7fe5SConrad Meyer         ZSTD_memmove(oLitEnd, match, length1);
895a0483764SConrad Meyer         op = oLitEnd + length1;
896a0483764SConrad Meyer         sequence.matchLength -= length1;
8979cbefe25SConrad Meyer         match = prefixStart;
898*5ff13fbcSAllan Jude         }
899*5ff13fbcSAllan Jude     }
900*5ff13fbcSAllan Jude     ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
901*5ff13fbcSAllan Jude     return sequenceLength;
902*5ff13fbcSAllan Jude }
903*5ff13fbcSAllan Jude 
904*5ff13fbcSAllan Jude /* ZSTD_execSequenceEndSplitLitBuffer():
905*5ff13fbcSAllan Jude  * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
906*5ff13fbcSAllan Jude  */
907*5ff13fbcSAllan Jude FORCE_NOINLINE
ZSTD_execSequenceEndSplitLitBuffer(BYTE * op,BYTE * const oend,const BYTE * const oend_w,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,const BYTE * const prefixStart,const BYTE * const virtualStart,const BYTE * const dictEnd)908*5ff13fbcSAllan Jude size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
909*5ff13fbcSAllan Jude     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
910*5ff13fbcSAllan Jude     const BYTE** litPtr, const BYTE* const litLimit,
911*5ff13fbcSAllan Jude     const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
912*5ff13fbcSAllan Jude {
913*5ff13fbcSAllan Jude     BYTE* const oLitEnd = op + sequence.litLength;
914*5ff13fbcSAllan Jude     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
915*5ff13fbcSAllan Jude     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
916*5ff13fbcSAllan Jude     const BYTE* match = oLitEnd - sequence.offset;
917*5ff13fbcSAllan Jude 
918*5ff13fbcSAllan Jude 
919*5ff13fbcSAllan Jude     /* bounds checks : careful of address space overflow in 32-bit mode */
920*5ff13fbcSAllan Jude     RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
921*5ff13fbcSAllan Jude     RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
922*5ff13fbcSAllan Jude     assert(op < op + sequenceLength);
923*5ff13fbcSAllan Jude     assert(oLitEnd < op + sequenceLength);
924*5ff13fbcSAllan Jude 
925*5ff13fbcSAllan Jude     /* copy literals */
926*5ff13fbcSAllan Jude     RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
927*5ff13fbcSAllan Jude     ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
928*5ff13fbcSAllan Jude     op = oLitEnd;
929*5ff13fbcSAllan Jude     *litPtr = iLitEnd;
930*5ff13fbcSAllan Jude 
931*5ff13fbcSAllan Jude     /* copy Match */
932*5ff13fbcSAllan Jude     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
933*5ff13fbcSAllan Jude         /* offset beyond prefix */
934*5ff13fbcSAllan Jude         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
935*5ff13fbcSAllan Jude         match = dictEnd - (prefixStart - match);
936*5ff13fbcSAllan Jude         if (match + sequence.matchLength <= dictEnd) {
937*5ff13fbcSAllan Jude             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
938*5ff13fbcSAllan Jude             return sequenceLength;
939*5ff13fbcSAllan Jude         }
940*5ff13fbcSAllan Jude         /* span extDict & currentPrefixSegment */
941*5ff13fbcSAllan Jude         {   size_t const length1 = dictEnd - match;
942*5ff13fbcSAllan Jude         ZSTD_memmove(oLitEnd, match, length1);
943*5ff13fbcSAllan Jude         op = oLitEnd + length1;
944*5ff13fbcSAllan Jude         sequence.matchLength -= length1;
945*5ff13fbcSAllan Jude         match = prefixStart;
946*5ff13fbcSAllan Jude         }
947*5ff13fbcSAllan Jude     }
9489cbefe25SConrad Meyer     ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
949a0483764SConrad Meyer     return sequenceLength;
950a0483764SConrad Meyer }
951a0483764SConrad Meyer 
952a0483764SConrad Meyer HINT_INLINE
ZSTD_execSequence(BYTE * op,BYTE * const oend,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,const BYTE * const prefixStart,const BYTE * const virtualStart,const BYTE * const dictEnd)953a0483764SConrad Meyer size_t ZSTD_execSequence(BYTE* op,
954a0483764SConrad Meyer     BYTE* const oend, seq_t sequence,
955a0483764SConrad Meyer     const BYTE** litPtr, const BYTE* const litLimit,
956a0483764SConrad Meyer     const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
957a0483764SConrad Meyer {
958a0483764SConrad Meyer     BYTE* const oLitEnd = op + sequence.litLength;
959a0483764SConrad Meyer     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
960a0483764SConrad Meyer     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
96137f1f268SConrad Meyer     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
962a0483764SConrad Meyer     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
963a0483764SConrad Meyer     const BYTE* match = oLitEnd - sequence.offset;
964a0483764SConrad Meyer 
96537f1f268SConrad Meyer     assert(op != NULL /* Precondition */);
96637f1f268SConrad Meyer     assert(oend_w < oend /* No underflow */);
96737f1f268SConrad Meyer     /* Handle edge cases in a slow path:
96837f1f268SConrad Meyer      *   - Read beyond end of literals
96937f1f268SConrad Meyer      *   - Match end is within WILDCOPY_OVERLIMIT of oend
97037f1f268SConrad Meyer      *   - 32-bit mode and the match length overflows
97137f1f268SConrad Meyer      */
97237f1f268SConrad Meyer     if (UNLIKELY(
97337f1f268SConrad Meyer         iLitEnd > litLimit ||
97437f1f268SConrad Meyer         oMatchEnd > oend_w ||
97537f1f268SConrad Meyer         (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
9769cbefe25SConrad Meyer         return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
977a0483764SConrad Meyer 
9789cbefe25SConrad Meyer     /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
97937f1f268SConrad Meyer     assert(op <= oLitEnd /* No overflow */);
98037f1f268SConrad Meyer     assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
98137f1f268SConrad Meyer     assert(oMatchEnd <= oend /* No underflow */);
9829cbefe25SConrad Meyer     assert(iLitEnd <= litLimit /* Literal length is in bounds */);
9839cbefe25SConrad Meyer     assert(oLitEnd <= oend_w /* Can wildcopy literals */);
9849cbefe25SConrad Meyer     assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
9859cbefe25SConrad Meyer 
9869cbefe25SConrad Meyer     /* Copy Literals:
9879cbefe25SConrad Meyer      * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
9889cbefe25SConrad Meyer      * We likely don't need the full 32-byte wildcopy.
9899cbefe25SConrad Meyer      */
9909cbefe25SConrad Meyer     assert(WILDCOPY_OVERLENGTH >= 16);
9919cbefe25SConrad Meyer     ZSTD_copy16(op, (*litPtr));
99237f1f268SConrad Meyer     if (UNLIKELY(sequence.litLength > 16)) {
9939cbefe25SConrad Meyer         ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
9949cbefe25SConrad Meyer     }
995a0483764SConrad Meyer     op = oLitEnd;
996a0483764SConrad Meyer     *litPtr = iLitEnd;   /* update for next sequence */
997a0483764SConrad Meyer 
9989cbefe25SConrad Meyer     /* Copy Match */
999a0483764SConrad Meyer     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1000a0483764SConrad Meyer         /* offset beyond prefix -> go into extDict */
100137f1f268SConrad Meyer         RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1002a0483764SConrad Meyer         match = dictEnd + (match - prefixStart);
1003a0483764SConrad Meyer         if (match + sequence.matchLength <= dictEnd) {
1004f7cd7fe5SConrad Meyer             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1005a0483764SConrad Meyer             return sequenceLength;
1006a0483764SConrad Meyer         }
1007a0483764SConrad Meyer         /* span extDict & currentPrefixSegment */
1008a0483764SConrad Meyer         {   size_t const length1 = dictEnd - match;
1009f7cd7fe5SConrad Meyer         ZSTD_memmove(oLitEnd, match, length1);
1010a0483764SConrad Meyer         op = oLitEnd + length1;
1011a0483764SConrad Meyer         sequence.matchLength -= length1;
1012a0483764SConrad Meyer         match = prefixStart;
1013*5ff13fbcSAllan Jude         }
1014*5ff13fbcSAllan Jude     }
1015*5ff13fbcSAllan Jude     /* Match within prefix of 1 or more bytes */
1016*5ff13fbcSAllan Jude     assert(op <= oMatchEnd);
1017*5ff13fbcSAllan Jude     assert(oMatchEnd <= oend_w);
1018*5ff13fbcSAllan Jude     assert(match >= prefixStart);
1019*5ff13fbcSAllan Jude     assert(sequence.matchLength >= 1);
1020*5ff13fbcSAllan Jude 
1021*5ff13fbcSAllan Jude     /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1022*5ff13fbcSAllan Jude      * without overlap checking.
1023*5ff13fbcSAllan Jude      */
1024*5ff13fbcSAllan Jude     if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1025*5ff13fbcSAllan Jude         /* We bet on a full wildcopy for matches, since we expect matches to be
1026*5ff13fbcSAllan Jude          * longer than literals (in general). In silesia, ~10% of matches are longer
1027*5ff13fbcSAllan Jude          * than 16 bytes.
1028*5ff13fbcSAllan Jude          */
1029*5ff13fbcSAllan Jude         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1030*5ff13fbcSAllan Jude         return sequenceLength;
1031*5ff13fbcSAllan Jude     }
1032*5ff13fbcSAllan Jude     assert(sequence.offset < WILDCOPY_VECLEN);
1033*5ff13fbcSAllan Jude 
1034*5ff13fbcSAllan Jude     /* Copy 8 bytes and spread the offset to be >= 8. */
1035*5ff13fbcSAllan Jude     ZSTD_overlapCopy8(&op, &match, sequence.offset);
1036*5ff13fbcSAllan Jude 
1037*5ff13fbcSAllan Jude     /* If the match length is > 8 bytes, then continue with the wildcopy. */
1038*5ff13fbcSAllan Jude     if (sequence.matchLength > 8) {
1039*5ff13fbcSAllan Jude         assert(op < oMatchEnd);
1040*5ff13fbcSAllan Jude         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
1041*5ff13fbcSAllan Jude     }
1042*5ff13fbcSAllan Jude     return sequenceLength;
1043*5ff13fbcSAllan Jude }
1044*5ff13fbcSAllan Jude 
1045*5ff13fbcSAllan Jude HINT_INLINE
ZSTD_execSequenceSplitLitBuffer(BYTE * op,BYTE * const oend,const BYTE * const oend_w,seq_t sequence,const BYTE ** litPtr,const BYTE * const litLimit,const BYTE * const prefixStart,const BYTE * const virtualStart,const BYTE * const dictEnd)1046*5ff13fbcSAllan Jude size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1047*5ff13fbcSAllan Jude     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1048*5ff13fbcSAllan Jude     const BYTE** litPtr, const BYTE* const litLimit,
1049*5ff13fbcSAllan Jude     const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1050*5ff13fbcSAllan Jude {
1051*5ff13fbcSAllan Jude     BYTE* const oLitEnd = op + sequence.litLength;
1052*5ff13fbcSAllan Jude     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1053*5ff13fbcSAllan Jude     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
1054*5ff13fbcSAllan Jude     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1055*5ff13fbcSAllan Jude     const BYTE* match = oLitEnd - sequence.offset;
1056*5ff13fbcSAllan Jude 
1057*5ff13fbcSAllan Jude     assert(op != NULL /* Precondition */);
1058*5ff13fbcSAllan Jude     assert(oend_w < oend /* No underflow */);
1059*5ff13fbcSAllan Jude     /* Handle edge cases in a slow path:
1060*5ff13fbcSAllan Jude      *   - Read beyond end of literals
1061*5ff13fbcSAllan Jude      *   - Match end is within WILDCOPY_OVERLIMIT of oend
1062*5ff13fbcSAllan Jude      *   - 32-bit mode and the match length overflows
1063*5ff13fbcSAllan Jude      */
1064*5ff13fbcSAllan Jude     if (UNLIKELY(
1065*5ff13fbcSAllan Jude             iLitEnd > litLimit ||
1066*5ff13fbcSAllan Jude             oMatchEnd > oend_w ||
1067*5ff13fbcSAllan Jude             (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1068*5ff13fbcSAllan Jude         return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1069*5ff13fbcSAllan Jude 
1070*5ff13fbcSAllan Jude     /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
1071*5ff13fbcSAllan Jude     assert(op <= oLitEnd /* No overflow */);
1072*5ff13fbcSAllan Jude     assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1073*5ff13fbcSAllan Jude     assert(oMatchEnd <= oend /* No underflow */);
1074*5ff13fbcSAllan Jude     assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1075*5ff13fbcSAllan Jude     assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1076*5ff13fbcSAllan Jude     assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1077*5ff13fbcSAllan Jude 
1078*5ff13fbcSAllan Jude     /* Copy Literals:
1079*5ff13fbcSAllan Jude      * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1080*5ff13fbcSAllan Jude      * We likely don't need the full 32-byte wildcopy.
1081*5ff13fbcSAllan Jude      */
1082*5ff13fbcSAllan Jude     assert(WILDCOPY_OVERLENGTH >= 16);
1083*5ff13fbcSAllan Jude     ZSTD_copy16(op, (*litPtr));
1084*5ff13fbcSAllan Jude     if (UNLIKELY(sequence.litLength > 16)) {
1085*5ff13fbcSAllan Jude         ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
1086*5ff13fbcSAllan Jude     }
1087*5ff13fbcSAllan Jude     op = oLitEnd;
1088*5ff13fbcSAllan Jude     *litPtr = iLitEnd;   /* update for next sequence */
1089*5ff13fbcSAllan Jude 
1090*5ff13fbcSAllan Jude     /* Copy Match */
1091*5ff13fbcSAllan Jude     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1092*5ff13fbcSAllan Jude         /* offset beyond prefix -> go into extDict */
1093*5ff13fbcSAllan Jude         RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1094*5ff13fbcSAllan Jude         match = dictEnd + (match - prefixStart);
1095*5ff13fbcSAllan Jude         if (match + sequence.matchLength <= dictEnd) {
1096*5ff13fbcSAllan Jude             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1097*5ff13fbcSAllan Jude             return sequenceLength;
1098*5ff13fbcSAllan Jude         }
1099*5ff13fbcSAllan Jude         /* span extDict & currentPrefixSegment */
1100*5ff13fbcSAllan Jude         {   size_t const length1 = dictEnd - match;
1101*5ff13fbcSAllan Jude             ZSTD_memmove(oLitEnd, match, length1);
1102*5ff13fbcSAllan Jude             op = oLitEnd + length1;
1103*5ff13fbcSAllan Jude             sequence.matchLength -= length1;
1104*5ff13fbcSAllan Jude             match = prefixStart;
1105a0483764SConrad Meyer     }   }
11069cbefe25SConrad Meyer     /* Match within prefix of 1 or more bytes */
11079cbefe25SConrad Meyer     assert(op <= oMatchEnd);
11089cbefe25SConrad Meyer     assert(oMatchEnd <= oend_w);
11099cbefe25SConrad Meyer     assert(match >= prefixStart);
11109cbefe25SConrad Meyer     assert(sequence.matchLength >= 1);
1111a0483764SConrad Meyer 
11129cbefe25SConrad Meyer     /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
11139cbefe25SConrad Meyer      * without overlap checking.
11149cbefe25SConrad Meyer      */
111537f1f268SConrad Meyer     if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
11169cbefe25SConrad Meyer         /* We bet on a full wildcopy for matches, since we expect matches to be
11179cbefe25SConrad Meyer          * longer than literals (in general). In silesia, ~10% of matches are longer
11189cbefe25SConrad Meyer          * than 16 bytes.
11199cbefe25SConrad Meyer          */
11209cbefe25SConrad Meyer         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1121a0483764SConrad Meyer         return sequenceLength;
1122a0483764SConrad Meyer     }
11239cbefe25SConrad Meyer     assert(sequence.offset < WILDCOPY_VECLEN);
1124a0483764SConrad Meyer 
11259cbefe25SConrad Meyer     /* Copy 8 bytes and spread the offset to be >= 8. */
11269cbefe25SConrad Meyer     ZSTD_overlapCopy8(&op, &match, sequence.offset);
1127a0483764SConrad Meyer 
11289cbefe25SConrad Meyer     /* If the match length is > 8 bytes, then continue with the wildcopy. */
11299cbefe25SConrad Meyer     if (sequence.matchLength > 8) {
11309cbefe25SConrad Meyer         assert(op < oMatchEnd);
11319cbefe25SConrad Meyer         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
1132a0483764SConrad Meyer     }
1133a0483764SConrad Meyer     return sequenceLength;
1134a0483764SConrad Meyer }
1135a0483764SConrad Meyer 
1136*5ff13fbcSAllan Jude 
1137a0483764SConrad Meyer static void
ZSTD_initFseState(ZSTD_fseState * DStatePtr,BIT_DStream_t * bitD,const ZSTD_seqSymbol * dt)1138a0483764SConrad Meyer ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1139a0483764SConrad Meyer {
1140a0483764SConrad Meyer     const void* ptr = dt;
1141a0483764SConrad Meyer     const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1142a0483764SConrad Meyer     DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1143a0483764SConrad Meyer     DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1144a0483764SConrad Meyer                 (U32)DStatePtr->state, DTableH->tableLog);
1145a0483764SConrad Meyer     BIT_reloadDStream(bitD);
1146a0483764SConrad Meyer     DStatePtr->table = dt + 1;
1147a0483764SConrad Meyer }
1148a0483764SConrad Meyer 
1149a0483764SConrad Meyer FORCE_INLINE_TEMPLATE void
ZSTD_updateFseStateWithDInfo(ZSTD_fseState * DStatePtr,BIT_DStream_t * bitD,U16 nextState,U32 nbBits)1150*5ff13fbcSAllan Jude ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1151a0483764SConrad Meyer {
1152a0483764SConrad Meyer     size_t const lowBits = BIT_readBits(bitD, nbBits);
1153*5ff13fbcSAllan Jude     DStatePtr->state = nextState + lowBits;
115437f1f268SConrad Meyer }
115537f1f268SConrad Meyer 
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bits we read
 * after reloading when we are decoding long offsets.
 */
#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
        : 0)

/* Tells the sequence decoder whether offsets may need the long-offset read
 * path. ZSTD_lo_isLongOffset is pinned to 1 (ZSTD_decodeSequence() statically
 * asserts it). */
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1167a0483764SConrad Meyer 
1168a0483764SConrad Meyer FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t * seqState,const ZSTD_longOffset_e longOffsets)1169*5ff13fbcSAllan Jude ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1170a0483764SConrad Meyer {
1171a0483764SConrad Meyer     seq_t seq;
1172*5ff13fbcSAllan Jude     const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1173*5ff13fbcSAllan Jude     const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1174*5ff13fbcSAllan Jude     const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1175*5ff13fbcSAllan Jude     seq.matchLength = mlDInfo->baseValue;
1176*5ff13fbcSAllan Jude     seq.litLength = llDInfo->baseValue;
1177*5ff13fbcSAllan Jude     {   U32 const ofBase = ofDInfo->baseValue;
1178*5ff13fbcSAllan Jude         BYTE const llBits = llDInfo->nbAdditionalBits;
1179*5ff13fbcSAllan Jude         BYTE const mlBits = mlDInfo->nbAdditionalBits;
1180*5ff13fbcSAllan Jude         BYTE const ofBits = ofDInfo->nbAdditionalBits;
118137f1f268SConrad Meyer         BYTE const totalBits = llBits+mlBits+ofBits;
1182a0483764SConrad Meyer 
1183*5ff13fbcSAllan Jude         U16 const llNext = llDInfo->nextState;
1184*5ff13fbcSAllan Jude         U16 const mlNext = mlDInfo->nextState;
1185*5ff13fbcSAllan Jude         U16 const ofNext = ofDInfo->nextState;
1186*5ff13fbcSAllan Jude         U32 const llnbBits = llDInfo->nbBits;
1187*5ff13fbcSAllan Jude         U32 const mlnbBits = mlDInfo->nbBits;
1188*5ff13fbcSAllan Jude         U32 const ofnbBits = ofDInfo->nbBits;
1189*5ff13fbcSAllan Jude         /*
1190*5ff13fbcSAllan Jude          * As gcc has better branch and block analyzers, sometimes it is only
1191*5ff13fbcSAllan Jude          * valuable to mark likelyness for clang, it gives around 3-4% of
1192*5ff13fbcSAllan Jude          * performance.
1193*5ff13fbcSAllan Jude          */
1194*5ff13fbcSAllan Jude 
1195a0483764SConrad Meyer         /* sequence */
1196a0483764SConrad Meyer         {   size_t offset;
1197*5ff13fbcSAllan Jude     #if defined(__clang__)
1198*5ff13fbcSAllan Jude             if (LIKELY(ofBits > 1)) {
1199*5ff13fbcSAllan Jude     #else
120037f1f268SConrad Meyer             if (ofBits > 1) {
1201*5ff13fbcSAllan Jude     #endif
1202a0483764SConrad Meyer                 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1203a0483764SConrad Meyer                 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1204a0483764SConrad Meyer                 assert(ofBits <= MaxOff);
1205a0483764SConrad Meyer                 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1206a0483764SConrad Meyer                     U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
1207a0483764SConrad Meyer                     offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1208a0483764SConrad Meyer                     BIT_reloadDStream(&seqState->DStream);
1209a0483764SConrad Meyer                     if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1210a0483764SConrad Meyer                     assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
1211a0483764SConrad Meyer                 } else {
1212a0483764SConrad Meyer                     offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
1213a0483764SConrad Meyer                     if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1214a0483764SConrad Meyer                 }
121537f1f268SConrad Meyer                 seqState->prevOffset[2] = seqState->prevOffset[1];
121637f1f268SConrad Meyer                 seqState->prevOffset[1] = seqState->prevOffset[0];
121737f1f268SConrad Meyer                 seqState->prevOffset[0] = offset;
121837f1f268SConrad Meyer             } else {
1219*5ff13fbcSAllan Jude                 U32 const ll0 = (llDInfo->baseValue == 0);
122037f1f268SConrad Meyer                 if (LIKELY((ofBits == 0))) {
1221*5ff13fbcSAllan Jude                     offset = seqState->prevOffset[ll0];
1222*5ff13fbcSAllan Jude                     seqState->prevOffset[1] = seqState->prevOffset[!ll0];
122337f1f268SConrad Meyer                     seqState->prevOffset[0] = offset;
122437f1f268SConrad Meyer                 } else {
122537f1f268SConrad Meyer                     offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
122637f1f268SConrad Meyer                     {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1227a0483764SConrad Meyer                         temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
1228a0483764SConrad Meyer                         if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1229a0483764SConrad Meyer                         seqState->prevOffset[1] = seqState->prevOffset[0];
1230a0483764SConrad Meyer                         seqState->prevOffset[0] = offset = temp;
123137f1f268SConrad Meyer             }   }   }
1232a0483764SConrad Meyer             seq.offset = offset;
1233a0483764SConrad Meyer         }
1234a0483764SConrad Meyer 
1235*5ff13fbcSAllan Jude     #if defined(__clang__)
1236*5ff13fbcSAllan Jude         if (UNLIKELY(mlBits > 0))
1237*5ff13fbcSAllan Jude     #else
123837f1f268SConrad Meyer         if (mlBits > 0)
1239*5ff13fbcSAllan Jude     #endif
124037f1f268SConrad Meyer             seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
124137f1f268SConrad Meyer 
1242a0483764SConrad Meyer         if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1243a0483764SConrad Meyer             BIT_reloadDStream(&seqState->DStream);
124437f1f268SConrad Meyer         if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1245a0483764SConrad Meyer             BIT_reloadDStream(&seqState->DStream);
1246a0483764SConrad Meyer         /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1247a0483764SConrad Meyer         ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1248a0483764SConrad Meyer 
1249*5ff13fbcSAllan Jude     #if defined(__clang__)
1250*5ff13fbcSAllan Jude         if (UNLIKELY(llBits > 0))
1251*5ff13fbcSAllan Jude     #else
125237f1f268SConrad Meyer         if (llBits > 0)
1253*5ff13fbcSAllan Jude     #endif
125437f1f268SConrad Meyer             seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
125537f1f268SConrad Meyer 
1256a0483764SConrad Meyer         if (MEM_32bits())
1257a0483764SConrad Meyer             BIT_reloadDStream(&seqState->DStream);
1258a0483764SConrad Meyer 
1259a0483764SConrad Meyer         DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1260a0483764SConrad Meyer                     (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1261a0483764SConrad Meyer 
1262*5ff13fbcSAllan Jude         ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
1263*5ff13fbcSAllan Jude         ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
1264a0483764SConrad Meyer         if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
1265*5ff13fbcSAllan Jude         ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
126637f1f268SConrad Meyer     }
1267a0483764SConrad Meyer 
1268a0483764SConrad Meyer     return seq;
1269a0483764SConrad Meyer }
1270a0483764SConrad Meyer 
127137f1f268SConrad Meyer #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1272f7cd7fe5SConrad Meyer MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
127337f1f268SConrad Meyer {
127437f1f268SConrad Meyer     size_t const windowSize = dctx->fParams.windowSize;
127537f1f268SConrad Meyer     /* No dictionary used. */
127637f1f268SConrad Meyer     if (dctx->dictContentEndForFuzzing == NULL) return 0;
127737f1f268SConrad Meyer     /* Dictionary is our prefix. */
127837f1f268SConrad Meyer     if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
127937f1f268SConrad Meyer     /* Dictionary is not our ext-dict. */
128037f1f268SConrad Meyer     if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
128137f1f268SConrad Meyer     /* Dictionary is not within our window size. */
128237f1f268SConrad Meyer     if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
128337f1f268SConrad Meyer     /* Dictionary is active. */
128437f1f268SConrad Meyer     return 1;
128537f1f268SConrad Meyer }
128637f1f268SConrad Meyer 
128737f1f268SConrad Meyer MEM_STATIC void ZSTD_assertValidSequence(
128837f1f268SConrad Meyer         ZSTD_DCtx const* dctx,
128937f1f268SConrad Meyer         BYTE const* op, BYTE const* oend,
129037f1f268SConrad Meyer         seq_t const seq,
129137f1f268SConrad Meyer         BYTE const* prefixStart, BYTE const* virtualStart)
129237f1f268SConrad Meyer {
1293f7cd7fe5SConrad Meyer #if DEBUGLEVEL >= 1
129437f1f268SConrad Meyer     size_t const windowSize = dctx->fParams.windowSize;
129537f1f268SConrad Meyer     size_t const sequenceSize = seq.litLength + seq.matchLength;
129637f1f268SConrad Meyer     BYTE const* const oLitEnd = op + seq.litLength;
129737f1f268SConrad Meyer     DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
129837f1f268SConrad Meyer             (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
129937f1f268SConrad Meyer     assert(op <= oend);
130037f1f268SConrad Meyer     assert((size_t)(oend - op) >= sequenceSize);
130137f1f268SConrad Meyer     assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
130237f1f268SConrad Meyer     if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
130337f1f268SConrad Meyer         size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
130437f1f268SConrad Meyer         /* Offset must be within the dictionary. */
130537f1f268SConrad Meyer         assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
130637f1f268SConrad Meyer         assert(seq.offset <= windowSize + dictSize);
130737f1f268SConrad Meyer     } else {
130837f1f268SConrad Meyer         /* Offset must be within our window. */
130937f1f268SConrad Meyer         assert(seq.offset <= windowSize);
131037f1f268SConrad Meyer     }
1311f7cd7fe5SConrad Meyer #else
1312f7cd7fe5SConrad Meyer     (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1313f7cd7fe5SConrad Meyer #endif
131437f1f268SConrad Meyer }
131537f1f268SConrad Meyer #endif
131637f1f268SConrad Meyer 
131737f1f268SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1318*5ff13fbcSAllan Jude 
1319*5ff13fbcSAllan Jude 
/* ZSTD_decompressSequences_bodySplitLitBuffer() :
 * Regenerates a block by decoding `nbSeq` sequences from the bitstream
 * [seqStart, seqStart+seqSize) and executing them into [dst, dst+maxDstSize).
 * Literals are consumed starting at dctx->litPtr up to dctx->litBufferEnd,
 * then from dctx->litExtraBuffer (ZSTD_LITBUFFEREXTRASIZE bytes).
 *
 * The work is done in three steps :
 *  1. sequences whose literals end at or before dctx->litBufferEnd,
 *  2. the first sequence whose literals extend past dctx->litBufferEnd
 *     (leftover literals are copied first, the rest comes from litExtraBuffer),
 *  3. all remaining sequences, reading literals from litExtraBuffer only.
 * Literals still unread afterwards are appended to the output.
 *
 * `frame` is only read by the fuzzing-only assertions.
 *
 * Side effects on dctx : sets fseEntropy when nbSeq != 0, stores the final
 * repeat offsets into entropy.rep[] once all sequences are decoded, and
 * switches litBufferLocation to ZSTD_not_in_dst when moving to litExtraBuffer.
 *
 * @return : nb of bytes written into dst (op - ostart),
 *           or an error code : corruption_detected, dstSize_tooSmall,
 *           or whatever error a ZSTD_execSequence*() call reported. */
1320a0483764SConrad Meyer FORCE_INLINE_TEMPLATE size_t
13214d3f1eafSConrad Meyer DONT_VECTORIZE
1322*5ff13fbcSAllan Jude ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1323a0483764SConrad Meyer                                void* dst, size_t maxDstSize,
1324a0483764SConrad Meyer                          const void* seqStart, size_t seqSize, int nbSeq,
132537f1f268SConrad Meyer                          const ZSTD_longOffset_e isLongOffset,
132637f1f268SConrad Meyer                          const int frame)
1327a0483764SConrad Meyer {
1328a0483764SConrad Meyer     const BYTE* ip = (const BYTE*)seqStart;
1329a0483764SConrad Meyer     const BYTE* const iend = ip + seqSize;
1330*5ff13fbcSAllan Jude     BYTE* const ostart = (BYTE*)dst;
1331a0483764SConrad Meyer     BYTE* const oend = ostart + maxDstSize;
1332a0483764SConrad Meyer     BYTE* op = ostart;
1333a0483764SConrad Meyer     const BYTE* litPtr = dctx->litPtr;
    /* not const : re-pointed at the end of litExtraBuffer once the first literal range is exhausted */
1334*5ff13fbcSAllan Jude     const BYTE* litBufferEnd = dctx->litBufferEnd;
1335a0483764SConrad Meyer     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1336a0483764SConrad Meyer     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1337a0483764SConrad Meyer     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1338*5ff13fbcSAllan Jude     DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
    /* `frame` is referenced only inside the fuzzing-only #if sections below;
     * the cast marks it as used for every other build */
133937f1f268SConrad Meyer     (void)frame;
1340a0483764SConrad Meyer 
1341a0483764SConrad Meyer     /* Regen sequences */
1342a0483764SConrad Meyer     if (nbSeq) {
1343a0483764SConrad Meyer         seqState_t seqState;
1344a0483764SConrad Meyer         dctx->fseEntropy = 1;
        /* seed the repeat-offset history from the context */
1345a0483764SConrad Meyer         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        /* a bitstream that cannot be initialised is reported as corruption */
13462b9c00cbSConrad Meyer         RETURN_ERROR_IF(
13472b9c00cbSConrad Meyer             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
134837f1f268SConrad Meyer             corruption_detected, "");
1349a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1350a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1351a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
135237f1f268SConrad Meyer         assert(dst != NULL);
1353a0483764SConrad Meyer 
        /* the end-of-block check (`< BIT_DStream_completed`) relies on this ordering of status values */
13544d3f1eafSConrad Meyer         ZSTD_STATIC_ASSERT(
13554d3f1eafSConrad Meyer                 BIT_DStream_unfinished < BIT_DStream_completed &&
13564d3f1eafSConrad Meyer                 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
13574d3f1eafSConrad Meyer                 BIT_DStream_completed < BIT_DStream_overflow);
13584d3f1eafSConrad Meyer 
1359*5ff13fbcSAllan Jude         /* decompress without overrunning litPtr begins */
1360*5ff13fbcSAllan Jude         {
            /* the first sequence is decoded ahead of the loop : the loop condition needs its litLength */
1361*5ff13fbcSAllan Jude             seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
136237f1f268SConrad Meyer             /* Align the decompression loop to 32 + 16 bytes.
136337f1f268SConrad Meyer                 *
136437f1f268SConrad Meyer                 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
136537f1f268SConrad Meyer                 * speed swings based on the alignment of the decompression loop. This
136637f1f268SConrad Meyer                 * performance swing is caused by parts of the decompression loop falling
136737f1f268SConrad Meyer                 * out of the DSB. The entire decompression loop should fit in the DSB,
136837f1f268SConrad Meyer                 * when it can't we get much worse performance. You can measure if you've
136937f1f268SConrad Meyer                 * hit the good case or the bad case with this perf command for some
137037f1f268SConrad Meyer                 * compressed file test.zst:
137137f1f268SConrad Meyer                 *
137237f1f268SConrad Meyer                 *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
137337f1f268SConrad Meyer                 *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
137437f1f268SConrad Meyer                 *
137537f1f268SConrad Meyer                 * If you see most cycles served out of the MITE you've hit the bad case.
137637f1f268SConrad Meyer                 * If you see most cycles served out of the DSB you've hit the good case.
137737f1f268SConrad Meyer                 * If it is pretty even then you may be in an okay case.
137837f1f268SConrad Meyer                 *
1379*5ff13fbcSAllan Jude                 * This issue has been reproduced on the following CPUs:
138037f1f268SConrad Meyer                 *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
138137f1f268SConrad Meyer                 *               Use Instruments->Counters to get DSB/MITE cycles.
138237f1f268SConrad Meyer                 *               I never got performance swings, but I was able to
138337f1f268SConrad Meyer                 *               go from the good case of mostly DSB to half of the
138437f1f268SConrad Meyer                 *               cycles served from MITE.
138537f1f268SConrad Meyer                 *   - Coffeelake: Intel i9-9900k
1386*5ff13fbcSAllan Jude                 *   - Coffeelake: Intel i7-9700k
138737f1f268SConrad Meyer                 *
138837f1f268SConrad Meyer                 * I haven't been able to reproduce the instability or DSB misses on any
138937f1f268SConrad Meyer                 * of the following CPUs:
139037f1f268SConrad Meyer                 *   - Haswell
139137f1f268SConrad Meyer                 *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
139237f1f268SConrad Meyer                 *   - Skylake
139337f1f268SConrad Meyer                 *
1394*5ff13fbcSAllan Jude                 * Alignment is done for each of the three major decompression loops:
1395*5ff13fbcSAllan Jude                 *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1396*5ff13fbcSAllan Jude                 *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1397*5ff13fbcSAllan Jude                 *   - ZSTD_decompressSequences_body
1398*5ff13fbcSAllan Jude                 * Alignment choices are made to minimize large swings on bad cases and influence on performance
1399*5ff13fbcSAllan Jude                 * from changes external to this code, rather than to overoptimize on the current commit.
1400*5ff13fbcSAllan Jude                 *
140137f1f268SConrad Meyer                 * If you are seeing performance instability this script can help test.
140237f1f268SConrad Meyer                 * It tests on 4 commits in zstd where I saw performance change.
140337f1f268SConrad Meyer                 *
140437f1f268SConrad Meyer                 *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
140537f1f268SConrad Meyer                 */
1406*5ff13fbcSAllan Jude #if defined(__GNUC__) && defined(__x86_64__)
1407*5ff13fbcSAllan Jude             __asm__(".p2align 6");
1408*5ff13fbcSAllan Jude #  if __GNUC__ >= 7
1409*5ff13fbcSAllan Jude 	    /* good for gcc-7, gcc-9, and gcc-11 */
1410*5ff13fbcSAllan Jude             __asm__("nop");
141137f1f268SConrad Meyer             __asm__(".p2align 5");
141237f1f268SConrad Meyer             __asm__("nop");
141337f1f268SConrad Meyer             __asm__(".p2align 4");
1414*5ff13fbcSAllan Jude #    if __GNUC__ == 8 || __GNUC__ == 10
1415*5ff13fbcSAllan Jude 	    /* good for gcc-8 and gcc-10 */
1416*5ff13fbcSAllan Jude             __asm__("nop");
1417*5ff13fbcSAllan Jude             __asm__(".p2align 3");
141837f1f268SConrad Meyer #    endif
1419*5ff13fbcSAllan Jude #  endif
1420*5ff13fbcSAllan Jude #endif
1421*5ff13fbcSAllan Jude 
1422*5ff13fbcSAllan Jude             /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
            /* On each iteration `sequence` is decoded but not yet executed;
             * the loop runs as long as its literals end at or before dctx->litBufferEnd */
1423*5ff13fbcSAllan Jude             for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
1424*5ff13fbcSAllan Jude                 size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1425*5ff13fbcSAllan Jude #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1426*5ff13fbcSAllan Jude                 assert(!ZSTD_isError(oneSeqSize));
1427*5ff13fbcSAllan Jude                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1428*5ff13fbcSAllan Jude #endif
1429*5ff13fbcSAllan Jude                 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1430*5ff13fbcSAllan Jude                     return oneSeqSize;
1431*5ff13fbcSAllan Jude                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1432*5ff13fbcSAllan Jude                 op += oneSeqSize;
                /* last sequence executed : leave without decoding another one */
1433*5ff13fbcSAllan Jude                 if (UNLIKELY(!--nbSeq))
1434*5ff13fbcSAllan Jude                     break;
1435*5ff13fbcSAllan Jude                 BIT_reloadDStream(&(seqState.DStream));
1436*5ff13fbcSAllan Jude                 sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1437*5ff13fbcSAllan Jude             }
1438*5ff13fbcSAllan Jude 
1439*5ff13fbcSAllan Jude             /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
            /* nbSeq > 0 here means the loop stopped on its condition :
             * `sequence` is decoded, not executed, and needs literals beyond dctx->litBufferEnd */
1440*5ff13fbcSAllan Jude             if (nbSeq > 0) {
1441*5ff13fbcSAllan Jude                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1442*5ff13fbcSAllan Jude                 if (leftoverLit)
1443*5ff13fbcSAllan Jude                 {
1444*5ff13fbcSAllan Jude                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1445*5ff13fbcSAllan Jude                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                    /* those bytes are now in the output; only the remainder is still to be read, from litExtraBuffer */
1446*5ff13fbcSAllan Jude                     sequence.litLength -= leftoverLit;
1447*5ff13fbcSAllan Jude                     op += leftoverLit;
1448*5ff13fbcSAllan Jude                 }
1449*5ff13fbcSAllan Jude                 litPtr = dctx->litExtraBuffer;
1450*5ff13fbcSAllan Jude                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1451*5ff13fbcSAllan Jude                 dctx->litBufferLocation = ZSTD_not_in_dst;
1452*5ff13fbcSAllan Jude                 {
1453*5ff13fbcSAllan Jude                     size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1454*5ff13fbcSAllan Jude #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1455*5ff13fbcSAllan Jude                     assert(!ZSTD_isError(oneSeqSize));
1456*5ff13fbcSAllan Jude                     if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1457*5ff13fbcSAllan Jude #endif
1458*5ff13fbcSAllan Jude                     if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1459*5ff13fbcSAllan Jude                         return oneSeqSize;
1460*5ff13fbcSAllan Jude                     DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1461*5ff13fbcSAllan Jude                     op += oneSeqSize;
                    /* reload the bitstream only if another sequence remains to be decoded */
1462*5ff13fbcSAllan Jude                     if (--nbSeq)
1463*5ff13fbcSAllan Jude                         BIT_reloadDStream(&(seqState.DStream));
1464*5ff13fbcSAllan Jude                 }
1465*5ff13fbcSAllan Jude             }
1466*5ff13fbcSAllan Jude         }
1467*5ff13fbcSAllan Jude 
1468*5ff13fbcSAllan Jude         if (nbSeq > 0) /* there is remaining lit from extra buffer */
1469*5ff13fbcSAllan Jude         {
1470*5ff13fbcSAllan Jude 
1471*5ff13fbcSAllan Jude #if defined(__GNUC__) && defined(__x86_64__)
1472*5ff13fbcSAllan Jude             __asm__(".p2align 6");
1473*5ff13fbcSAllan Jude             __asm__("nop");
            /* NOTE(review): the `#elif __GNUC__ >= 11` branch below can never be selected,
             * since any such value already satisfies `__GNUC__ != 7`;
             * only the first branch and the `#else` (gcc-7) branch are reachable. */
1474*5ff13fbcSAllan Jude #  if __GNUC__ != 7
1475*5ff13fbcSAllan Jude             /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
1476*5ff13fbcSAllan Jude             __asm__(".p2align 4");
1477*5ff13fbcSAllan Jude             __asm__("nop");
1478*5ff13fbcSAllan Jude             __asm__(".p2align 3");
1479*5ff13fbcSAllan Jude #  elif __GNUC__ >= 11
1480*5ff13fbcSAllan Jude             __asm__(".p2align 3");
1481*5ff13fbcSAllan Jude #  else
1482*5ff13fbcSAllan Jude             __asm__(".p2align 5");
1483*5ff13fbcSAllan Jude             __asm__("nop");
1484*5ff13fbcSAllan Jude             __asm__(".p2align 3");
1485*5ff13fbcSAllan Jude #  endif
1486*5ff13fbcSAllan Jude #endif
1487*5ff13fbcSAllan Jude 
            /* every remaining sequence takes its literals from litExtraBuffer (litPtr/litBufferEnd were re-pointed above) */
148837f1f268SConrad Meyer             for (; ; ) {
1489*5ff13fbcSAllan Jude                 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1490*5ff13fbcSAllan Jude                 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1491*5ff13fbcSAllan Jude #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1492*5ff13fbcSAllan Jude                 assert(!ZSTD_isError(oneSeqSize));
1493*5ff13fbcSAllan Jude                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1494*5ff13fbcSAllan Jude #endif
1495*5ff13fbcSAllan Jude                 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1496*5ff13fbcSAllan Jude                     return oneSeqSize;
1497*5ff13fbcSAllan Jude                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1498*5ff13fbcSAllan Jude                 op += oneSeqSize;
1499*5ff13fbcSAllan Jude                 if (UNLIKELY(!--nbSeq))
1500*5ff13fbcSAllan Jude                     break;
1501*5ff13fbcSAllan Jude                 BIT_reloadDStream(&(seqState.DStream));
1502*5ff13fbcSAllan Jude             }
1503*5ff13fbcSAllan Jude         }
1504*5ff13fbcSAllan Jude 
1505*5ff13fbcSAllan Jude         /* check if reached exact end */
1506*5ff13fbcSAllan Jude         DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1507*5ff13fbcSAllan Jude         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
        /* any status ordered below BIT_DStream_completed after the final reload is treated as corruption */
1508*5ff13fbcSAllan Jude         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1509*5ff13fbcSAllan Jude         /* save reps for next block */
1510*5ff13fbcSAllan Jude         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1511*5ff13fbcSAllan Jude     }
1512*5ff13fbcSAllan Jude 
1513*5ff13fbcSAllan Jude     /* last literal segment */
1514*5ff13fbcSAllan Jude     if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
1515*5ff13fbcSAllan Jude     {
1516*5ff13fbcSAllan Jude         size_t const lastLLSize = litBufferEnd - litPtr;
1517*5ff13fbcSAllan Jude         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
        /* op is NULL only when dst itself is NULL (op starts at dst and only moves forward).
         * NOTE(review): memmove rather than memcpy, presumably because this part of the
         * literals may overlap the output (see "split between dst and litExtraBuffer" above) - confirm */
1518*5ff13fbcSAllan Jude         if (op != NULL) {
1519*5ff13fbcSAllan Jude             ZSTD_memmove(op, litPtr, lastLLSize);
1520*5ff13fbcSAllan Jude             op += lastLLSize;
1521*5ff13fbcSAllan Jude         }
        /* continue with the whole of litExtraBuffer, handled by the common tail below */
1522*5ff13fbcSAllan Jude         litPtr = dctx->litExtraBuffer;
1523*5ff13fbcSAllan Jude         litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1524*5ff13fbcSAllan Jude         dctx->litBufferLocation = ZSTD_not_in_dst;
1525*5ff13fbcSAllan Jude     }
    /* common tail : append whatever is left between litPtr and litBufferEnd */
1526*5ff13fbcSAllan Jude     {   size_t const lastLLSize = litBufferEnd - litPtr;
1527*5ff13fbcSAllan Jude         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1528*5ff13fbcSAllan Jude         if (op != NULL) {
1529*5ff13fbcSAllan Jude             ZSTD_memcpy(op, litPtr, lastLLSize);
1530*5ff13fbcSAllan Jude             op += lastLLSize;
1531*5ff13fbcSAllan Jude         }
1532*5ff13fbcSAllan Jude     }
1533*5ff13fbcSAllan Jude 
1534*5ff13fbcSAllan Jude     return op-ostart;
1535*5ff13fbcSAllan Jude }
1536*5ff13fbcSAllan Jude 
/* ZSTD_decompressSequences_body() :
 * Regenerates a block by decoding `nbSeq` sequences from the bitstream
 * [seqStart, seqStart+seqSize) and executing them from `dst` onwards.
 * Literals are read from the single range
 * [dctx->litPtr, dctx->litPtr + dctx->litSize); those still unread after the
 * last sequence are appended to the output.
 *
 * Output limit : dst + maxDstSize when dctx->litBufferLocation is
 * ZSTD_not_in_dst, dctx->litBuffer otherwise.
 *
 * `frame` is only read by the fuzzing-only assertions.
 *
 * Side effects on dctx : sets fseEntropy when nbSeq != 0, and stores the final
 * repeat offsets into entropy.rep[] once all sequences are decoded.
 *
 * @return : nb of bytes written into dst (op - ostart),
 *           or an error code : corruption_detected, dstSize_tooSmall,
 *           or whatever error ZSTD_execSequence() reported. */
1537*5ff13fbcSAllan Jude FORCE_INLINE_TEMPLATE size_t
1538*5ff13fbcSAllan Jude DONT_VECTORIZE
1539*5ff13fbcSAllan Jude ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1540*5ff13fbcSAllan Jude     void* dst, size_t maxDstSize,
1541*5ff13fbcSAllan Jude     const void* seqStart, size_t seqSize, int nbSeq,
1542*5ff13fbcSAllan Jude     const ZSTD_longOffset_e isLongOffset,
1543*5ff13fbcSAllan Jude     const int frame)
1544*5ff13fbcSAllan Jude {
1545*5ff13fbcSAllan Jude     const BYTE* ip = (const BYTE*)seqStart;
1546*5ff13fbcSAllan Jude     const BYTE* const iend = ip + seqSize;
1547*5ff13fbcSAllan Jude     BYTE* const ostart = (BYTE*)dst;
    /* output stops at dctx->litBuffer unless literals are flagged ZSTD_not_in_dst
     * (presumably so that writes never run into literals stored inside dst - confirm) */
1548*5ff13fbcSAllan Jude     BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
1549*5ff13fbcSAllan Jude     BYTE* op = ostart;
1550*5ff13fbcSAllan Jude     const BYTE* litPtr = dctx->litPtr;
1551*5ff13fbcSAllan Jude     const BYTE* const litEnd = litPtr + dctx->litSize;
1552*5ff13fbcSAllan Jude     const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1553*5ff13fbcSAllan Jude     const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1554*5ff13fbcSAllan Jude     const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1555*5ff13fbcSAllan Jude     DEBUGLOG(5, "ZSTD_decompressSequences_body");
    /* `frame` is referenced only inside the fuzzing-only #if section below;
     * the cast marks it as used for every other build */
1556*5ff13fbcSAllan Jude     (void)frame;
1557*5ff13fbcSAllan Jude 
1558*5ff13fbcSAllan Jude     /* Regen sequences */
1559*5ff13fbcSAllan Jude     if (nbSeq) {
1560*5ff13fbcSAllan Jude         seqState_t seqState;
1561*5ff13fbcSAllan Jude         dctx->fseEntropy = 1;
        /* seed the repeat-offset history from the context */
1562*5ff13fbcSAllan Jude         { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        /* a bitstream that cannot be initialised is reported as corruption */
1563*5ff13fbcSAllan Jude         RETURN_ERROR_IF(
1564*5ff13fbcSAllan Jude             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1565*5ff13fbcSAllan Jude             corruption_detected, "");
1566*5ff13fbcSAllan Jude         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1567*5ff13fbcSAllan Jude         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1568*5ff13fbcSAllan Jude         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1569*5ff13fbcSAllan Jude         assert(dst != NULL);
1570*5ff13fbcSAllan Jude 
        /* the end-of-block check (`< BIT_DStream_completed`) relies on this ordering of status values */
1571*5ff13fbcSAllan Jude         ZSTD_STATIC_ASSERT(
1572*5ff13fbcSAllan Jude             BIT_DStream_unfinished < BIT_DStream_completed &&
1573*5ff13fbcSAllan Jude             BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1574*5ff13fbcSAllan Jude             BIT_DStream_completed < BIT_DStream_overflow);
1575*5ff13fbcSAllan Jude 
        /* alignment directives emitted just ahead of the decode loop (x86-64 builds where __GNUC__ is defined);
         * the padding sequence differs for __GNUC__ >= 7 and for older values */
1576*5ff13fbcSAllan Jude #if defined(__GNUC__) && defined(__x86_64__)
1577*5ff13fbcSAllan Jude             __asm__(".p2align 6");
1578*5ff13fbcSAllan Jude             __asm__("nop");
1579*5ff13fbcSAllan Jude #  if __GNUC__ >= 7
1580*5ff13fbcSAllan Jude             __asm__(".p2align 5");
1581*5ff13fbcSAllan Jude             __asm__("nop");
1582*5ff13fbcSAllan Jude             __asm__(".p2align 3");
1583*5ff13fbcSAllan Jude #  else
1584*5ff13fbcSAllan Jude             __asm__(".p2align 4");
1585*5ff13fbcSAllan Jude             __asm__("nop");
1586*5ff13fbcSAllan Jude             __asm__(".p2align 3");
1587*5ff13fbcSAllan Jude #  endif
1588*5ff13fbcSAllan Jude #endif
1589*5ff13fbcSAllan Jude 
        /* decode one sequence, execute it, repeat until nbSeq reaches 0;
         * the bitstream is reloaded only when another sequence follows */
1590*5ff13fbcSAllan Jude         for ( ; ; ) {
1591*5ff13fbcSAllan Jude             seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1592a0483764SConrad Meyer             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
159337f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
159437f1f268SConrad Meyer             assert(!ZSTD_isError(oneSeqSize));
159537f1f268SConrad Meyer             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
159637f1f268SConrad Meyer #endif
1597*5ff13fbcSAllan Jude             if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1598*5ff13fbcSAllan Jude                 return oneSeqSize;
1599a0483764SConrad Meyer             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1600f7cd7fe5SConrad Meyer             op += oneSeqSize;
1601*5ff13fbcSAllan Jude             if (UNLIKELY(!--nbSeq))
1602f7cd7fe5SConrad Meyer                 break;
1603*5ff13fbcSAllan Jude             BIT_reloadDStream(&(seqState.DStream));
160437f1f268SConrad Meyer         }
1605a0483764SConrad Meyer 
1606a0483764SConrad Meyer         /* check if reached exact end */
1607a0483764SConrad Meyer         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
160837f1f268SConrad Meyer         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
        /* any status ordered below BIT_DStream_completed after the final reload is treated as corruption */
160937f1f268SConrad Meyer         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1610a0483764SConrad Meyer         /* save reps for next block */
1611a0483764SConrad Meyer         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1612a0483764SConrad Meyer     }
1613a0483764SConrad Meyer 
1614a0483764SConrad Meyer     /* last literal segment */
1615a0483764SConrad Meyer     {   size_t const lastLLSize = litEnd - litPtr;
161637f1f268SConrad Meyer         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        /* op is NULL only when dst itself is NULL (op starts at dst and only moves forward) */
161737f1f268SConrad Meyer         if (op != NULL) {
1618f7cd7fe5SConrad Meyer             ZSTD_memcpy(op, litPtr, lastLLSize);
1619a0483764SConrad Meyer             op += lastLLSize;
1620a0483764SConrad Meyer         }
162137f1f268SConrad Meyer     }
1622a0483764SConrad Meyer 
1623a0483764SConrad Meyer     return op-ostart;
1624a0483764SConrad Meyer }
1625a0483764SConrad Meyer 
1626a0483764SConrad Meyer static size_t
1627a0483764SConrad Meyer ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1628a0483764SConrad Meyer                                  void* dst, size_t maxDstSize,
1629a0483764SConrad Meyer                            const void* seqStart, size_t seqSize, int nbSeq,
163037f1f268SConrad Meyer                            const ZSTD_longOffset_e isLongOffset,
163137f1f268SConrad Meyer                            const int frame)
1632a0483764SConrad Meyer {
163337f1f268SConrad Meyer     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1634a0483764SConrad Meyer }
1635*5ff13fbcSAllan Jude 
1636*5ff13fbcSAllan Jude static size_t
1637*5ff13fbcSAllan Jude ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1638*5ff13fbcSAllan Jude                                                void* dst, size_t maxDstSize,
1639*5ff13fbcSAllan Jude                                          const void* seqStart, size_t seqSize, int nbSeq,
1640*5ff13fbcSAllan Jude                                          const ZSTD_longOffset_e isLongOffset,
1641*5ff13fbcSAllan Jude                                          const int frame)
1642*5ff13fbcSAllan Jude {
1643*5ff13fbcSAllan Jude     return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1644*5ff13fbcSAllan Jude }
1645a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1646a0483764SConrad Meyer 
1647a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1648*5ff13fbcSAllan Jude 
1649*5ff13fbcSAllan Jude FORCE_INLINE_TEMPLATE size_t
1650*5ff13fbcSAllan Jude ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1651*5ff13fbcSAllan Jude                    const BYTE* const prefixStart, const BYTE* const dictEnd)
1652*5ff13fbcSAllan Jude {
1653*5ff13fbcSAllan Jude     prefetchPos += sequence.litLength;
1654*5ff13fbcSAllan Jude     {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1655*5ff13fbcSAllan Jude         const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1656*5ff13fbcSAllan Jude                                                                               * No consequence though : memory address is only used for prefetching, not for dereferencing */
1657*5ff13fbcSAllan Jude         PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1658*5ff13fbcSAllan Jude     }
1659*5ff13fbcSAllan Jude     return prefetchPos + sequence.matchLength;
1660*5ff13fbcSAllan Jude }
1661*5ff13fbcSAllan Jude 
1662*5ff13fbcSAllan Jude /* This decoding function employs prefetching
1663*5ff13fbcSAllan Jude  * to reduce latency impact of cache misses.
1664*5ff13fbcSAllan Jude  * It's generally employed when block contains a significant portion of long-distance matches
1665*5ff13fbcSAllan Jude  * or when coupled with a "cold" dictionary */
1666a0483764SConrad Meyer FORCE_INLINE_TEMPLATE size_t
1667a0483764SConrad Meyer ZSTD_decompressSequencesLong_body(
1668a0483764SConrad Meyer                                ZSTD_DCtx* dctx,
1669a0483764SConrad Meyer                                void* dst, size_t maxDstSize,
1670a0483764SConrad Meyer                          const void* seqStart, size_t seqSize, int nbSeq,
167137f1f268SConrad Meyer                          const ZSTD_longOffset_e isLongOffset,
167237f1f268SConrad Meyer                          const int frame)
1673a0483764SConrad Meyer {
1674a0483764SConrad Meyer     const BYTE* ip = (const BYTE*)seqStart;
1675a0483764SConrad Meyer     const BYTE* const iend = ip + seqSize;
1676*5ff13fbcSAllan Jude     BYTE* const ostart = (BYTE*)dst;
1677*5ff13fbcSAllan Jude     BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
1678a0483764SConrad Meyer     BYTE* op = ostart;
1679a0483764SConrad Meyer     const BYTE* litPtr = dctx->litPtr;
1680*5ff13fbcSAllan Jude     const BYTE* litBufferEnd = dctx->litBufferEnd;
1681a0483764SConrad Meyer     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1682a0483764SConrad Meyer     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1683a0483764SConrad Meyer     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
168437f1f268SConrad Meyer     (void)frame;
1685a0483764SConrad Meyer 
1686a0483764SConrad Meyer     /* Regen sequences */
1687a0483764SConrad Meyer     if (nbSeq) {
1688*5ff13fbcSAllan Jude #define STORED_SEQS 8
1689a0483764SConrad Meyer #define STORED_SEQS_MASK (STORED_SEQS-1)
1690*5ff13fbcSAllan Jude #define ADVANCED_SEQS STORED_SEQS
1691a0483764SConrad Meyer         seq_t sequences[STORED_SEQS];
1692a0483764SConrad Meyer         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1693a0483764SConrad Meyer         seqState_t seqState;
1694a0483764SConrad Meyer         int seqNb;
1695*5ff13fbcSAllan Jude         size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1696*5ff13fbcSAllan Jude 
1697a0483764SConrad Meyer         dctx->fseEntropy = 1;
1698a0483764SConrad Meyer         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
169937f1f268SConrad Meyer         assert(dst != NULL);
1700a0483764SConrad Meyer         assert(iend >= ip);
17012b9c00cbSConrad Meyer         RETURN_ERROR_IF(
17022b9c00cbSConrad Meyer             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
170337f1f268SConrad Meyer             corruption_detected, "");
1704a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1705a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1706a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1707a0483764SConrad Meyer 
1708a0483764SConrad Meyer         /* prepare in advance */
1709a0483764SConrad Meyer         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1710*5ff13fbcSAllan Jude             seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1711*5ff13fbcSAllan Jude             prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1712*5ff13fbcSAllan Jude             sequences[seqNb] = sequence;
1713a0483764SConrad Meyer         }
171437f1f268SConrad Meyer         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1715a0483764SConrad Meyer 
1716*5ff13fbcSAllan Jude         /* decompress without stomping litBuffer */
1717a0483764SConrad Meyer         for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
1718*5ff13fbcSAllan Jude             seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1719*5ff13fbcSAllan Jude             size_t oneSeqSize;
1720*5ff13fbcSAllan Jude 
1721*5ff13fbcSAllan Jude             if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
1722*5ff13fbcSAllan Jude             {
1723*5ff13fbcSAllan Jude                 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1724*5ff13fbcSAllan Jude                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1725*5ff13fbcSAllan Jude                 if (leftoverLit)
1726*5ff13fbcSAllan Jude                 {
1727*5ff13fbcSAllan Jude                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1728*5ff13fbcSAllan Jude                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1729*5ff13fbcSAllan Jude                     sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1730*5ff13fbcSAllan Jude                     op += leftoverLit;
1731*5ff13fbcSAllan Jude                 }
1732*5ff13fbcSAllan Jude                 litPtr = dctx->litExtraBuffer;
1733*5ff13fbcSAllan Jude                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1734*5ff13fbcSAllan Jude                 dctx->litBufferLocation = ZSTD_not_in_dst;
1735*5ff13fbcSAllan Jude                 oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
173637f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
173737f1f268SConrad Meyer                 assert(!ZSTD_isError(oneSeqSize));
173837f1f268SConrad Meyer                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
173937f1f268SConrad Meyer #endif
1740a0483764SConrad Meyer                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1741*5ff13fbcSAllan Jude 
1742*5ff13fbcSAllan Jude                 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1743a0483764SConrad Meyer                 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1744a0483764SConrad Meyer                 op += oneSeqSize;
1745a0483764SConrad Meyer             }
1746*5ff13fbcSAllan Jude             else
1747*5ff13fbcSAllan Jude             {
1748*5ff13fbcSAllan Jude                 /* lit buffer is either wholly contained in first or second split, or not split at all*/
1749*5ff13fbcSAllan Jude                 oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1750*5ff13fbcSAllan Jude                     ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1751*5ff13fbcSAllan Jude                     ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1752*5ff13fbcSAllan Jude #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1753*5ff13fbcSAllan Jude                 assert(!ZSTD_isError(oneSeqSize));
1754*5ff13fbcSAllan Jude                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1755*5ff13fbcSAllan Jude #endif
1756*5ff13fbcSAllan Jude                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1757*5ff13fbcSAllan Jude 
1758*5ff13fbcSAllan Jude                 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1759*5ff13fbcSAllan Jude                 sequences[seqNb & STORED_SEQS_MASK] = sequence;
1760*5ff13fbcSAllan Jude                 op += oneSeqSize;
1761*5ff13fbcSAllan Jude             }
1762*5ff13fbcSAllan Jude         }
176337f1f268SConrad Meyer         RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1764a0483764SConrad Meyer 
1765a0483764SConrad Meyer         /* finish queue */
1766a0483764SConrad Meyer         seqNb -= seqAdvance;
1767a0483764SConrad Meyer         for ( ; seqNb<nbSeq ; seqNb++) {
1768*5ff13fbcSAllan Jude             seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1769*5ff13fbcSAllan Jude             if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
1770*5ff13fbcSAllan Jude             {
1771*5ff13fbcSAllan Jude                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1772*5ff13fbcSAllan Jude                 if (leftoverLit)
1773*5ff13fbcSAllan Jude                 {
1774*5ff13fbcSAllan Jude                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1775*5ff13fbcSAllan Jude                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1776*5ff13fbcSAllan Jude                     sequence->litLength -= leftoverLit;
1777*5ff13fbcSAllan Jude                     op += leftoverLit;
1778*5ff13fbcSAllan Jude                 }
1779*5ff13fbcSAllan Jude                 litPtr = dctx->litExtraBuffer;
1780*5ff13fbcSAllan Jude                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1781*5ff13fbcSAllan Jude                 dctx->litBufferLocation = ZSTD_not_in_dst;
1782*5ff13fbcSAllan Jude                 {
1783*5ff13fbcSAllan Jude                     size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
178437f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
178537f1f268SConrad Meyer                     assert(!ZSTD_isError(oneSeqSize));
178637f1f268SConrad Meyer                     if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
178737f1f268SConrad Meyer #endif
1788a0483764SConrad Meyer                     if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1789a0483764SConrad Meyer                     op += oneSeqSize;
1790a0483764SConrad Meyer                 }
1791*5ff13fbcSAllan Jude             }
1792*5ff13fbcSAllan Jude             else
1793*5ff13fbcSAllan Jude             {
1794*5ff13fbcSAllan Jude                 size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1795*5ff13fbcSAllan Jude                     ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1796*5ff13fbcSAllan Jude                     ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1797*5ff13fbcSAllan Jude #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1798*5ff13fbcSAllan Jude                 assert(!ZSTD_isError(oneSeqSize));
1799*5ff13fbcSAllan Jude                 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1800*5ff13fbcSAllan Jude #endif
1801*5ff13fbcSAllan Jude                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1802*5ff13fbcSAllan Jude                 op += oneSeqSize;
1803*5ff13fbcSAllan Jude             }
1804*5ff13fbcSAllan Jude         }
1805a0483764SConrad Meyer 
1806a0483764SConrad Meyer         /* save reps for next block */
1807a0483764SConrad Meyer         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1808a0483764SConrad Meyer     }
1809a0483764SConrad Meyer 
1810a0483764SConrad Meyer     /* last literal segment */
1811*5ff13fbcSAllan Jude     if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
1812*5ff13fbcSAllan Jude     {
1813*5ff13fbcSAllan Jude         size_t const lastLLSize = litBufferEnd - litPtr;
181437f1f268SConrad Meyer         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
181537f1f268SConrad Meyer         if (op != NULL) {
1816*5ff13fbcSAllan Jude             ZSTD_memmove(op, litPtr, lastLLSize);
1817*5ff13fbcSAllan Jude             op += lastLLSize;
1818*5ff13fbcSAllan Jude         }
1819*5ff13fbcSAllan Jude         litPtr = dctx->litExtraBuffer;
1820*5ff13fbcSAllan Jude         litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1821*5ff13fbcSAllan Jude     }
1822*5ff13fbcSAllan Jude     {   size_t const lastLLSize = litBufferEnd - litPtr;
1823*5ff13fbcSAllan Jude         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1824*5ff13fbcSAllan Jude         if (op != NULL) {
1825*5ff13fbcSAllan Jude             ZSTD_memmove(op, litPtr, lastLLSize);
1826a0483764SConrad Meyer             op += lastLLSize;
1827a0483764SConrad Meyer         }
182837f1f268SConrad Meyer     }
1829a0483764SConrad Meyer 
1830a0483764SConrad Meyer     return op-ostart;
1831a0483764SConrad Meyer }
1832a0483764SConrad Meyer 
1833a0483764SConrad Meyer static size_t
1834a0483764SConrad Meyer ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1835a0483764SConrad Meyer                                  void* dst, size_t maxDstSize,
1836a0483764SConrad Meyer                            const void* seqStart, size_t seqSize, int nbSeq,
183737f1f268SConrad Meyer                            const ZSTD_longOffset_e isLongOffset,
183837f1f268SConrad Meyer                            const int frame)
1839a0483764SConrad Meyer {
184037f1f268SConrad Meyer     return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1841a0483764SConrad Meyer }
1842a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1843a0483764SConrad Meyer 
1844a0483764SConrad Meyer 
1845a0483764SConrad Meyer 
1846a0483764SConrad Meyer #if DYNAMIC_BMI2
1847a0483764SConrad Meyer 
1848a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1849*5ff13fbcSAllan Jude static BMI2_TARGET_ATTRIBUTE size_t
18504d3f1eafSConrad Meyer DONT_VECTORIZE
1851a0483764SConrad Meyer ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1852a0483764SConrad Meyer                                  void* dst, size_t maxDstSize,
1853a0483764SConrad Meyer                            const void* seqStart, size_t seqSize, int nbSeq,
185437f1f268SConrad Meyer                            const ZSTD_longOffset_e isLongOffset,
185537f1f268SConrad Meyer                            const int frame)
1856a0483764SConrad Meyer {
185737f1f268SConrad Meyer     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1858a0483764SConrad Meyer }
1859*5ff13fbcSAllan Jude static BMI2_TARGET_ATTRIBUTE size_t
1860*5ff13fbcSAllan Jude DONT_VECTORIZE
1861*5ff13fbcSAllan Jude ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1862*5ff13fbcSAllan Jude                                  void* dst, size_t maxDstSize,
1863*5ff13fbcSAllan Jude                            const void* seqStart, size_t seqSize, int nbSeq,
1864*5ff13fbcSAllan Jude                            const ZSTD_longOffset_e isLongOffset,
1865*5ff13fbcSAllan Jude                            const int frame)
1866*5ff13fbcSAllan Jude {
1867*5ff13fbcSAllan Jude     return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1868*5ff13fbcSAllan Jude }
1869a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1870a0483764SConrad Meyer 
1871a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1872*5ff13fbcSAllan Jude static BMI2_TARGET_ATTRIBUTE size_t
1873a0483764SConrad Meyer ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1874a0483764SConrad Meyer                                  void* dst, size_t maxDstSize,
1875a0483764SConrad Meyer                            const void* seqStart, size_t seqSize, int nbSeq,
187637f1f268SConrad Meyer                            const ZSTD_longOffset_e isLongOffset,
187737f1f268SConrad Meyer                            const int frame)
1878a0483764SConrad Meyer {
187937f1f268SConrad Meyer     return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1880a0483764SConrad Meyer }
1881a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1882a0483764SConrad Meyer 
1883a0483764SConrad Meyer #endif /* DYNAMIC_BMI2 */
1884a0483764SConrad Meyer 
1885a0483764SConrad Meyer typedef size_t (*ZSTD_decompressSequences_t)(
1886a0483764SConrad Meyer                             ZSTD_DCtx* dctx,
1887a0483764SConrad Meyer                             void* dst, size_t maxDstSize,
1888a0483764SConrad Meyer                             const void* seqStart, size_t seqSize, int nbSeq,
188937f1f268SConrad Meyer                             const ZSTD_longOffset_e isLongOffset,
189037f1f268SConrad Meyer                             const int frame);
1891a0483764SConrad Meyer 
1892a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1893a0483764SConrad Meyer static size_t
1894a0483764SConrad Meyer ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1895a0483764SConrad Meyer                    const void* seqStart, size_t seqSize, int nbSeq,
189637f1f268SConrad Meyer                    const ZSTD_longOffset_e isLongOffset,
189737f1f268SConrad Meyer                    const int frame)
1898a0483764SConrad Meyer {
1899a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_decompressSequences");
1900a0483764SConrad Meyer #if DYNAMIC_BMI2
1901*5ff13fbcSAllan Jude     if (ZSTD_DCtx_get_bmi2(dctx)) {
190237f1f268SConrad Meyer         return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1903a0483764SConrad Meyer     }
1904a0483764SConrad Meyer #endif
190537f1f268SConrad Meyer     return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1906a0483764SConrad Meyer }
1907*5ff13fbcSAllan Jude static size_t
1908*5ff13fbcSAllan Jude ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1909*5ff13fbcSAllan Jude                                  const void* seqStart, size_t seqSize, int nbSeq,
1910*5ff13fbcSAllan Jude                                  const ZSTD_longOffset_e isLongOffset,
1911*5ff13fbcSAllan Jude                                  const int frame)
1912*5ff13fbcSAllan Jude {
1913*5ff13fbcSAllan Jude     DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1914*5ff13fbcSAllan Jude #if DYNAMIC_BMI2
1915*5ff13fbcSAllan Jude     if (ZSTD_DCtx_get_bmi2(dctx)) {
1916*5ff13fbcSAllan Jude         return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1917*5ff13fbcSAllan Jude     }
1918*5ff13fbcSAllan Jude #endif
1919*5ff13fbcSAllan Jude     return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1920*5ff13fbcSAllan Jude }
1921a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1922a0483764SConrad Meyer 
1923a0483764SConrad Meyer 
1924a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1925a0483764SConrad Meyer /* ZSTD_decompressSequencesLong() :
1926a0483764SConrad Meyer  * decompression function triggered when a minimum share of offsets is considered "long",
1927a0483764SConrad Meyer  * aka out of cache.
19282b9c00cbSConrad Meyer  * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1929a0483764SConrad Meyer  * This function will try to mitigate main memory latency through the use of prefetching */
1930a0483764SConrad Meyer static size_t
1931a0483764SConrad Meyer ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1932a0483764SConrad Meyer                              void* dst, size_t maxDstSize,
1933a0483764SConrad Meyer                              const void* seqStart, size_t seqSize, int nbSeq,
193437f1f268SConrad Meyer                              const ZSTD_longOffset_e isLongOffset,
193537f1f268SConrad Meyer                              const int frame)
1936a0483764SConrad Meyer {
1937a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1938a0483764SConrad Meyer #if DYNAMIC_BMI2
1939*5ff13fbcSAllan Jude     if (ZSTD_DCtx_get_bmi2(dctx)) {
194037f1f268SConrad Meyer         return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1941a0483764SConrad Meyer     }
1942a0483764SConrad Meyer #endif
194337f1f268SConrad Meyer   return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1944a0483764SConrad Meyer }
1945a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1946a0483764SConrad Meyer 
1947a0483764SConrad Meyer 
1948a0483764SConrad Meyer 
1949a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1950a0483764SConrad Meyer     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1951a0483764SConrad Meyer /* ZSTD_getLongOffsetsShare() :
1952a0483764SConrad Meyer  * condition : offTable must be valid
1953a0483764SConrad Meyer  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1954a0483764SConrad Meyer  *           compared to maximum possible of (1<<OffFSELog) */
1955a0483764SConrad Meyer static unsigned
1956a0483764SConrad Meyer ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1957a0483764SConrad Meyer {
1958a0483764SConrad Meyer     const void* ptr = offTable;
1959a0483764SConrad Meyer     U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1960a0483764SConrad Meyer     const ZSTD_seqSymbol* table = offTable + 1;
1961a0483764SConrad Meyer     U32 const max = 1 << tableLog;
1962a0483764SConrad Meyer     U32 u, total = 0;
1963a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1964a0483764SConrad Meyer 
1965a0483764SConrad Meyer     assert(max <= (1 << OffFSELog));  /* max not too large */
1966a0483764SConrad Meyer     for (u=0; u<max; u++) {
1967a0483764SConrad Meyer         if (table[u].nbAdditionalBits > 22) total += 1;
1968a0483764SConrad Meyer     }
1969a0483764SConrad Meyer 
1970a0483764SConrad Meyer     assert(tableLog <= OffFSELog);
1971a0483764SConrad Meyer     total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
1972a0483764SConrad Meyer 
1973a0483764SConrad Meyer     return total;
1974a0483764SConrad Meyer }
1975a0483764SConrad Meyer #endif
1976a0483764SConrad Meyer 
1977a0483764SConrad Meyer size_t
1978a0483764SConrad Meyer ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1979a0483764SConrad Meyer                               void* dst, size_t dstCapacity,
1980*5ff13fbcSAllan Jude                         const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
1981a0483764SConrad Meyer {   /* blockType == blockCompressed */
1982a0483764SConrad Meyer     const BYTE* ip = (const BYTE*)src;
1983a0483764SConrad Meyer     /* isLongOffset must be true if there are long offsets.
1984a0483764SConrad Meyer      * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1985a0483764SConrad Meyer      * We don't expect that to be the case in 64-bit mode.
1986a0483764SConrad Meyer      * In block mode, window size is not known, so we have to be conservative.
1987a0483764SConrad Meyer      * (note: but it could be evaluated from current-lowLimit)
1988a0483764SConrad Meyer      */
1989a0483764SConrad Meyer     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1990a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1991a0483764SConrad Meyer 
199237f1f268SConrad Meyer     RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1993a0483764SConrad Meyer 
1994a0483764SConrad Meyer     /* Decode literals section */
1995*5ff13fbcSAllan Jude     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
1996a0483764SConrad Meyer         DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1997a0483764SConrad Meyer         if (ZSTD_isError(litCSize)) return litCSize;
1998a0483764SConrad Meyer         ip += litCSize;
1999a0483764SConrad Meyer         srcSize -= litCSize;
2000a0483764SConrad Meyer     }
2001a0483764SConrad Meyer 
2002a0483764SConrad Meyer     /* Build Decoding Tables */
2003a0483764SConrad Meyer     {
2004a0483764SConrad Meyer         /* These macros control at build-time which decompressor implementation
2005a0483764SConrad Meyer          * we use. If neither is defined, we do some inspection and dispatch at
2006a0483764SConrad Meyer          * runtime.
2007a0483764SConrad Meyer          */
2008a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2009a0483764SConrad Meyer     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2010a0483764SConrad Meyer         int usePrefetchDecoder = dctx->ddictIsCold;
2011a0483764SConrad Meyer #endif
2012a0483764SConrad Meyer         int nbSeq;
2013a0483764SConrad Meyer         size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
2014a0483764SConrad Meyer         if (ZSTD_isError(seqHSize)) return seqHSize;
2015a0483764SConrad Meyer         ip += seqHSize;
2016a0483764SConrad Meyer         srcSize -= seqHSize;
2017a0483764SConrad Meyer 
201837f1f268SConrad Meyer         RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
201937f1f268SConrad Meyer 
2020a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2021a0483764SConrad Meyer     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2022a0483764SConrad Meyer         if ( !usePrefetchDecoder
2023a0483764SConrad Meyer           && (!frame || (dctx->fParams.windowSize > (1<<24)))
2024a0483764SConrad Meyer           && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
2025a0483764SConrad Meyer             U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
2026a0483764SConrad Meyer             U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2027a0483764SConrad Meyer             usePrefetchDecoder = (shareLongOffsets >= minShare);
2028a0483764SConrad Meyer         }
2029a0483764SConrad Meyer #endif
2030a0483764SConrad Meyer 
2031a0483764SConrad Meyer         dctx->ddictIsCold = 0;
2032a0483764SConrad Meyer 
2033a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2034a0483764SConrad Meyer     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2035a0483764SConrad Meyer         if (usePrefetchDecoder)
2036a0483764SConrad Meyer #endif
2037a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
203837f1f268SConrad Meyer             return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2039a0483764SConrad Meyer #endif
2040a0483764SConrad Meyer 
2041a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2042a0483764SConrad Meyer         /* else */
2043*5ff13fbcSAllan Jude         if (dctx->litBufferLocation == ZSTD_split)
2044*5ff13fbcSAllan Jude             return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2045*5ff13fbcSAllan Jude         else
204637f1f268SConrad Meyer             return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2047a0483764SConrad Meyer #endif
2048a0483764SConrad Meyer     }
2049a0483764SConrad Meyer }
2050a0483764SConrad Meyer 
2051a0483764SConrad Meyer 
2052*5ff13fbcSAllan Jude void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
205337f1f268SConrad Meyer {
2054*5ff13fbcSAllan Jude     if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
205537f1f268SConrad Meyer         dctx->dictEnd = dctx->previousDstEnd;
205637f1f268SConrad Meyer         dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
205737f1f268SConrad Meyer         dctx->prefixStart = dst;
205837f1f268SConrad Meyer         dctx->previousDstEnd = dst;
205937f1f268SConrad Meyer     }
206037f1f268SConrad Meyer }
206137f1f268SConrad Meyer 
206237f1f268SConrad Meyer 
2063a0483764SConrad Meyer size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2064a0483764SConrad Meyer                             void* dst, size_t dstCapacity,
2065a0483764SConrad Meyer                       const void* src, size_t srcSize)
2066a0483764SConrad Meyer {
2067a0483764SConrad Meyer     size_t dSize;
2068*5ff13fbcSAllan Jude     ZSTD_checkContinuity(dctx, dst, dstCapacity);
2069*5ff13fbcSAllan Jude     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
2070a0483764SConrad Meyer     dctx->previousDstEnd = (char*)dst + dSize;
2071a0483764SConrad Meyer     return dSize;
2072a0483764SConrad Meyer }
2073