1 /* ******************************************************************
2    huff0 huffman decoder,
3    part of Finite State Entropy library
4    Copyright (C) 2013-present, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31     You can contact the author at :
32     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Dependencies
37 ****************************************************************/
38 #include <string.h>     /* memcpy, memset */
39 #include "compiler.h"
40 #include "bitstream.h"  /* BIT_* */
41 #include "fse.h"        /* to compress headers */
42 #define HUF_STATIC_LINKING_ONLY
43 #include "huf.h"
44 #include "error_private.h"
45 
46 
47 /* **************************************************************
48 *  Error Management
49 ****************************************************************/
50 #define HUF_isError ERR_isError
51 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
52 
53 
54 /* **************************************************************
55 *  Byte alignment for workSpace management
56 ****************************************************************/
57 #define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
58 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
59 
60 
61 /*-***************************/
62 /*  generic DTableDesc       */
63 /*-***************************/
64 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
65 
66 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
67 {
68     DTableDesc dtd;
69     memcpy(&dtd, table, sizeof(dtd));
70     return dtd;
71 }
72 
73 
74 /*-***************************/
75 /*  single-symbol decoding   */
76 /*-***************************/
77 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
78 
79 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
80 {
81     U32 tableLog = 0;
82     U32 nbSymbols = 0;
83     size_t iSize;
84     void* const dtPtr = DTable + 1;
85     HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
86 
87     U32* rankVal;
88     BYTE* huffWeight;
89     size_t spaceUsed32 = 0;
90 
91     rankVal = (U32 *)workSpace + spaceUsed32;
92     spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
93     huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
94     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
95 
96     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
97 
98     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
99     /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
100 
101     iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
102     if (HUF_isError(iSize)) return iSize;
103 
104     /* Table header */
105     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
106         if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
107         dtd.tableType = 0;
108         dtd.tableLog = (BYTE)tableLog;
109         memcpy(DTable, &dtd, sizeof(dtd));
110     }
111 
112     /* Calculate starting value for each rank */
113     {   U32 n, nextRankStart = 0;
114         for (n=1; n<tableLog+1; n++) {
115             U32 const current = nextRankStart;
116             nextRankStart += (rankVal[n] << (n-1));
117             rankVal[n] = current;
118     }   }
119 
120     /* fill DTable */
121     {   U32 n;
122         for (n=0; n<nbSymbols; n++) {
123             U32 const w = huffWeight[n];
124             U32 const length = (1 << w) >> 1;
125             U32 u;
126             HUF_DEltX1 D;
127             D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
128             for (u = rankVal[w]; u < rankVal[w] + length; u++)
129                 dt[u] = D;
130             rankVal[w] += length;
131     }   }
132 
133     return iSize;
134 }
135 
136 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
137 {
138     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
139     return HUF_readDTableX1_wksp(DTable, src, srcSize,
140                                  workSpace, sizeof(workSpace));
141 }
142 
143 FORCE_INLINE_TEMPLATE BYTE
144 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
145 {
146     size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
147     BYTE const c = dt[val].byte;
148     BIT_skipBits(Dstream, dt[val].nbBits);
149     return c;
150 }
151 
152 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
153     *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
154 
155 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
156     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
157         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
158 
159 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
160     if (MEM_64bits()) \
161         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
162 
163 HINT_INLINE size_t
164 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
165 {
166     BYTE* const pStart = p;
167 
168     /* up to 4 symbols at a time */
169     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
170         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
171         HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
172         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
173         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
174     }
175 
176     /* [0-3] symbols remaining */
177     if (MEM_32bits())
178         while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
179             HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
180 
181     /* no more data to retrieve from bitstream, no need to reload */
182     while (p < pEnd)
183         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
184 
185     return pEnd-pStart;
186 }
187 
188 FORCE_INLINE_TEMPLATE size_t
189 HUF_decompress1X1_usingDTable_internal_body(
190           void* dst,  size_t dstSize,
191     const void* cSrc, size_t cSrcSize,
192     const HUF_DTable* DTable)
193 {
194     BYTE* op = (BYTE*)dst;
195     BYTE* const oend = op + dstSize;
196     const void* dtPtr = DTable + 1;
197     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
198     BIT_DStream_t bitD;
199     DTableDesc const dtd = HUF_getDTableDesc(DTable);
200     U32 const dtLog = dtd.tableLog;
201 
202     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
203 
204     HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
205 
206     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
207 
208     return dstSize;
209 }
210 
211 FORCE_INLINE_TEMPLATE size_t
212 HUF_decompress4X1_usingDTable_internal_body(
213           void* dst,  size_t dstSize,
214     const void* cSrc, size_t cSrcSize,
215     const HUF_DTable* DTable)
216 {
217     /* Check */
218     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
219 
220     {   const BYTE* const istart = (const BYTE*) cSrc;
221         BYTE* const ostart = (BYTE*) dst;
222         BYTE* const oend = ostart + dstSize;
223         const void* const dtPtr = DTable + 1;
224         const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
225 
226         /* Init */
227         BIT_DStream_t bitD1;
228         BIT_DStream_t bitD2;
229         BIT_DStream_t bitD3;
230         BIT_DStream_t bitD4;
231         size_t const length1 = MEM_readLE16(istart);
232         size_t const length2 = MEM_readLE16(istart+2);
233         size_t const length3 = MEM_readLE16(istart+4);
234         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
235         const BYTE* const istart1 = istart + 6;  /* jumpTable */
236         const BYTE* const istart2 = istart1 + length1;
237         const BYTE* const istart3 = istart2 + length2;
238         const BYTE* const istart4 = istart3 + length3;
239         const size_t segmentSize = (dstSize+3) / 4;
240         BYTE* const opStart2 = ostart + segmentSize;
241         BYTE* const opStart3 = opStart2 + segmentSize;
242         BYTE* const opStart4 = opStart3 + segmentSize;
243         BYTE* op1 = ostart;
244         BYTE* op2 = opStart2;
245         BYTE* op3 = opStart3;
246         BYTE* op4 = opStart4;
247         U32 endSignal = BIT_DStream_unfinished;
248         DTableDesc const dtd = HUF_getDTableDesc(DTable);
249         U32 const dtLog = dtd.tableLog;
250 
251         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
252         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
253         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
254         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
255         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
256 
257         /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
258         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
259         while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
260             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
261             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
262             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
263             HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
264             HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
265             HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
266             HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
267             HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
268             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
269             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
270             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
271             HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
272             HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
273             HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
274             HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
275             HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
276             BIT_reloadDStream(&bitD1);
277             BIT_reloadDStream(&bitD2);
278             BIT_reloadDStream(&bitD3);
279             BIT_reloadDStream(&bitD4);
280         }
281 
282         /* check corruption */
283         /* note : should not be necessary : op# advance in lock step, and we control op4.
284          *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
285         if (op1 > opStart2) return ERROR(corruption_detected);
286         if (op2 > opStart3) return ERROR(corruption_detected);
287         if (op3 > opStart4) return ERROR(corruption_detected);
288         /* note : op4 supposed already verified within main loop */
289 
290         /* finish bitStreams one by one */
291         HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
292         HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
293         HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
294         HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
295 
296         /* check */
297         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
298           if (!endCheck) return ERROR(corruption_detected); }
299 
300         /* decoded size */
301         return dstSize;
302     }
303 }
304 
305 
306 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
307                                                const void *cSrc,
308                                                size_t cSrcSize,
309                                                const HUF_DTable *DTable);
310 #if DYNAMIC_BMI2
311 
312 #define HUF_DGEN(fn)                                                               \
313                                                                             \
314     static size_t fn##_default(                                             \
315                   void* dst,  size_t dstSize,                               \
316             const void* cSrc, size_t cSrcSize,                              \
317             const HUF_DTable* DTable)                                       \
318     {                                                                       \
319         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
320     }                                                                       \
321                                                                             \
322     static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
323                   void* dst,  size_t dstSize,                               \
324             const void* cSrc, size_t cSrcSize,                              \
325             const HUF_DTable* DTable)                                       \
326     {                                                                       \
327         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
328     }                                                                       \
329                                                                             \
330     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
331                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
332     {                                                                       \
333         if (bmi2) {                                                         \
334             return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
335         }                                                                   \
336         return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
337     }
338 
339 #else
340 
341 #define HUF_DGEN(fn)                                                               \
342     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
343                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
344     {                                                                       \
345         (void)bmi2;                                                         \
346         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
347     }
348 
349 #endif
350 
351 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
352 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
353 
354 
355 
356 size_t HUF_decompress1X1_usingDTable(
357           void* dst,  size_t dstSize,
358     const void* cSrc, size_t cSrcSize,
359     const HUF_DTable* DTable)
360 {
361     DTableDesc dtd = HUF_getDTableDesc(DTable);
362     if (dtd.tableType != 0) return ERROR(GENERIC);
363     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
364 }
365 
366 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
367                                    const void* cSrc, size_t cSrcSize,
368                                    void* workSpace, size_t wkspSize)
369 {
370     const BYTE* ip = (const BYTE*) cSrc;
371 
372     size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
373     if (HUF_isError(hSize)) return hSize;
374     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
375     ip += hSize; cSrcSize -= hSize;
376 
377     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
378 }
379 
380 
381 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
382                               const void* cSrc, size_t cSrcSize)
383 {
384     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
385     return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
386                                        workSpace, sizeof(workSpace));
387 }
388 
389 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
390 {
391     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
392     return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
393 }
394 
395 size_t HUF_decompress4X1_usingDTable(
396           void* dst,  size_t dstSize,
397     const void* cSrc, size_t cSrcSize,
398     const HUF_DTable* DTable)
399 {
400     DTableDesc dtd = HUF_getDTableDesc(DTable);
401     if (dtd.tableType != 0) return ERROR(GENERIC);
402     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
403 }
404 
405 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
406                                    const void* cSrc, size_t cSrcSize,
407                                    void* workSpace, size_t wkspSize, int bmi2)
408 {
409     const BYTE* ip = (const BYTE*) cSrc;
410 
411     size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
412                                                 workSpace, wkspSize);
413     if (HUF_isError(hSize)) return hSize;
414     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
415     ip += hSize; cSrcSize -= hSize;
416 
417     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
418 }
419 
420 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
421                                    const void* cSrc, size_t cSrcSize,
422                                    void* workSpace, size_t wkspSize)
423 {
424     return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
425 }
426 
427 
428 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
429 {
430     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
431     return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
432                                        workSpace, sizeof(workSpace));
433 }
434 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
435 {
436     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
437     return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
438 }
439 
440 
441 /* *************************/
442 /* double-symbols decoding */
443 /* *************************/
444 
445 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
446 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
447 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
448 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
449 
450 
451 /* HUF_fillDTableX2Level2() :
452  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
453 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
454                            const U32* rankValOrigin, const int minWeight,
455                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
456                            U32 nbBitsBaseline, U16 baseSeq)
457 {
458     HUF_DEltX2 DElt;
459     U32 rankVal[HUF_TABLELOG_MAX + 1];
460 
461     /* get pre-calculated rankVal */
462     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
463 
464     /* fill skipped values */
465     if (minWeight>1) {
466         U32 i, skipSize = rankVal[minWeight];
467         MEM_writeLE16(&(DElt.sequence), baseSeq);
468         DElt.nbBits   = (BYTE)(consumed);
469         DElt.length   = 1;
470         for (i = 0; i < skipSize; i++)
471             DTable[i] = DElt;
472     }
473 
474     /* fill DTable */
475     {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
476             const U32 symbol = sortedSymbols[s].symbol;
477             const U32 weight = sortedSymbols[s].weight;
478             const U32 nbBits = nbBitsBaseline - weight;
479             const U32 length = 1 << (sizeLog-nbBits);
480             const U32 start = rankVal[weight];
481             U32 i = start;
482             const U32 end = start + length;
483 
484             MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
485             DElt.nbBits = (BYTE)(nbBits + consumed);
486             DElt.length = 2;
487             do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
488 
489             rankVal[weight] += length;
490     }   }
491 }
492 
493 
494 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
495                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
496                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
497                            const U32 nbBitsBaseline)
498 {
499     U32 rankVal[HUF_TABLELOG_MAX + 1];
500     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
501     const U32 minBits  = nbBitsBaseline - maxWeight;
502     U32 s;
503 
504     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
505 
506     /* fill DTable */
507     for (s=0; s<sortedListSize; s++) {
508         const U16 symbol = sortedList[s].symbol;
509         const U32 weight = sortedList[s].weight;
510         const U32 nbBits = nbBitsBaseline - weight;
511         const U32 start = rankVal[weight];
512         const U32 length = 1 << (targetLog-nbBits);
513 
514         if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
515             U32 sortedRank;
516             int minWeight = nbBits + scaleLog;
517             if (minWeight < 1) minWeight = 1;
518             sortedRank = rankStart[minWeight];
519             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
520                            rankValOrigin[nbBits], minWeight,
521                            sortedList+sortedRank, sortedListSize-sortedRank,
522                            nbBitsBaseline, symbol);
523         } else {
524             HUF_DEltX2 DElt;
525             MEM_writeLE16(&(DElt.sequence), symbol);
526             DElt.nbBits = (BYTE)(nbBits);
527             DElt.length = 1;
528             {   U32 const end = start + length;
529                 U32 u;
530                 for (u = start; u < end; u++) DTable[u] = DElt;
531         }   }
532         rankVal[weight] += length;
533     }
534 }
535 
536 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
537                        const void* src, size_t srcSize,
538                              void* workSpace, size_t wkspSize)
539 {
540     U32 tableLog, maxW, sizeOfSort, nbSymbols;
541     DTableDesc dtd = HUF_getDTableDesc(DTable);
542     U32 const maxTableLog = dtd.maxTableLog;
543     size_t iSize;
544     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
545     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
546     U32 *rankStart;
547 
548     rankValCol_t* rankVal;
549     U32* rankStats;
550     U32* rankStart0;
551     sortedSymbol_t* sortedSymbol;
552     BYTE* weightList;
553     size_t spaceUsed32 = 0;
554 
555     rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
556     spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
557     rankStats = (U32 *)workSpace + spaceUsed32;
558     spaceUsed32 += HUF_TABLELOG_MAX + 1;
559     rankStart0 = (U32 *)workSpace + spaceUsed32;
560     spaceUsed32 += HUF_TABLELOG_MAX + 2;
561     sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
562     spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
563     weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
564     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
565 
566     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
567 
568     rankStart = rankStart0 + 1;
569     memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
570 
571     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
572     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
573     /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
574 
575     iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
576     if (HUF_isError(iSize)) return iSize;
577 
578     /* check result */
579     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
580 
581     /* find maxWeight */
582     for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
583 
584     /* Get start index of each weight */
585     {   U32 w, nextRankStart = 0;
586         for (w=1; w<maxW+1; w++) {
587             U32 current = nextRankStart;
588             nextRankStart += rankStats[w];
589             rankStart[w] = current;
590         }
591         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
592         sizeOfSort = nextRankStart;
593     }
594 
595     /* sort symbols by weight */
596     {   U32 s;
597         for (s=0; s<nbSymbols; s++) {
598             U32 const w = weightList[s];
599             U32 const r = rankStart[w]++;
600             sortedSymbol[r].symbol = (BYTE)s;
601             sortedSymbol[r].weight = (BYTE)w;
602         }
603         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
604     }
605 
606     /* Build rankVal */
607     {   U32* const rankVal0 = rankVal[0];
608         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
609             U32 nextRankVal = 0;
610             U32 w;
611             for (w=1; w<maxW+1; w++) {
612                 U32 current = nextRankVal;
613                 nextRankVal += rankStats[w] << (w+rescale);
614                 rankVal0[w] = current;
615         }   }
616         {   U32 const minBits = tableLog+1 - maxW;
617             U32 consumed;
618             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
619                 U32* const rankValPtr = rankVal[consumed];
620                 U32 w;
621                 for (w = 1; w < maxW+1; w++) {
622                     rankValPtr[w] = rankVal0[w] >> consumed;
623     }   }   }   }
624 
625     HUF_fillDTableX2(dt, maxTableLog,
626                    sortedSymbol, sizeOfSort,
627                    rankStart0, rankVal, maxW,
628                    tableLog+1);
629 
630     dtd.tableLog = (BYTE)maxTableLog;
631     dtd.tableType = 1;
632     memcpy(DTable, &dtd, sizeof(dtd));
633     return iSize;
634 }
635 
636 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
637 {
638   U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
639   return HUF_readDTableX2_wksp(DTable, src, srcSize,
640                                workSpace, sizeof(workSpace));
641 }
642 
643 
644 FORCE_INLINE_TEMPLATE U32
645 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
646 {
647     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
648     memcpy(op, dt+val, 2);
649     BIT_skipBits(DStream, dt[val].nbBits);
650     return dt[val].length;
651 }
652 
653 FORCE_INLINE_TEMPLATE U32
654 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
655 {
656     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
657     memcpy(op, dt+val, 1);
658     if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
659     else {
660         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
661             BIT_skipBits(DStream, dt[val].nbBits);
662             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
663                 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
664                 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
665     }   }
666     return 1;
667 }
668 
669 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
670     ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
671 
672 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
673     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
674         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
675 
676 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
677     if (MEM_64bits()) \
678         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
679 
680 HINT_INLINE size_t
681 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
682                 const HUF_DEltX2* const dt, const U32 dtLog)
683 {
684     BYTE* const pStart = p;
685 
686     /* up to 8 symbols at a time */
687     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
688         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
689         HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
690         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
691         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
692     }
693 
694     /* closer to end : up to 2 symbols at a time */
695     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
696         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
697 
698     while (p <= pEnd-2)
699         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
700 
701     if (p < pEnd)
702         p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
703 
704     return p-pStart;
705 }
706 
707 FORCE_INLINE_TEMPLATE size_t
708 HUF_decompress1X2_usingDTable_internal_body(
709           void* dst,  size_t dstSize,
710     const void* cSrc, size_t cSrcSize,
711     const HUF_DTable* DTable)
712 {
713     BIT_DStream_t bitD;
714 
715     /* Init */
716     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
717 
718     /* decode */
719     {   BYTE* const ostart = (BYTE*) dst;
720         BYTE* const oend = ostart + dstSize;
721         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
722         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
723         DTableDesc const dtd = HUF_getDTableDesc(DTable);
724         HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
725     }
726 
727     /* check */
728     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
729 
730     /* decoded size */
731     return dstSize;
732 }
733 
734 
735 FORCE_INLINE_TEMPLATE size_t
736 HUF_decompress4X2_usingDTable_internal_body(
737           void* dst,  size_t dstSize,
738     const void* cSrc, size_t cSrcSize,
739     const HUF_DTable* DTable)
740 {
741     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
742 
743     {   const BYTE* const istart = (const BYTE*) cSrc;
744         BYTE* const ostart = (BYTE*) dst;
745         BYTE* const oend = ostart + dstSize;
746         const void* const dtPtr = DTable+1;
747         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
748 
749         /* Init */
750         BIT_DStream_t bitD1;
751         BIT_DStream_t bitD2;
752         BIT_DStream_t bitD3;
753         BIT_DStream_t bitD4;
754         size_t const length1 = MEM_readLE16(istart);
755         size_t const length2 = MEM_readLE16(istart+2);
756         size_t const length3 = MEM_readLE16(istart+4);
757         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
758         const BYTE* const istart1 = istart + 6;  /* jumpTable */
759         const BYTE* const istart2 = istart1 + length1;
760         const BYTE* const istart3 = istart2 + length2;
761         const BYTE* const istart4 = istart3 + length3;
762         size_t const segmentSize = (dstSize+3) / 4;
763         BYTE* const opStart2 = ostart + segmentSize;
764         BYTE* const opStart3 = opStart2 + segmentSize;
765         BYTE* const opStart4 = opStart3 + segmentSize;
766         BYTE* op1 = ostart;
767         BYTE* op2 = opStart2;
768         BYTE* op3 = opStart3;
769         BYTE* op4 = opStart4;
770         U32 endSignal;
771         DTableDesc const dtd = HUF_getDTableDesc(DTable);
772         U32 const dtLog = dtd.tableLog;
773 
774         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
775         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
776         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
777         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
778         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
779 
780         /* 16-32 symbols per loop (4-8 symbols per stream) */
781         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
782         for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
783             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
784             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
785             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
786             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
787             HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
788             HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
789             HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
790             HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
791             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
792             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
793             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
794             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
795             HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
796             HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
797             HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
798             HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
799 
800             endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
801         }
802 
803         /* check corruption */
804         if (op1 > opStart2) return ERROR(corruption_detected);
805         if (op2 > opStart3) return ERROR(corruption_detected);
806         if (op3 > opStart4) return ERROR(corruption_detected);
807         /* note : op4 already verified within main loop */
808 
809         /* finish bitStreams one by one */
810         HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
811         HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
812         HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
813         HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
814 
815         /* check */
816         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
817           if (!endCheck) return ERROR(corruption_detected); }
818 
819         /* decoded size */
820         return dstSize;
821     }
822 }
823 
824 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
825 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
826 
827 size_t HUF_decompress1X2_usingDTable(
828           void* dst,  size_t dstSize,
829     const void* cSrc, size_t cSrcSize,
830     const HUF_DTable* DTable)
831 {
832     DTableDesc dtd = HUF_getDTableDesc(DTable);
833     if (dtd.tableType != 1) return ERROR(GENERIC);
834     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
835 }
836 
837 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
838                                    const void* cSrc, size_t cSrcSize,
839                                    void* workSpace, size_t wkspSize)
840 {
841     const BYTE* ip = (const BYTE*) cSrc;
842 
843     size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
844                                                workSpace, wkspSize);
845     if (HUF_isError(hSize)) return hSize;
846     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
847     ip += hSize; cSrcSize -= hSize;
848 
849     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
850 }
851 
852 
853 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
854                               const void* cSrc, size_t cSrcSize)
855 {
856     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
857     return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
858                                        workSpace, sizeof(workSpace));
859 }
860 
861 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
862 {
863     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
864     return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
865 }
866 
867 size_t HUF_decompress4X2_usingDTable(
868           void* dst,  size_t dstSize,
869     const void* cSrc, size_t cSrcSize,
870     const HUF_DTable* DTable)
871 {
872     DTableDesc dtd = HUF_getDTableDesc(DTable);
873     if (dtd.tableType != 1) return ERROR(GENERIC);
874     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
875 }
876 
877 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
878                                    const void* cSrc, size_t cSrcSize,
879                                    void* workSpace, size_t wkspSize, int bmi2)
880 {
881     const BYTE* ip = (const BYTE*) cSrc;
882 
883     size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
884                                          workSpace, wkspSize);
885     if (HUF_isError(hSize)) return hSize;
886     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
887     ip += hSize; cSrcSize -= hSize;
888 
889     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
890 }
891 
892 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
893                                    const void* cSrc, size_t cSrcSize,
894                                    void* workSpace, size_t wkspSize)
895 {
896     return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
897 }
898 
899 
900 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
901                               const void* cSrc, size_t cSrcSize)
902 {
903     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
904     return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
905                                        workSpace, sizeof(workSpace));
906 }
907 
908 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
909 {
910     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
911     return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
912 }
913 
914 
915 /* ***********************************/
916 /* Universal decompression selectors */
917 /* ***********************************/
918 
919 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
920                                     const void* cSrc, size_t cSrcSize,
921                                     const HUF_DTable* DTable)
922 {
923     DTableDesc const dtd = HUF_getDTableDesc(DTable);
924     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
925                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
926 }
927 
928 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
929                                     const void* cSrc, size_t cSrcSize,
930                                     const HUF_DTable* DTable)
931 {
932     DTableDesc const dtd = HUF_getDTableDesc(DTable);
933     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
934                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
935 }
936 
937 
938 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
939 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
940 {
941     /* single, double, quad */
942     {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
943     {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
944     {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
945     {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
946     {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
947     {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
948     {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
949     {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
950     {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
951     {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
952     {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
953     {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
954     {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
955     {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
956     {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
957     {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
958 };
959 
960 /** HUF_selectDecoder() :
961  *  Tells which decoder is likely to decode faster,
962  *  based on a set of pre-computed metrics.
963  * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
964  *  Assumption : 0 < dstSize <= 128 KB */
965 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
966 {
967     assert(dstSize > 0);
968     assert(dstSize <= 128*1024);
969     /* decoder timing evaluation */
970     {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
971         U32 const D256 = (U32)(dstSize >> 8);
972         U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
973         U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
974         DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
975         return DTime1 < DTime0;
976 }   }
977 
978 
979 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
980 
981 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
982 {
983     static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
984 
985     /* validation checks */
986     if (dstSize == 0) return ERROR(dstSize_tooSmall);
987     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
988     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
989     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
990 
991     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
992         return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
993     }
994 }
995 
996 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
997 {
998     /* validation checks */
999     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1000     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1001     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1002     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1003 
1004     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1005         return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1006                         HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1007     }
1008 }
1009 
1010 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1011 {
1012     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1013     return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1014                                          workSpace, sizeof(workSpace));
1015 }
1016 
1017 
1018 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1019                                      size_t dstSize, const void* cSrc,
1020                                      size_t cSrcSize, void* workSpace,
1021                                      size_t wkspSize)
1022 {
1023     /* validation checks */
1024     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1025     if (cSrcSize == 0) return ERROR(corruption_detected);
1026 
1027     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1028         return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1029                         HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1030     }
1031 }
1032 
1033 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1034                                   const void* cSrc, size_t cSrcSize,
1035                                   void* workSpace, size_t wkspSize)
1036 {
1037     /* validation checks */
1038     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1039     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1040     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1041     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1042 
1043     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1044         return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1045                                 cSrcSize, workSpace, wkspSize):
1046                         HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1047                                 cSrcSize, workSpace, wkspSize);
1048     }
1049 }
1050 
1051 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1052                              const void* cSrc, size_t cSrcSize)
1053 {
1054     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1055     return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1056                                       workSpace, sizeof(workSpace));
1057 }
1058 
1059 
1060 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1061 {
1062     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1063     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1064                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1065 }
1066 
1067 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1068 {
1069     const BYTE* ip = (const BYTE*) cSrc;
1070 
1071     size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1072     if (HUF_isError(hSize)) return hSize;
1073     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1074     ip += hSize; cSrcSize -= hSize;
1075 
1076     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1077 }
1078 
1079 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1080 {
1081     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1082     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1083                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1084 }
1085 
1086 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1087 {
1088     /* validation checks */
1089     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1090     if (cSrcSize == 0) return ERROR(corruption_detected);
1091 
1092     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1093         return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1094                         HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1095     }
1096 }
1097