1 /* ******************************************************************
2    huff0 huffman decoder,
3    part of Finite State Entropy library
4    Copyright (C) 2013-present, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31     You can contact the author at :
32     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Dependencies
37 ****************************************************************/
38 #include <string.h>     /* memcpy, memset */
39 #include "compiler.h"
40 #include "bitstream.h"  /* BIT_* */
41 #include "fse.h"        /* to compress headers */
42 #define HUF_STATIC_LINKING_ONLY
43 #include "huf.h"
44 #include "error_private.h"
45 #include <ntifs.h>
46 #include <ntddk.h>
47 
48 #define HUFD_ALLOC_TAG 0x64465548 // "HUFd"
49 
50 
51 /* **************************************************************
52 *  Error Management
53 ****************************************************************/
54 #define HUF_isError ERR_isError
55 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
56 
57 
58 /* **************************************************************
59 *  Byte alignment for workSpace management
60 ****************************************************************/
61 #define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
62 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
63 
64 
65 /*-***************************/
66 /*  generic DTableDesc       */
67 /*-***************************/
68 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
69 
70 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
71 {
72     DTableDesc dtd;
73     memcpy(&dtd, table, sizeof(dtd));
74     return dtd;
75 }
76 
77 
78 /*-***************************/
79 /*  single-symbol decoding   */
80 /*-***************************/
81 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
82 
83 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
84 {
85     U32 tableLog = 0;
86     U32 nbSymbols = 0;
87     size_t iSize;
88     void* const dtPtr = DTable + 1;
89     HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
90 
91     U32* rankVal;
92     BYTE* huffWeight;
93     size_t spaceUsed32 = 0;
94 
95     rankVal = (U32 *)workSpace + spaceUsed32;
96     spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
97     huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
98     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
99 
100     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
101 
102     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
103     /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
104 
105     iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
106     if (HUF_isError(iSize)) return iSize;
107 
108     /* Table header */
109     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
110         if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
111         dtd.tableType = 0;
112         dtd.tableLog = (BYTE)tableLog;
113         memcpy(DTable, &dtd, sizeof(dtd));
114     }
115 
116     /* Calculate starting value for each rank */
117     {   U32 n, nextRankStart = 0;
118         for (n=1; n<tableLog+1; n++) {
119             U32 const current = nextRankStart;
120             nextRankStart += (rankVal[n] << (n-1));
121             rankVal[n] = current;
122     }   }
123 
124     /* fill DTable */
125     {   U32 n;
126         for (n=0; n<nbSymbols; n++) {
127             U32 const w = huffWeight[n];
128             U32 const length = (1 << w) >> 1;
129             U32 u;
130             HUF_DEltX1 D;
131             D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
132             for (u = rankVal[w]; u < rankVal[w] + length; u++)
133                 dt[u] = D;
134             rankVal[w] += length;
135     }   }
136 
137     return iSize;
138 }
139 
140 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
141 {
142     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
143     return HUF_readDTableX1_wksp(DTable, src, srcSize,
144                                  workSpace, sizeof(workSpace));
145 }
146 
147 FORCE_INLINE_TEMPLATE BYTE
148 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
149 {
150     size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
151     BYTE const c = dt[val].byte;
152     BIT_skipBits(Dstream, dt[val].nbBits);
153     return c;
154 }
155 
156 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
157     *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
158 
159 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
160     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
161         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
162 
163 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
164     if (MEM_64bits()) \
165         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
166 
167 HINT_INLINE size_t
168 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
169 {
170     BYTE* const pStart = p;
171 
172     /* up to 4 symbols at a time */
173     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
174         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
175         HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
176         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
177         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
178     }
179 
180     /* [0-3] symbols remaining */
181     if (MEM_32bits())
182         while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
183             HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
184 
185     /* no more data to retrieve from bitstream, no need to reload */
186     while (p < pEnd)
187         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
188 
189     return pEnd-pStart;
190 }
191 
192 FORCE_INLINE_TEMPLATE size_t
193 HUF_decompress1X1_usingDTable_internal_body(
194           void* dst,  size_t dstSize,
195     const void* cSrc, size_t cSrcSize,
196     const HUF_DTable* DTable)
197 {
198     BYTE* op = (BYTE*)dst;
199     BYTE* const oend = op + dstSize;
200     const void* dtPtr = DTable + 1;
201     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
202     BIT_DStream_t bitD;
203     DTableDesc const dtd = HUF_getDTableDesc(DTable);
204     U32 const dtLog = dtd.tableLog;
205 
206     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
207 
208     HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
209 
210     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
211 
212     return dstSize;
213 }
214 
215 FORCE_INLINE_TEMPLATE size_t
216 HUF_decompress4X1_usingDTable_internal_body(
217           void* dst,  size_t dstSize,
218     const void* cSrc, size_t cSrcSize,
219     const HUF_DTable* DTable)
220 {
221     /* Check */
222     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
223 
224     {   const BYTE* const istart = (const BYTE*) cSrc;
225         BYTE* const ostart = (BYTE*) dst;
226         BYTE* const oend = ostart + dstSize;
227         const void* const dtPtr = DTable + 1;
228         const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
229 
230         /* Init */
231         BIT_DStream_t bitD1;
232         BIT_DStream_t bitD2;
233         BIT_DStream_t bitD3;
234         BIT_DStream_t bitD4;
235         size_t const length1 = MEM_readLE16(istart);
236         size_t const length2 = MEM_readLE16(istart+2);
237         size_t const length3 = MEM_readLE16(istart+4);
238         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
239         const BYTE* const istart1 = istart + 6;  /* jumpTable */
240         const BYTE* const istart2 = istart1 + length1;
241         const BYTE* const istart3 = istart2 + length2;
242         const BYTE* const istart4 = istart3 + length3;
243         const size_t segmentSize = (dstSize+3) / 4;
244         BYTE* const opStart2 = ostart + segmentSize;
245         BYTE* const opStart3 = opStart2 + segmentSize;
246         BYTE* const opStart4 = opStart3 + segmentSize;
247         BYTE* op1 = ostart;
248         BYTE* op2 = opStart2;
249         BYTE* op3 = opStart3;
250         BYTE* op4 = opStart4;
251         U32 endSignal = BIT_DStream_unfinished;
252         DTableDesc const dtd = HUF_getDTableDesc(DTable);
253         U32 const dtLog = dtd.tableLog;
254 
255         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
256         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
257         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
258         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
259         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
260 
261         /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
262         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
263         while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
264             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
265             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
266             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
267             HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
268             HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
269             HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
270             HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
271             HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
272             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
273             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
274             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
275             HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
276             HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
277             HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
278             HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
279             HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
280             BIT_reloadDStream(&bitD1);
281             BIT_reloadDStream(&bitD2);
282             BIT_reloadDStream(&bitD3);
283             BIT_reloadDStream(&bitD4);
284         }
285 
286         /* check corruption */
287         /* note : should not be necessary : op# advance in lock step, and we control op4.
288          *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
289         if (op1 > opStart2) return ERROR(corruption_detected);
290         if (op2 > opStart3) return ERROR(corruption_detected);
291         if (op3 > opStart4) return ERROR(corruption_detected);
292         /* note : op4 supposed already verified within main loop */
293 
294         /* finish bitStreams one by one */
295         HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
296         HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
297         HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
298         HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
299 
300         /* check */
301         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
302           if (!endCheck) return ERROR(corruption_detected); }
303 
304         /* decoded size */
305         return dstSize;
306     }
307 }
308 
309 
310 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
311                                                const void *cSrc,
312                                                size_t cSrcSize,
313                                                const HUF_DTable *DTable);
314 #if DYNAMIC_BMI2
315 
316 #define HUF_DGEN(fn)                                                               \
317                                                                             \
318     static size_t fn##_default(                                             \
319                   void* dst,  size_t dstSize,                               \
320             const void* cSrc, size_t cSrcSize,                              \
321             const HUF_DTable* DTable)                                       \
322     {                                                                       \
323         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
324     }                                                                       \
325                                                                             \
326     static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
327                   void* dst,  size_t dstSize,                               \
328             const void* cSrc, size_t cSrcSize,                              \
329             const HUF_DTable* DTable)                                       \
330     {                                                                       \
331         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
332     }                                                                       \
333                                                                             \
334     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
335                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
336     {                                                                       \
337         if (bmi2) {                                                         \
338             return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
339         }                                                                   \
340         return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
341     }
342 
343 #else
344 
345 #define HUF_DGEN(fn)                                                               \
346     static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
347                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
348     {                                                                       \
349         (void)bmi2;                                                         \
350         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
351     }
352 
353 #endif
354 
355 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
356 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
357 
358 
359 
360 size_t HUF_decompress1X1_usingDTable(
361           void* dst,  size_t dstSize,
362     const void* cSrc, size_t cSrcSize,
363     const HUF_DTable* DTable)
364 {
365     DTableDesc dtd = HUF_getDTableDesc(DTable);
366     if (dtd.tableType != 0) return ERROR(GENERIC);
367     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
368 }
369 
370 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
371                                    const void* cSrc, size_t cSrcSize,
372                                    void* workSpace, size_t wkspSize)
373 {
374     const BYTE* ip = (const BYTE*) cSrc;
375 
376     size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
377     if (HUF_isError(hSize)) return hSize;
378     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
379     ip += hSize; cSrcSize -= hSize;
380 
381     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
382 }
383 
384 
385 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
386                               const void* cSrc, size_t cSrcSize)
387 {
388     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
389     return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
390                                        workSpace, sizeof(workSpace));
391 }
392 
393 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
394 {
395     HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1), HUFD_ALLOC_TAG);
396     size_t ret;
397 
398     if (!DTable)
399         return 0;
400 
401     RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1));
402 
403     DTable[0] = (U32)(HUF_TABLELOG_MAX-1) * 0x01000001;
404 
405     ret = HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
406 
407     ExFreePool(DTable);
408 
409     return ret;
410 }
411 
412 size_t HUF_decompress4X1_usingDTable(
413           void* dst,  size_t dstSize,
414     const void* cSrc, size_t cSrcSize,
415     const HUF_DTable* DTable)
416 {
417     DTableDesc dtd = HUF_getDTableDesc(DTable);
418     if (dtd.tableType != 0) return ERROR(GENERIC);
419     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
420 }
421 
422 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
423                                    const void* cSrc, size_t cSrcSize,
424                                    void* workSpace, size_t wkspSize, int bmi2)
425 {
426     const BYTE* ip = (const BYTE*) cSrc;
427 
428     size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
429                                                 workSpace, wkspSize);
430     if (HUF_isError(hSize)) return hSize;
431     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
432     ip += hSize; cSrcSize -= hSize;
433 
434     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
435 }
436 
437 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
438                                    const void* cSrc, size_t cSrcSize,
439                                    void* workSpace, size_t wkspSize)
440 {
441     return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
442 }
443 
444 
445 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
446 {
447     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
448     return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
449                                        workSpace, sizeof(workSpace));
450 }
451 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
452 {
453     HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1), HUFD_ALLOC_TAG);
454     size_t ret;
455 
456     if (!DTable)
457         return 0;
458 
459     RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1));
460 
461     DTable[0] = (U32)(HUF_TABLELOG_MAX-1) * 0x01000001;
462 
463     ret = HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
464 
465     ExFreePool(DTable);
466 
467     return ret;
468 }
469 
470 
471 /* *************************/
472 /* double-symbols decoding */
473 /* *************************/
474 
475 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
476 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
477 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
478 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
479 
480 
481 /* HUF_fillDTableX2Level2() :
482  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
483 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
484                            const U32* rankValOrigin, const int minWeight,
485                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
486                            U32 nbBitsBaseline, U16 baseSeq)
487 {
488     HUF_DEltX2 DElt;
489     U32 rankVal[HUF_TABLELOG_MAX + 1];
490 
491     /* get pre-calculated rankVal */
492     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
493 
494     /* fill skipped values */
495     if (minWeight>1) {
496         U32 i, skipSize = rankVal[minWeight];
497         MEM_writeLE16(&(DElt.sequence), baseSeq);
498         DElt.nbBits   = (BYTE)(consumed);
499         DElt.length   = 1;
500         for (i = 0; i < skipSize; i++)
501             DTable[i] = DElt;
502     }
503 
504     /* fill DTable */
505     {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
506             const U32 symbol = sortedSymbols[s].symbol;
507             const U32 weight = sortedSymbols[s].weight;
508             const U32 nbBits = nbBitsBaseline - weight;
509             const U32 length = 1 << (sizeLog-nbBits);
510             const U32 start = rankVal[weight];
511             U32 i = start;
512             const U32 end = start + length;
513 
514             MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
515             DElt.nbBits = (BYTE)(nbBits + consumed);
516             DElt.length = 2;
517             do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
518 
519             rankVal[weight] += length;
520     }   }
521 }
522 
523 
524 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
525                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
526                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
527                            const U32 nbBitsBaseline)
528 {
529     U32 rankVal[HUF_TABLELOG_MAX + 1];
530     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
531     const U32 minBits  = nbBitsBaseline - maxWeight;
532     U32 s;
533 
534     memcpy(rankVal, rankValOrigin, sizeof(rankVal));
535 
536     /* fill DTable */
537     for (s=0; s<sortedListSize; s++) {
538         const U16 symbol = sortedList[s].symbol;
539         const U32 weight = sortedList[s].weight;
540         const U32 nbBits = nbBitsBaseline - weight;
541         const U32 start = rankVal[weight];
542         const U32 length = 1 << (targetLog-nbBits);
543 
544         if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
545             U32 sortedRank;
546             int minWeight = nbBits + scaleLog;
547             if (minWeight < 1) minWeight = 1;
548             sortedRank = rankStart[minWeight];
549             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
550                            rankValOrigin[nbBits], minWeight,
551                            sortedList+sortedRank, sortedListSize-sortedRank,
552                            nbBitsBaseline, symbol);
553         } else {
554             HUF_DEltX2 DElt;
555             MEM_writeLE16(&(DElt.sequence), symbol);
556             DElt.nbBits = (BYTE)(nbBits);
557             DElt.length = 1;
558             {   U32 const end = start + length;
559                 U32 u;
560                 for (u = start; u < end; u++) DTable[u] = DElt;
561         }   }
562         rankVal[weight] += length;
563     }
564 }
565 
566 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
567                        const void* src, size_t srcSize,
568                              void* workSpace, size_t wkspSize)
569 {
570     U32 tableLog, maxW, sizeOfSort, nbSymbols;
571     DTableDesc dtd = HUF_getDTableDesc(DTable);
572     U32 const maxTableLog = dtd.maxTableLog;
573     size_t iSize;
574     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
575     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
576     U32 *rankStart;
577 
578     rankValCol_t* rankVal;
579     U32* rankStats;
580     U32* rankStart0;
581     sortedSymbol_t* sortedSymbol;
582     BYTE* weightList;
583     size_t spaceUsed32 = 0;
584 
585     rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
586     spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
587     rankStats = (U32 *)workSpace + spaceUsed32;
588     spaceUsed32 += HUF_TABLELOG_MAX + 1;
589     rankStart0 = (U32 *)workSpace + spaceUsed32;
590     spaceUsed32 += HUF_TABLELOG_MAX + 2;
591     sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
592     spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
593     weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
594     spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
595 
596     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
597 
598     rankStart = rankStart0 + 1;
599     memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
600 
601     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
602     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
603     /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
604 
605     iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
606     if (HUF_isError(iSize)) return iSize;
607 
608     /* check result */
609     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
610 
611     /* find maxWeight */
612     for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
613 
614     /* Get start index of each weight */
615     {   U32 w, nextRankStart = 0;
616         for (w=1; w<maxW+1; w++) {
617             U32 current = nextRankStart;
618             nextRankStart += rankStats[w];
619             rankStart[w] = current;
620         }
621         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
622         sizeOfSort = nextRankStart;
623     }
624 
625     /* sort symbols by weight */
626     {   U32 s;
627         for (s=0; s<nbSymbols; s++) {
628             U32 const w = weightList[s];
629             U32 const r = rankStart[w]++;
630             sortedSymbol[r].symbol = (BYTE)s;
631             sortedSymbol[r].weight = (BYTE)w;
632         }
633         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
634     }
635 
636     /* Build rankVal */
637     {   U32* const rankVal0 = rankVal[0];
638         {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
639             U32 nextRankVal = 0;
640             U32 w;
641             for (w=1; w<maxW+1; w++) {
642                 U32 current = nextRankVal;
643                 nextRankVal += rankStats[w] << (w+rescale);
644                 rankVal0[w] = current;
645         }   }
646         {   U32 const minBits = tableLog+1 - maxW;
647             U32 consumed;
648             for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
649                 U32* const rankValPtr = rankVal[consumed];
650                 U32 w;
651                 for (w = 1; w < maxW+1; w++) {
652                     rankValPtr[w] = rankVal0[w] >> consumed;
653     }   }   }   }
654 
655     HUF_fillDTableX2(dt, maxTableLog,
656                    sortedSymbol, sizeOfSort,
657                    rankStart0, rankVal, maxW,
658                    tableLog+1);
659 
660     dtd.tableLog = (BYTE)maxTableLog;
661     dtd.tableType = 1;
662     memcpy(DTable, &dtd, sizeof(dtd));
663     return iSize;
664 }
665 
666 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
667 {
668   U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
669   return HUF_readDTableX2_wksp(DTable, src, srcSize,
670                                workSpace, sizeof(workSpace));
671 }
672 
673 
674 FORCE_INLINE_TEMPLATE U32
675 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
676 {
677     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
678     memcpy(op, dt+val, 2);
679     BIT_skipBits(DStream, dt[val].nbBits);
680     return dt[val].length;
681 }
682 
683 FORCE_INLINE_TEMPLATE U32
684 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
685 {
686     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
687     memcpy(op, dt+val, 1);
688     if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
689     else {
690         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
691             BIT_skipBits(DStream, dt[val].nbBits);
692             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
693                 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
694                 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
695     }   }
696     return 1;
697 }
698 
699 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
700     ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
701 
702 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
703     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
704         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
705 
706 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
707     if (MEM_64bits()) \
708         ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
709 
710 HINT_INLINE size_t
711 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
712                 const HUF_DEltX2* const dt, const U32 dtLog)
713 {
714     BYTE* const pStart = p;
715 
716     /* up to 8 symbols at a time */
717     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
718         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
719         HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
720         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
721         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
722     }
723 
724     /* closer to end : up to 2 symbols at a time */
725     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
726         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
727 
728     while (p <= pEnd-2)
729         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
730 
731     if (p < pEnd)
732         p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
733 
734     return p-pStart;
735 }
736 
737 FORCE_INLINE_TEMPLATE size_t
738 HUF_decompress1X2_usingDTable_internal_body(
739           void* dst,  size_t dstSize,
740     const void* cSrc, size_t cSrcSize,
741     const HUF_DTable* DTable)
742 {
743     BIT_DStream_t bitD;
744 
745     /* Init */
746     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
747 
748     /* decode */
749     {   BYTE* const ostart = (BYTE*) dst;
750         BYTE* const oend = ostart + dstSize;
751         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
752         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
753         DTableDesc const dtd = HUF_getDTableDesc(DTable);
754         HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
755     }
756 
757     /* check */
758     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
759 
760     /* decoded size */
761     return dstSize;
762 }
763 
764 
765 FORCE_INLINE_TEMPLATE size_t
766 HUF_decompress4X2_usingDTable_internal_body(
767           void* dst,  size_t dstSize,
768     const void* cSrc, size_t cSrcSize,
769     const HUF_DTable* DTable)
770 {
771     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
772 
773     {   const BYTE* const istart = (const BYTE*) cSrc;
774         BYTE* const ostart = (BYTE*) dst;
775         BYTE* const oend = ostart + dstSize;
776         const void* const dtPtr = DTable+1;
777         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
778 
779         /* Init */
780         BIT_DStream_t bitD1;
781         BIT_DStream_t bitD2;
782         BIT_DStream_t bitD3;
783         BIT_DStream_t bitD4;
784         size_t const length1 = MEM_readLE16(istart);
785         size_t const length2 = MEM_readLE16(istart+2);
786         size_t const length3 = MEM_readLE16(istart+4);
787         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
788         const BYTE* const istart1 = istart + 6;  /* jumpTable */
789         const BYTE* const istart2 = istart1 + length1;
790         const BYTE* const istart3 = istart2 + length2;
791         const BYTE* const istart4 = istart3 + length3;
792         size_t const segmentSize = (dstSize+3) / 4;
793         BYTE* const opStart2 = ostart + segmentSize;
794         BYTE* const opStart3 = opStart2 + segmentSize;
795         BYTE* const opStart4 = opStart3 + segmentSize;
796         BYTE* op1 = ostart;
797         BYTE* op2 = opStart2;
798         BYTE* op3 = opStart3;
799         BYTE* op4 = opStart4;
800         U32 endSignal;
801         DTableDesc const dtd = HUF_getDTableDesc(DTable);
802         U32 const dtLog = dtd.tableLog;
803 
804         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
805         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
806         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
807         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
808         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
809 
810         /* 16-32 symbols per loop (4-8 symbols per stream) */
811         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
812         for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
813             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
814             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
815             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
816             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
817             HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
818             HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
819             HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
820             HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
821             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
822             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
823             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
824             HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
825             HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
826             HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
827             HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
828             HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
829 
830             endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
831         }
832 
833         /* check corruption */
834         if (op1 > opStart2) return ERROR(corruption_detected);
835         if (op2 > opStart3) return ERROR(corruption_detected);
836         if (op3 > opStart4) return ERROR(corruption_detected);
837         /* note : op4 already verified within main loop */
838 
839         /* finish bitStreams one by one */
840         HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
841         HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
842         HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
843         HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
844 
845         /* check */
846         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
847           if (!endCheck) return ERROR(corruption_detected); }
848 
849         /* decoded size */
850         return dstSize;
851     }
852 }
853 
854 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
855 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
856 
857 size_t HUF_decompress1X2_usingDTable(
858           void* dst,  size_t dstSize,
859     const void* cSrc, size_t cSrcSize,
860     const HUF_DTable* DTable)
861 {
862     DTableDesc dtd = HUF_getDTableDesc(DTable);
863     if (dtd.tableType != 1) return ERROR(GENERIC);
864     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
865 }
866 
867 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
868                                    const void* cSrc, size_t cSrcSize,
869                                    void* workSpace, size_t wkspSize)
870 {
871     const BYTE* ip = (const BYTE*) cSrc;
872 
873     size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
874                                                workSpace, wkspSize);
875     if (HUF_isError(hSize)) return hSize;
876     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
877     ip += hSize; cSrcSize -= hSize;
878 
879     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
880 }
881 
882 
883 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
884                               const void* cSrc, size_t cSrcSize)
885 {
886     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
887     return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
888                                        workSpace, sizeof(workSpace));
889 }
890 
891 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
892 {
893     HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX), HUFD_ALLOC_TAG);
894     size_t ret;
895 
896     if (!DTable)
897         return 0;
898 
899     RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX));
900 
901     DTable[0] = (U32)(HUF_TABLELOG_MAX) * 0x01000001;
902 
903     ret = HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
904 
905     ExFreePool(DTable);
906 
907     return ret;
908 }
909 
910 size_t HUF_decompress4X2_usingDTable(
911           void* dst,  size_t dstSize,
912     const void* cSrc, size_t cSrcSize,
913     const HUF_DTable* DTable)
914 {
915     DTableDesc dtd = HUF_getDTableDesc(DTable);
916     if (dtd.tableType != 1) return ERROR(GENERIC);
917     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
918 }
919 
920 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
921                                    const void* cSrc, size_t cSrcSize,
922                                    void* workSpace, size_t wkspSize, int bmi2)
923 {
924     const BYTE* ip = (const BYTE*) cSrc;
925 
926     size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
927                                          workSpace, wkspSize);
928     if (HUF_isError(hSize)) return hSize;
929     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
930     ip += hSize; cSrcSize -= hSize;
931 
932     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
933 }
934 
935 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
936                                    const void* cSrc, size_t cSrcSize,
937                                    void* workSpace, size_t wkspSize)
938 {
939     return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
940 }
941 
942 
943 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
944                               const void* cSrc, size_t cSrcSize)
945 {
946     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
947     return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
948                                        workSpace, sizeof(workSpace));
949 }
950 
951 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
952 {
953     HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX), HUFD_ALLOC_TAG);
954     size_t ret;
955 
956     if (!DTable)
957         return 0;
958 
959     RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX));
960 
961     DTable[0] = (U32)(HUF_TABLELOG_MAX) * 0x01000001;
962 
963     ret = HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
964 
965     ExFreePool(DTable);
966 
967     return ret;
968 }
969 
970 
971 /* ***********************************/
972 /* Universal decompression selectors */
973 /* ***********************************/
974 
975 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
976                                     const void* cSrc, size_t cSrcSize,
977                                     const HUF_DTable* DTable)
978 {
979     DTableDesc const dtd = HUF_getDTableDesc(DTable);
980     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
981                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
982 }
983 
984 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
985                                     const void* cSrc, size_t cSrcSize,
986                                     const HUF_DTable* DTable)
987 {
988     DTableDesc const dtd = HUF_getDTableDesc(DTable);
989     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
990                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
991 }
992 
993 
994 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
995 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
996 {
997     /* single, double, quad */
998     {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
999     {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
1000     {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
1001     {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
1002     {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
1003     {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
1004     {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
1005     {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
1006     {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
1007     {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
1008     {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
1009     {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
1010     {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
1011     {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
1012     {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
1013     {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
1014 };
1015 
1016 /** HUF_selectDecoder() :
1017  *  Tells which decoder is likely to decode faster,
1018  *  based on a set of pre-computed metrics.
1019  * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1020  *  Assumption : 0 < dstSize <= 128 KB */
1021 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1022 {
1023     assert(dstSize > 0);
1024     assert(dstSize <= 128*1024);
1025     /* decoder timing evaluation */
1026     {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
1027         U32 const D256 = (U32)(dstSize >> 8);
1028         U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1029         U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1030         DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
1031         return DTime1 < DTime0;
1032 }   }
1033 
1034 
1035 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1036 
1037 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1038 {
1039     static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1040 
1041     /* validation checks */
1042     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1043     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1044     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1045     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1046 
1047     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1048         return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1049     }
1050 }
1051 
1052 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1053 {
1054     /* validation checks */
1055     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1056     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1057     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1058     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1059 
1060     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1061         return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1062                         HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1063     }
1064 }
1065 
1066 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1067 {
1068     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1069     return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1070                                          workSpace, sizeof(workSpace));
1071 }
1072 
1073 
1074 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1075                                      size_t dstSize, const void* cSrc,
1076                                      size_t cSrcSize, void* workSpace,
1077                                      size_t wkspSize)
1078 {
1079     /* validation checks */
1080     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1081     if (cSrcSize == 0) return ERROR(corruption_detected);
1082 
1083     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1084         return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1085                         HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1086     }
1087 }
1088 
1089 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1090                                   const void* cSrc, size_t cSrcSize,
1091                                   void* workSpace, size_t wkspSize)
1092 {
1093     /* validation checks */
1094     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1095     if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
1096     if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
1097     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
1098 
1099     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1100         return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1101                                 cSrcSize, workSpace, wkspSize):
1102                         HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1103                                 cSrcSize, workSpace, wkspSize);
1104     }
1105 }
1106 
1107 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1108                              const void* cSrc, size_t cSrcSize)
1109 {
1110     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1111     return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1112                                       workSpace, sizeof(workSpace));
1113 }
1114 
1115 
1116 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1117 {
1118     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1119     return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1120                            HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1121 }
1122 
1123 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1124 {
1125     const BYTE* ip = (const BYTE*) cSrc;
1126 
1127     size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1128     if (HUF_isError(hSize)) return hSize;
1129     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1130     ip += hSize; cSrcSize -= hSize;
1131 
1132     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1133 }
1134 
1135 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1136 {
1137     DTableDesc const dtd = HUF_getDTableDesc(DTable);
1138     return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1139                            HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1140 }
1141 
1142 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1143 {
1144     /* validation checks */
1145     if (dstSize == 0) return ERROR(dstSize_tooSmall);
1146     if (cSrcSize == 0) return ERROR(corruption_detected);
1147 
1148     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1149         return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1150                         HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1151     }
1152 }
1153