1 /* ******************************************************************
2 huff0 huffman decoder,
3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35 /* **************************************************************
36 * Dependencies
37 ****************************************************************/
38 #include <string.h> /* memcpy, memset */
39 #include "compiler.h"
40 #include "bitstream.h" /* BIT_* */
41 #include "fse.h" /* to compress headers */
42 #define HUF_STATIC_LINKING_ONLY
43 #include "huf.h"
44 #include "error_private.h"
45
46 /* **************************************************************
47 * Macros
48 ****************************************************************/
49
50 /* These two optional macros force the use one way or another of the two
51 * Huffman decompression implementations. You can't force in both directions
52 * at the same time.
53 */
54 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
55 defined(HUF_FORCE_DECOMPRESS_X2)
56 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
57 #endif
58
59
60 /* **************************************************************
61 * Error Management
62 ****************************************************************/
#define HUF_isError ERR_isError
#ifndef CHECK_F
/* CHECK_F(f) : evaluates expression f once; if it produced an error code,
 * returns that code from the enclosing function immediately. */
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
#endif
67
68
69 /* **************************************************************
70 * Byte alignment for workSpace management
71 ****************************************************************/
/* HUF_ALIGN(x, a) : rounds x up to the next multiple of a (a must be a power of 2). */
#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
74
75
76 /* **************************************************************
77 * BMI2 Variant Wrappers
78 ****************************************************************/
#if DYNAMIC_BMI2

/* HUF_DGEN(fn) : with dynamic BMI2 dispatch enabled, generates two builds of
 * fn##_body — a default build and a "bmi2"-targeted build — plus a runtime
 * dispatcher `fn` that selects between them according to the `bmi2` flag. */
#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

/* Without dynamic dispatch, `fn` forwards straight to fn##_body and ignores
 * the `bmi2` flag (kept in the signature for interface compatibility). */
#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif
119
120
121 /*-***************************/
122 /* generic DTableDesc */
123 /*-***************************/
124 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
125
HUF_getDTableDesc(const HUF_DTable * table)126 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
127 {
128 DTableDesc dtd;
129 memcpy(&dtd, table, sizeof(dtd));
130 return dtd;
131 }
132
133
134 #ifndef HUF_FORCE_DECOMPRESS_X2
135
136 /*-***************************/
137 /* single-symbol decoding */
138 /*-***************************/
/* One X1 table cell : the decoded byte plus the number of bits it consumes. */
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
140
/** HUF_readDTableX1_wksp() :
 *  Builds a single-symbol (X1) decoding table from the Huffman weight header
 *  found at the start of `src`.
 *  @param DTable    table to fill; its first cell stores the DTableDesc header
 *  @param workSpace scratch memory, carved into rankVal[] and huffWeight[]
 *  @param wkspSize  size of workSpace in bytes
 *  @return size of the weight header consumed from `src`, or an error code */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;   /* skip the descriptor cell */
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;

    U32* rankVal;
    BYTE* huffWeight;
    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */

    /* carve workSpace : rank counters first, then the (U32-aligned) weight array */
    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    /* read symbol weights; also fills rankVal[] with per-weight symbol counts */
    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Calculate starting value for each rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<tableLog+1; n++) {
            U32 const current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));   /* weight n occupies 2^(n-1) cells per symbol */
            rankVal[n] = current;
    }   }

    /* fill DTable : each symbol of weight w repeats over 2^(w-1) consecutive cells */
    {   U32 n;
        for (n=0; n<nbSymbols; n++) {
            U32 const w = huffWeight[n];
            U32 const length = (1 << w) >> 1;   /* 0 cells for weight-0 symbols */
            U32 u;
            HUF_DEltX1 D;
            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
            for (u = rankVal[w]; u < rankVal[w] + length; u++)
                dt[u] = D;
            rankVal[w] += length;
    }   }

    return iSize;
}
197
/* Convenience variant of HUF_readDTableX1_wksp() : allocates the required
 * workspace on the stack. */
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX1_wksp(DTable, src, srcSize, scratch, sizeof(scratch));
}
204
205 FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t * Dstream,const HUF_DEltX1 * dt,const U32 dtLog)206 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
207 {
208 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
209 BYTE const c = dt[val].byte;
210 BIT_skipBits(Dstream, dt[val].nbBits);
211 return c;
212 }
213
/* Emit one symbol unconditionally. */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

/* Emit one symbol only when the bit container cannot run dry between reloads
 * (64-bit hosts, or small enough tables on 32-bit hosts). */
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

/* Emit one symbol only on 64-bit hosts, where the container holds enough bits. */
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
224
/* Decodes symbols from one bitstream into [p, pEnd).
 * Hot loop emits up to 4 symbols per reload; tail loops drain the last few
 * symbols once fewer than 4 output bytes remain or the stream nears its end.
 * @return number of bytes written (always pEnd - pStart). */
HINT_INLINE size_t
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
    }

    /* [0-3] symbols remaining : 32-bit hosts must keep reloading per symbol */
    if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
    while (p < pEnd)
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    return pEnd-pStart;
}
249
/* Decodes a single-stream X1 input into dst, using a prebuilt DTable.
 * @return dstSize on success, corruption_detected if the bitstream does not
 * end exactly where expected. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + dstSize;
    const void* dtPtr = DTable + 1;   /* skip descriptor cell */
    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
    BIT_DStream_t bitD;
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    U32 const dtLog = dtd.tableLog;

    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);

    /* the stream must be fully and exactly consumed */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    return dstSize;
}
272
/* Decodes an input split into 4 independent bitstreams. A 6-byte jump table
 * (three little-endian 16-bit lengths) precedes the streams; the 4th stream
 * extends to the end of the input. Each stream fills its own quarter of dst;
 * the hot loop interleaves all 4 streams for instruction-level parallelism. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;   /* output quarter per stream, rounded up */
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = BIT_DStream_unfinished;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        /* NOTE(review): endSignal is set once before the loop and not refreshed
         * inside it; the loop is bounded by op4, and exhausted/corrupt streams
         * are caught by the endCheck below — confirm against bitstream.h. */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
            BIT_reloadDStream(&bitD1);
            BIT_reloadDStream(&bitD2);
            BIT_reloadDStream(&bitD3);
            BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must end exactly where expected */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
366
367
/* Function-pointer type matching the *_usingDTable decompression entry points. */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const void *cSrc,
                                               size_t cSrcSize,
                                               const HUF_DTable *DTable);

/* Instantiate the (optionally BMI2-dispatched) wrappers around the X1 _body implementations. */
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
375
376
377
378 size_t HUF_decompress1X1_usingDTable(
379 void* dst, size_t dstSize,
380 const void* cSrc, size_t cSrcSize,
381 const HUF_DTable* DTable)
382 {
383 DTableDesc dtd = HUF_getDTableDesc(DTable);
384 if (dtd.tableType != 0) return ERROR(GENERIC);
385 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
386 }
387
/* Builds the X1 table from the header at the front of cSrc, then decodes the
 * remaining payload as a single stream. */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header must leave room for payload */
    ip += hSize;

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize - hSize, DCtx, /* bmi2 */ 0);
}
401
402
/* Stack-workspace convenience variant of HUF_decompress1X1_DCtx_wksp(). */
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       scratch, sizeof(scratch));
}
410
/* Fully self-contained single-stream X1 decode : builds its own table on the stack. */
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
}
416
/* 4-stream X1 decode with a caller-provided table.
 * Rejects tables that were not built by the single-symbol (X1) builder. */
size_t HUF_decompress4X1_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc const desc = HUF_getDTableDesc(DTable);
    if (desc.tableType != 0) return ERROR(GENERIC);
    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
426
/* Builds the X1 table from the header at the front of cSrc, then decodes the
 * remaining payload as 4 interleaved streams. `bmi2` selects the dispatch
 * target of the internal decoder. */
static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                               const void* cSrc, size_t cSrcSize,
                                               void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* nothing left to decode */
    ip += hSize;

    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize - hSize, dctx, bmi2);
}
441
/* Non-BMI2 public entry : forwards with bmi2 disabled. */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize,
                                            workSpace, wkspSize, /* bmi2 */ 0);
}
448
449
/* Stack-workspace convenience variant of HUF_decompress4X1_DCtx_wksp(). */
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       scratch, sizeof(scratch));
}
/* Fully self-contained 4-stream X1 decode : builds its own table on the stack. */
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
461
462 #endif /* HUF_FORCE_DECOMPRESS_X2 */
463
464
465 #ifndef HUF_FORCE_DECOMPRESS_X1
466
467 /* *************************/
468 /* double-symbols decoding */
469 /* *************************/
470
/* One X2 table cell : `sequence` stores up to two symbols (written little-endian),
 * `nbBits` the total bits the entry consumes, `length` how many symbols it emits. */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
/* Symbol/weight pair produced by the weight-sorting step. */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/* One column of rank start positions, indexed by weight. */
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
/* One rankValCol_t column per possible number of bits already consumed. */
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
475
476
/* HUF_fillDTableX2Level2() :
 * Fills a second-level sub-table covering the entries that share a first
 * symbol (`baseSeq`, `consumed` bits already accounted for).
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq)
{
    HUF_DEltX2 DElt;
    U32 rankVal[HUF_TABLELOG_MAX + 1];

    /* get pre-calculated rankVal */
    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill skipped values : entries whose second symbol would not fit are
     * stored as 1-symbol sequences (baseSeq alone, `consumed` bits) */
    if (minWeight>1) {
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits = (BYTE)(consumed);
        DElt.length = 1;
        for (i = 0; i < skipSize; i++)
            DTable[i] = DElt;
    }

    /* fill DTable : one 2-symbol entry per (baseSeq, second symbol) pair */
    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
            const U32 symbol = sortedSymbols[s].symbol;
            const U32 weight = sortedSymbols[s].weight;
            const U32 nbBits = nbBitsBaseline - weight;
            const U32 length = 1 << (sizeLog-nbBits);   /* cells covered by this pair */
            const U32 start = rankVal[weight];
            U32 i = start;
            const U32 end = start + length;

            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
            DElt.nbBits = (BYTE)(nbBits + consumed);
            DElt.length = 2;
            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

            rankVal[weight] += length;
    }   }
}
518
519
/* HUF_fillDTableX2() :
 * Fills the complete double-symbol table. For each first symbol, either
 * recurses into HUF_fillDTableX2Level2() when a second symbol can fit in the
 * remaining bits, or writes a plain 1-symbol entry. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32 rankVal[HUF_TABLELOG_MAX + 1];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;   /* shortest possible code length */
    U32 s;

    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill DTable */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
            U32 sortedRank;
            int minWeight = nbBits + scaleLog;   /* lightest second symbol that still fits */
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol);
        } else {
            HUF_DEltX2 DElt;
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            DElt.length = 1;
            {   U32 const end = start + length;
                U32 u;
                for (u = start; u < end; u++) DTable[u] = DElt;
        }   }
        rankVal[weight] += length;
    }
}
561
/** HUF_readDTableX2_wksp() :
 *  Builds a double-symbol (X2) decoding table from the Huffman weight header
 *  found at the start of `src`.
 *  @param workSpace scratch memory, carved into five internal arrays
 *  @return size of the weight header consumed from `src`, or an error code */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    size_t iSize;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    U32 *rankStart;

    rankValCol_t* rankVal;
    U32* rankStats;
    U32* rankStart0;
    sortedSymbol_t* sortedSymbol;
    BYTE* weightList;
    size_t spaceUsed32 = 0;   /* workspace consumption, counted in U32 units */

    /* carve workSpace into the five scratch arrays */
    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankStart = rankStart0 + 1;   /* rankStart[-1..] addressing via rankStart0 */
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    /* read symbol weights; also fills rankStats[] with per-weight symbol counts */
    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find maxWeight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    {   U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 current = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = current;
        }
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        sizeOfSort = nextRankStart;
    }

    /* sort symbols by weight (counting sort via rankStart[]) */
    {   U32 s;
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        }
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
    }

    /* Build rankVal : column 0 holds the table start position of each weight;
     * other columns hold the same positions rescaled for `consumed` first-level bits */
    {   U32* const rankVal0 = rankVal[0];
        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            U32 nextRankVal = 0;
            U32 w;
            for (w=1; w<maxW+1; w++) {
                U32 current = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = current;
        }   }
        {   U32 const minBits = tableLog+1 - maxW;
            U32 consumed;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                U32 w;
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;
    }   }   }   }

    HUF_fillDTableX2(dt, maxTableLog,
                   sortedSymbol, sizeOfSort,
                   rankStart0, rankVal, maxW,
                   tableLog+1);

    /* write back the updated table descriptor */
    dtd.tableLog = (BYTE)maxTableLog;
    dtd.tableType = 1;
    memcpy(DTable, &dtd, sizeof(dtd));
    return iSize;
}
661
/* Convenience variant of HUF_readDTableX2_wksp() : allocates the required
 * workspace on the stack. */
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 scratch[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_readDTableX2_wksp(DTable, src, srcSize, scratch, sizeof(scratch));
}
668
669
670 FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)671 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
672 {
673 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
674 memcpy(op, dt+val, 2);
675 BIT_skipBits(DStream, dt[val].nbBits);
676 return dt[val].length;
677 }
678
/* Decodes the final symbol of a stream when only 1 output byte remains.
 * Always writes exactly 1 byte. If the matched entry holds a 2-symbol
 * sequence, only its first byte is emitted and bit consumption is clamped to
 * the container size — valid only because no further symbol will be read. */
FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
{
    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
    memcpy(op, dt+val, 1);
    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
    else {
        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
            BIT_skipBits(DStream, dt[val].nbBits);
            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
    }   }
    return 1;
}
694
/* Emit up to 2 symbols unconditionally; advances ptr by the emitted count. */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Emit only when the bit container cannot run dry between reloads
 * (64-bit hosts, or small enough tables on 32-bit hosts). */
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* Emit only on 64-bit hosts, where the container holds enough bits. */
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
705
/* Decodes symbols from one bitstream into [p, pEnd) using the X2 table.
 * Each lookup may emit 2 bytes, so loop bounds are conservative; the very
 * last byte (odd remainder) goes through HUF_decodeLastSymbolX2().
 * @return number of bytes written (p - pStart). */
HINT_INLINE size_t
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
{
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
    }

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    while (p <= pEnd-2)
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

    /* at most 1 byte left : emit the first symbol only */
    if (p < pEnd)
        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);

    return p-pStart;
}
732
/* Decodes a single-stream X2 input into dst, using a prebuilt DTable.
 * @return dstSize on success, corruption_detected if the bitstream does not
 * end exactly where expected. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    BIT_DStream_t bitD;

    /* Init */
    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );

    /* decode */
    {   BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
    }

    /* check : the stream must be fully and exactly consumed */
    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}
759
760
/* Decodes an X2 input split into 4 independent bitstreams. Layout matches the
 * X1 variant : a 6-byte jump table precedes the streams, the 4th stream runs
 * to the end of the input, and each stream fills its own quarter of dst. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    {   const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* Init */
        BIT_DStream_t bitD1;
        BIT_DStream_t bitD2;
        BIT_DStream_t bitD3;
        BIT_DStream_t bitD4;
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;   /* output quarter per stream, rounded up */
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op1 = ostart;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);

            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        }

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* check : all 4 streams must end exactly where expected */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }

        /* decoded size */
        return dstSize;
    }
}
849
850 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)851 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
852
853 size_t HUF_decompress1X2_usingDTable(
854 void* dst, size_t dstSize,
855 const void* cSrc, size_t cSrcSize,
856 const HUF_DTable* DTable)
857 {
858 DTableDesc dtd = HUF_getDTableDesc(DTable);
859 if (dtd.tableType != 1) return ERROR(GENERIC);
860 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
861 }
862
/* HUF_decompress1X2_DCtx_wksp() :
 * Read the X2 DTable from the start of cSrc, then decompress the remaining
 * payload as a single stream. workSpace avoids any internal allocation;
 * it must hold HUF_DECOMPRESS_WORKSPACE_SIZE_U32 x U32. */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
                                               workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header must leave room for a payload */
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}


/* HUF_decompress1X2_DCtx() :
 * Same as HUF_decompress1X2_DCtx_wksp(), with a stack-allocated workspace. */
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

/* HUF_decompress1X2() :
 * Convenience entry point : builds a static DTable on the stack. */
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}

/* HUF_decompress4X2_usingDTable() :
 * 4-stream decompression using an already-built DTable,
 * which must be of X2 (double-symbols) type. */
size_t HUF_decompress4X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    if (dtd.tableType != 1) return ERROR(GENERIC);   /* 1 == X2 table */
    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

/* HUF_decompress4X2_DCtx_wksp_bmi2() :
 * Read the X2 DTable from cSrc header, then decompress the remaining payload
 * as 4 interleaved streams. @bmi2 selects the BMI2-accelerated code path. */
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                               const void* cSrc, size_t cSrcSize,
                                               void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
                                         workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);   /* header must leave room for a payload */
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

/* HUF_decompress4X2_DCtx_wksp() :
 * Public wrapper over the bmi2-parameterized variant, with BMI2 disabled. */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
}


/* HUF_decompress4X2_DCtx() :
 * Same as HUF_decompress4X2_DCtx_wksp(), with a stack-allocated workspace. */
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}

HUF_decompress4X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)934 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
935 {
936 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
937 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
938 }
939
940 #endif /* HUF_FORCE_DECOMPRESS_X1 */
941
942
/* ***********************************/
/* Universal decompression selectors */
/* ***********************************/

/* HUF_decompress1X_usingDTable() :
 * Single-stream decompression; dispatches to the X1 or X2 decoder
 * according to the DTable's type (unless a variant is forced at compile time). */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}

/* HUF_decompress4X_usingDTable() :
 * 4-stream decompression; dispatches to the X1 or X2 decoder
 * according to the DTable's type (unless a variant is forced at compile time). */
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}


986 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
987 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
988 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
989 {
990 /* single, double, quad */
991 {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
992 {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
993 {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
994 {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
995 {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
996 {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
997 {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
998 {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
999 {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
1000 {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
1001 {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
1002 {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
1003 {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
1004 {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
1005 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1006 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1007 };
1008 #endif
1009
1010 /** HUF_selectDecoder() :
1011 * Tells which decoder is likely to decode faster,
1012 * based on a set of pre-computed metrics.
1013 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1014 * Assumption : 0 < dstSize <= 128 KB */
HUF_selectDecoder(size_t dstSize,size_t cSrcSize)1015 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1016 {
1017 assert(dstSize > 0);
1018 assert(dstSize <= 128*1024);
1019 #if defined(HUF_FORCE_DECOMPRESS_X1)
1020 (void)dstSize;
1021 (void)cSrcSize;
1022 return 0;
1023 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1024 (void)dstSize;
1025 (void)cSrcSize;
1026 return 1;
1027 #else
1028 /* decoder timing evaluation */
1029 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1030 U32 const D256 = (U32)(dstSize >> 8);
1031 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1032 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1033 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1034 return DTime1 < DTime0;
1035 }
1036 #endif
1037 }
1038
1039
1040 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1041
HUF_decompress(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1042 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1043 {
1044 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1045 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1046 #endif
1047
1048 /* validation checks */
1049 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1050 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1051 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1052 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1053
1054 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1055 #if defined(HUF_FORCE_DECOMPRESS_X1)
1056 (void)algoNb;
1057 assert(algoNb == 0);
1058 return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1059 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1060 (void)algoNb;
1061 assert(algoNb == 1);
1062 return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1063 #else
1064 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1065 #endif
1066 }
1067 }
1068
/* HUF_decompress4X_DCtx() :
 * 4-stream decompression into a caller-provided DTable context.
 * Handles uncompressed and RLE inputs inline, then dispatches by predicted speed. */
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#else
        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
#endif
    }
}

/* HUF_decompress4X_hufOnly() :
 * Same as HUF_decompress4X_hufOnly_wksp(), with a stack-allocated workspace. */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                         workSpace, sizeof(workSpace));
}


/* HUF_decompress4X_hufOnly_wksp() :
 * 4-stream decompression, Huffman path only : unlike HUF_decompress4X_DCtx(),
 * it does NOT handle uncompressed or RLE inputs. */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#else
        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                            cSrcSize, workSpace, wkspSize):
                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#endif
    }
}

/* HUF_decompress1X_DCtx_wksp() :
 * Single-stream decompression with an external workspace.
 * Handles uncompressed and RLE inputs inline, then dispatches by predicted speed. */
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#else
        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize):
                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
#endif
    }
}

/* HUF_decompress1X_DCtx() :
 * Same as HUF_decompress1X_DCtx_wksp(), with a stack-allocated workspace. */
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                      workSpace, sizeof(workSpace));
}


/* HUF_decompress1X_usingDTable_bmi2() :
 * Same as HUF_decompress1X_usingDTable(), with an explicit @bmi2 flag
 * selecting the BMI2-accelerated code path. */
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#else
    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#endif
}

1183 #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)1184 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1185 {
1186 const BYTE* ip = (const BYTE*) cSrc;
1187
1188 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1189 if (HUF_isError(hSize)) return hSize;
1190 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1191 ip += hSize; cSrcSize -= hSize;
1192
1193 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1194 }
1195 #endif
1196
/* HUF_decompress4X_usingDTable_bmi2() :
 * Same as HUF_decompress4X_usingDTable(), with an explicit @bmi2 flag
 * selecting the BMI2-accelerated code path. */
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#else
    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
#endif
}

/* HUF_decompress4X_hufOnly_wksp_bmi2() :
 * Same as HUF_decompress4X_hufOnly_wksp(), with an explicit @bmi2 flag
 * selecting the BMI2-accelerated code path. Huffman path only
 * (does not handle uncompressed or RLE inputs). */
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)algoNb;
        assert(algoNb == 0);
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)algoNb;
        assert(algoNb == 1);
        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#else
        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#endif
    }
}
