1 /* ****************************************************************** 2 huff0 huffman decoder, 3 part of Finite State Entropy library 4 Copyright (C) 2013-present, Yann Collet. 5 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7 8 Redistribution and use in source and binary forms, with or without 9 modification, are permitted provided that the following conditions are 10 met: 11 12 * Redistributions of source code must retain the above copyright 13 notice, this list of conditions and the following disclaimer. 14 * Redistributions in binary form must reproduce the above 15 copyright notice, this list of conditions and the following disclaimer 16 in the documentation and/or other materials provided with the 17 distribution. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 31 You can contact the author at : 32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 33 ****************************************************************** */ 34 35 /* ************************************************************** 36 * Dependencies 37 ****************************************************************/ 38 #include <string.h> /* memcpy, memset */ 39 #include "compiler.h" 40 #include "bitstream.h" /* BIT_* */ 41 #include "fse.h" /* to compress headers */ 42 #define HUF_STATIC_LINKING_ONLY 43 #include "huf.h" 44 #include "error_private.h" 45 #include <ntifs.h> 46 #include <ntddk.h> 47 48 #define HUFD_ALLOC_TAG 0x64465548 // "HUFd" 49 50 51 /* ************************************************************** 52 * Error Management 53 ****************************************************************/ 54 #define HUF_isError ERR_isError 55 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } 56 57 58 /* ************************************************************** 59 * Byte alignment for workSpace management 60 ****************************************************************/ 61 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) 62 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) 63 64 65 /*-***************************/ 66 /* generic DTableDesc */ 67 /*-***************************/ 68 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; 69 70 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) 71 { 72 DTableDesc dtd; 73 memcpy(&dtd, table, sizeof(dtd)); 74 return dtd; 75 } 76 77 78 /*-***************************/ 79 /* single-symbol decoding */ 80 /*-***************************/ 81 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ 82 83 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) 84 { 85 U32 tableLog = 0; 86 U32 nbSymbols = 0; 87 
size_t iSize; 88 void* const dtPtr = DTable + 1; 89 HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; 90 91 U32* rankVal; 92 BYTE* huffWeight; 93 size_t spaceUsed32 = 0; 94 95 rankVal = (U32 *)workSpace + spaceUsed32; 96 spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; 97 huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); 98 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; 99 100 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); 101 102 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); 103 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ 104 105 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); 106 if (HUF_isError(iSize)) return iSize; 107 108 /* Table header */ 109 { DTableDesc dtd = HUF_getDTableDesc(DTable); 110 if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ 111 dtd.tableType = 0; 112 dtd.tableLog = (BYTE)tableLog; 113 memcpy(DTable, &dtd, sizeof(dtd)); 114 } 115 116 /* Calculate starting value for each rank */ 117 { U32 n, nextRankStart = 0; 118 for (n=1; n<tableLog+1; n++) { 119 U32 const current = nextRankStart; 120 nextRankStart += (rankVal[n] << (n-1)); 121 rankVal[n] = current; 122 } } 123 124 /* fill DTable */ 125 { U32 n; 126 for (n=0; n<nbSymbols; n++) { 127 U32 const w = huffWeight[n]; 128 U32 const length = (1 << w) >> 1; 129 U32 u; 130 HUF_DEltX1 D; 131 D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); 132 for (u = rankVal[w]; u < rankVal[w] + length; u++) 133 dt[u] = D; 134 rankVal[w] += length; 135 } } 136 137 return iSize; 138 } 139 140 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) 141 { 142 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 143 return HUF_readDTableX1_wksp(DTable, src, srcSize, 144 workSpace, sizeof(workSpace)); 145 } 146 147 FORCE_INLINE_TEMPLATE BYTE 148 
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) 149 { 150 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ 151 BYTE const c = dt[val].byte; 152 BIT_skipBits(Dstream, dt[val].nbBits); 153 return c; 154 } 155 156 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ 157 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) 158 159 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ 160 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 161 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) 162 163 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ 164 if (MEM_64bits()) \ 165 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) 166 167 HINT_INLINE size_t 168 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) 169 { 170 BYTE* const pStart = p; 171 172 /* up to 4 symbols at a time */ 173 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { 174 HUF_DECODE_SYMBOLX1_2(p, bitDPtr); 175 HUF_DECODE_SYMBOLX1_1(p, bitDPtr); 176 HUF_DECODE_SYMBOLX1_2(p, bitDPtr); 177 HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 178 } 179 180 /* [0-3] symbols remaining */ 181 if (MEM_32bits()) 182 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) 183 HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 184 185 /* no more data to retrieve from bitstream, no need to reload */ 186 while (p < pEnd) 187 HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 188 189 return pEnd-pStart; 190 } 191 192 FORCE_INLINE_TEMPLATE size_t 193 HUF_decompress1X1_usingDTable_internal_body( 194 void* dst, size_t dstSize, 195 const void* cSrc, size_t cSrcSize, 196 const HUF_DTable* DTable) 197 { 198 BYTE* op = (BYTE*)dst; 199 BYTE* const oend = op + dstSize; 200 const void* dtPtr = DTable + 1; 201 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; 202 BIT_DStream_t bitD; 203 DTableDesc const dtd = HUF_getDTableDesc(DTable); 204 U32 const dtLog = dtd.tableLog; 205 206 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); 207 
208 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); 209 210 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); 211 212 return dstSize; 213 } 214 215 FORCE_INLINE_TEMPLATE size_t 216 HUF_decompress4X1_usingDTable_internal_body( 217 void* dst, size_t dstSize, 218 const void* cSrc, size_t cSrcSize, 219 const HUF_DTable* DTable) 220 { 221 /* Check */ 222 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 223 224 { const BYTE* const istart = (const BYTE*) cSrc; 225 BYTE* const ostart = (BYTE*) dst; 226 BYTE* const oend = ostart + dstSize; 227 const void* const dtPtr = DTable + 1; 228 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; 229 230 /* Init */ 231 BIT_DStream_t bitD1; 232 BIT_DStream_t bitD2; 233 BIT_DStream_t bitD3; 234 BIT_DStream_t bitD4; 235 size_t const length1 = MEM_readLE16(istart); 236 size_t const length2 = MEM_readLE16(istart+2); 237 size_t const length3 = MEM_readLE16(istart+4); 238 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); 239 const BYTE* const istart1 = istart + 6; /* jumpTable */ 240 const BYTE* const istart2 = istart1 + length1; 241 const BYTE* const istart3 = istart2 + length2; 242 const BYTE* const istart4 = istart3 + length3; 243 const size_t segmentSize = (dstSize+3) / 4; 244 BYTE* const opStart2 = ostart + segmentSize; 245 BYTE* const opStart3 = opStart2 + segmentSize; 246 BYTE* const opStart4 = opStart3 + segmentSize; 247 BYTE* op1 = ostart; 248 BYTE* op2 = opStart2; 249 BYTE* op3 = opStart3; 250 BYTE* op4 = opStart4; 251 U32 endSignal = BIT_DStream_unfinished; 252 DTableDesc const dtd = HUF_getDTableDesc(DTable); 253 U32 const dtLog = dtd.tableLog; 254 255 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 256 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); 257 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); 258 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); 259 CHECK_F( BIT_initDStream(&bitD4, 
istart4, length4) ); 260 261 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ 262 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); 263 while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { 264 HUF_DECODE_SYMBOLX1_2(op1, &bitD1); 265 HUF_DECODE_SYMBOLX1_2(op2, &bitD2); 266 HUF_DECODE_SYMBOLX1_2(op3, &bitD3); 267 HUF_DECODE_SYMBOLX1_2(op4, &bitD4); 268 HUF_DECODE_SYMBOLX1_1(op1, &bitD1); 269 HUF_DECODE_SYMBOLX1_1(op2, &bitD2); 270 HUF_DECODE_SYMBOLX1_1(op3, &bitD3); 271 HUF_DECODE_SYMBOLX1_1(op4, &bitD4); 272 HUF_DECODE_SYMBOLX1_2(op1, &bitD1); 273 HUF_DECODE_SYMBOLX1_2(op2, &bitD2); 274 HUF_DECODE_SYMBOLX1_2(op3, &bitD3); 275 HUF_DECODE_SYMBOLX1_2(op4, &bitD4); 276 HUF_DECODE_SYMBOLX1_0(op1, &bitD1); 277 HUF_DECODE_SYMBOLX1_0(op2, &bitD2); 278 HUF_DECODE_SYMBOLX1_0(op3, &bitD3); 279 HUF_DECODE_SYMBOLX1_0(op4, &bitD4); 280 BIT_reloadDStream(&bitD1); 281 BIT_reloadDStream(&bitD2); 282 BIT_reloadDStream(&bitD3); 283 BIT_reloadDStream(&bitD4); 284 } 285 286 /* check corruption */ 287 /* note : should not be necessary : op# advance in lock step, and we control op4. 
288 * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ 289 if (op1 > opStart2) return ERROR(corruption_detected); 290 if (op2 > opStart3) return ERROR(corruption_detected); 291 if (op3 > opStart4) return ERROR(corruption_detected); 292 /* note : op4 supposed already verified within main loop */ 293 294 /* finish bitStreams one by one */ 295 HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); 296 HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); 297 HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); 298 HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); 299 300 /* check */ 301 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); 302 if (!endCheck) return ERROR(corruption_detected); } 303 304 /* decoded size */ 305 return dstSize; 306 } 307 } 308 309 310 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, 311 const void *cSrc, 312 size_t cSrcSize, 313 const HUF_DTable *DTable); 314 #if DYNAMIC_BMI2 315 316 #define HUF_DGEN(fn) \ 317 \ 318 static size_t fn##_default( \ 319 void* dst, size_t dstSize, \ 320 const void* cSrc, size_t cSrcSize, \ 321 const HUF_DTable* DTable) \ 322 { \ 323 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 324 } \ 325 \ 326 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ 327 void* dst, size_t dstSize, \ 328 const void* cSrc, size_t cSrcSize, \ 329 const HUF_DTable* DTable) \ 330 { \ 331 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 332 } \ 333 \ 334 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ 335 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ 336 { \ 337 if (bmi2) { \ 338 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ 339 } \ 340 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ 341 } 342 343 #else 344 345 #define HUF_DGEN(fn) \ 346 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ 347 size_t 
cSrcSize, HUF_DTable const* DTable, int bmi2) \ 348 { \ 349 (void)bmi2; \ 350 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 351 } 352 353 #endif 354 355 HUF_DGEN(HUF_decompress1X1_usingDTable_internal) 356 HUF_DGEN(HUF_decompress4X1_usingDTable_internal) 357 358 359 360 size_t HUF_decompress1X1_usingDTable( 361 void* dst, size_t dstSize, 362 const void* cSrc, size_t cSrcSize, 363 const HUF_DTable* DTable) 364 { 365 DTableDesc dtd = HUF_getDTableDesc(DTable); 366 if (dtd.tableType != 0) return ERROR(GENERIC); 367 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 368 } 369 370 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 371 const void* cSrc, size_t cSrcSize, 372 void* workSpace, size_t wkspSize) 373 { 374 const BYTE* ip = (const BYTE*) cSrc; 375 376 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); 377 if (HUF_isError(hSize)) return hSize; 378 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 379 ip += hSize; cSrcSize -= hSize; 380 381 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 382 } 383 384 385 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, 386 const void* cSrc, size_t cSrcSize) 387 { 388 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 389 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, 390 workSpace, sizeof(workSpace)); 391 } 392 393 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 394 { 395 HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1), HUFD_ALLOC_TAG); 396 size_t ret; 397 398 if (!DTable) 399 return 0; 400 401 RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1)); 402 403 DTable[0] = (U32)(HUF_TABLELOG_MAX-1) * 0x01000001; 404 405 ret = HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, 
cSrcSize); 406 407 ExFreePool(DTable); 408 409 return ret; 410 } 411 412 size_t HUF_decompress4X1_usingDTable( 413 void* dst, size_t dstSize, 414 const void* cSrc, size_t cSrcSize, 415 const HUF_DTable* DTable) 416 { 417 DTableDesc dtd = HUF_getDTableDesc(DTable); 418 if (dtd.tableType != 0) return ERROR(GENERIC); 419 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 420 } 421 422 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 423 const void* cSrc, size_t cSrcSize, 424 void* workSpace, size_t wkspSize, int bmi2) 425 { 426 const BYTE* ip = (const BYTE*) cSrc; 427 428 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, 429 workSpace, wkspSize); 430 if (HUF_isError(hSize)) return hSize; 431 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 432 ip += hSize; cSrcSize -= hSize; 433 434 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 435 } 436 437 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 438 const void* cSrc, size_t cSrcSize, 439 void* workSpace, size_t wkspSize) 440 { 441 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); 442 } 443 444 445 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 446 { 447 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 448 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 449 workSpace, sizeof(workSpace)); 450 } 451 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 452 { 453 HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1), HUFD_ALLOC_TAG); 454 size_t ret; 455 456 if (!DTable) 457 return 0; 458 459 RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX-1)); 460 461 DTable[0] = (U32)(HUF_TABLELOG_MAX-1) * 
0x01000001; 462 463 ret = HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); 464 465 ExFreePool(DTable); 466 467 return ret; 468 } 469 470 471 /* *************************/ 472 /* double-symbols decoding */ 473 /* *************************/ 474 475 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ 476 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; 477 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; 478 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; 479 480 481 /* HUF_fillDTableX2Level2() : 482 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ 483 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, 484 const U32* rankValOrigin, const int minWeight, 485 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, 486 U32 nbBitsBaseline, U16 baseSeq) 487 { 488 HUF_DEltX2 DElt; 489 U32 rankVal[HUF_TABLELOG_MAX + 1]; 490 491 /* get pre-calculated rankVal */ 492 memcpy(rankVal, rankValOrigin, sizeof(rankVal)); 493 494 /* fill skipped values */ 495 if (minWeight>1) { 496 U32 i, skipSize = rankVal[minWeight]; 497 MEM_writeLE16(&(DElt.sequence), baseSeq); 498 DElt.nbBits = (BYTE)(consumed); 499 DElt.length = 1; 500 for (i = 0; i < skipSize; i++) 501 DTable[i] = DElt; 502 } 503 504 /* fill DTable */ 505 { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */ 506 const U32 symbol = sortedSymbols[s].symbol; 507 const U32 weight = sortedSymbols[s].weight; 508 const U32 nbBits = nbBitsBaseline - weight; 509 const U32 length = 1 << (sizeLog-nbBits); 510 const U32 start = rankVal[weight]; 511 U32 i = start; 512 const U32 end = start + length; 513 514 MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); 515 DElt.nbBits = (BYTE)(nbBits + consumed); 516 DElt.length = 2; 517 do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */ 518 519 rankVal[weight] += length; 520 } } 521 } 522 523 524 
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, 525 const sortedSymbol_t* sortedList, const U32 sortedListSize, 526 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, 527 const U32 nbBitsBaseline) 528 { 529 U32 rankVal[HUF_TABLELOG_MAX + 1]; 530 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ 531 const U32 minBits = nbBitsBaseline - maxWeight; 532 U32 s; 533 534 memcpy(rankVal, rankValOrigin, sizeof(rankVal)); 535 536 /* fill DTable */ 537 for (s=0; s<sortedListSize; s++) { 538 const U16 symbol = sortedList[s].symbol; 539 const U32 weight = sortedList[s].weight; 540 const U32 nbBits = nbBitsBaseline - weight; 541 const U32 start = rankVal[weight]; 542 const U32 length = 1 << (targetLog-nbBits); 543 544 if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */ 545 U32 sortedRank; 546 int minWeight = nbBits + scaleLog; 547 if (minWeight < 1) minWeight = 1; 548 sortedRank = rankStart[minWeight]; 549 HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, 550 rankValOrigin[nbBits], minWeight, 551 sortedList+sortedRank, sortedListSize-sortedRank, 552 nbBitsBaseline, symbol); 553 } else { 554 HUF_DEltX2 DElt; 555 MEM_writeLE16(&(DElt.sequence), symbol); 556 DElt.nbBits = (BYTE)(nbBits); 557 DElt.length = 1; 558 { U32 const end = start + length; 559 U32 u; 560 for (u = start; u < end; u++) DTable[u] = DElt; 561 } } 562 rankVal[weight] += length; 563 } 564 } 565 566 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, 567 const void* src, size_t srcSize, 568 void* workSpace, size_t wkspSize) 569 { 570 U32 tableLog, maxW, sizeOfSort, nbSymbols; 571 DTableDesc dtd = HUF_getDTableDesc(DTable); 572 U32 const maxTableLog = dtd.maxTableLog; 573 size_t iSize; 574 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ 575 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; 576 U32 *rankStart; 577 578 rankValCol_t* rankVal; 579 U32* rankStats; 580 U32* rankStart0; 
581 sortedSymbol_t* sortedSymbol; 582 BYTE* weightList; 583 size_t spaceUsed32 = 0; 584 585 rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); 586 spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; 587 rankStats = (U32 *)workSpace + spaceUsed32; 588 spaceUsed32 += HUF_TABLELOG_MAX + 1; 589 rankStart0 = (U32 *)workSpace + spaceUsed32; 590 spaceUsed32 += HUF_TABLELOG_MAX + 2; 591 sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); 592 spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; 593 weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); 594 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; 595 596 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); 597 598 rankStart = rankStart0 + 1; 599 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); 600 601 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ 602 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 603 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ 604 605 iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); 606 if (HUF_isError(iSize)) return iSize; 607 608 /* check result */ 609 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ 610 611 /* find maxWeight */ 612 for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ 613 614 /* Get start index of each weight */ 615 { U32 w, nextRankStart = 0; 616 for (w=1; w<maxW+1; w++) { 617 U32 current = nextRankStart; 618 nextRankStart += rankStats[w]; 619 rankStart[w] = current; 620 } 621 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ 622 sizeOfSort = nextRankStart; 623 } 624 625 /* sort symbols by weight */ 626 { U32 s; 627 for (s=0; s<nbSymbols; s++) { 628 U32 const w = weightList[s]; 629 U32 const r = rankStart[w]++; 630 sortedSymbol[r].symbol = (BYTE)s; 631 sortedSymbol[r].weight = (BYTE)w; 632 } 633 rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ 634 } 635 636 /* Build rankVal */ 637 { U32* const rankVal0 = rankVal[0]; 638 { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */ 639 U32 nextRankVal = 0; 640 U32 w; 641 for (w=1; w<maxW+1; w++) { 642 U32 current = nextRankVal; 643 nextRankVal += rankStats[w] << (w+rescale); 644 rankVal0[w] = current; 645 } } 646 { U32 const minBits = tableLog+1 - maxW; 647 U32 consumed; 648 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { 649 U32* const rankValPtr = rankVal[consumed]; 650 U32 w; 651 for (w = 1; w < maxW+1; w++) { 652 rankValPtr[w] = rankVal0[w] >> consumed; 653 } } } } 654 655 HUF_fillDTableX2(dt, maxTableLog, 656 sortedSymbol, sizeOfSort, 657 rankStart0, rankVal, maxW, 658 tableLog+1); 659 660 dtd.tableLog = (BYTE)maxTableLog; 661 dtd.tableType = 1; 662 memcpy(DTable, &dtd, sizeof(dtd)); 663 return iSize; 664 } 665 666 size_t HUF_readDTableX2(HUF_DTable* DTable, const 
void* src, size_t srcSize) 667 { 668 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 669 return HUF_readDTableX2_wksp(DTable, src, srcSize, 670 workSpace, sizeof(workSpace)); 671 } 672 673 674 FORCE_INLINE_TEMPLATE U32 675 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) 676 { 677 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ 678 memcpy(op, dt+val, 2); 679 BIT_skipBits(DStream, dt[val].nbBits); 680 return dt[val].length; 681 } 682 683 FORCE_INLINE_TEMPLATE U32 684 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) 685 { 686 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ 687 memcpy(op, dt+val, 1); 688 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); 689 else { 690 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { 691 BIT_skipBits(DStream, dt[val].nbBits); 692 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) 693 /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ 694 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); 695 } } 696 return 1; 697 } 698 699 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ 700 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 701 702 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ 703 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 704 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 705 706 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ 707 if (MEM_64bits()) \ 708 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 709 710 HINT_INLINE size_t 711 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, 712 const HUF_DEltX2* const dt, const U32 dtLog) 713 { 714 BYTE* const pStart = p; 715 716 /* up to 8 symbols at a time */ 717 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { 718 HUF_DECODE_SYMBOLX2_2(p, bitDPtr); 719 HUF_DECODE_SYMBOLX2_1(p, bitDPtr); 720 HUF_DECODE_SYMBOLX2_2(p, bitDPtr); 721 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 722 } 723 724 /* closer to end : up to 2 symbols at a time */ 725 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) 726 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); 727 728 while (p <= pEnd-2) 729 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ 730 731 if (p < pEnd) 732 p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); 733 734 return p-pStart; 735 } 736 737 FORCE_INLINE_TEMPLATE size_t 738 HUF_decompress1X2_usingDTable_internal_body( 739 void* dst, size_t dstSize, 740 const void* cSrc, size_t cSrcSize, 741 const HUF_DTable* DTable) 742 { 743 BIT_DStream_t bitD; 744 745 /* Init */ 746 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); 747 748 /* decode */ 749 { BYTE* const ostart = (BYTE*) dst; 750 BYTE* const oend = ostart + dstSize; 751 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ 752 const HUF_DEltX2* const dt = (const 
HUF_DEltX2*)dtPtr; 753 DTableDesc const dtd = HUF_getDTableDesc(DTable); 754 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); 755 } 756 757 /* check */ 758 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); 759 760 /* decoded size */ 761 return dstSize; 762 } 763 764 765 FORCE_INLINE_TEMPLATE size_t 766 HUF_decompress4X2_usingDTable_internal_body( 767 void* dst, size_t dstSize, 768 const void* cSrc, size_t cSrcSize, 769 const HUF_DTable* DTable) 770 { 771 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 772 773 { const BYTE* const istart = (const BYTE*) cSrc; 774 BYTE* const ostart = (BYTE*) dst; 775 BYTE* const oend = ostart + dstSize; 776 const void* const dtPtr = DTable+1; 777 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; 778 779 /* Init */ 780 BIT_DStream_t bitD1; 781 BIT_DStream_t bitD2; 782 BIT_DStream_t bitD3; 783 BIT_DStream_t bitD4; 784 size_t const length1 = MEM_readLE16(istart); 785 size_t const length2 = MEM_readLE16(istart+2); 786 size_t const length3 = MEM_readLE16(istart+4); 787 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); 788 const BYTE* const istart1 = istart + 6; /* jumpTable */ 789 const BYTE* const istart2 = istart1 + length1; 790 const BYTE* const istart3 = istart2 + length2; 791 const BYTE* const istart4 = istart3 + length3; 792 size_t const segmentSize = (dstSize+3) / 4; 793 BYTE* const opStart2 = ostart + segmentSize; 794 BYTE* const opStart3 = opStart2 + segmentSize; 795 BYTE* const opStart4 = opStart3 + segmentSize; 796 BYTE* op1 = ostart; 797 BYTE* op2 = opStart2; 798 BYTE* op3 = opStart3; 799 BYTE* op4 = opStart4; 800 U32 endSignal; 801 DTableDesc const dtd = HUF_getDTableDesc(DTable); 802 U32 const dtLog = dtd.tableLog; 803 804 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 805 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); 806 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); 807 
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); 808 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); 809 810 /* 16-32 symbols per loop (4-8 symbols per stream) */ 811 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); 812 for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { 813 HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 814 HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 815 HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 816 HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 817 HUF_DECODE_SYMBOLX2_1(op1, &bitD1); 818 HUF_DECODE_SYMBOLX2_1(op2, &bitD2); 819 HUF_DECODE_SYMBOLX2_1(op3, &bitD3); 820 HUF_DECODE_SYMBOLX2_1(op4, &bitD4); 821 HUF_DECODE_SYMBOLX2_2(op1, &bitD1); 822 HUF_DECODE_SYMBOLX2_2(op2, &bitD2); 823 HUF_DECODE_SYMBOLX2_2(op3, &bitD3); 824 HUF_DECODE_SYMBOLX2_2(op4, &bitD4); 825 HUF_DECODE_SYMBOLX2_0(op1, &bitD1); 826 HUF_DECODE_SYMBOLX2_0(op2, &bitD2); 827 HUF_DECODE_SYMBOLX2_0(op3, &bitD3); 828 HUF_DECODE_SYMBOLX2_0(op4, &bitD4); 829 830 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); 831 } 832 833 /* check corruption */ 834 if (op1 > opStart2) return ERROR(corruption_detected); 835 if (op2 > opStart3) return ERROR(corruption_detected); 836 if (op3 > opStart4) return ERROR(corruption_detected); 837 /* note : op4 already verified within main loop */ 838 839 /* finish bitStreams one by one */ 840 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); 841 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); 842 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); 843 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); 844 845 /* check */ 846 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); 847 if (!endCheck) return ERROR(corruption_detected); } 848 849 /* decoded size */ 850 return dstSize; 851 } 852 } 853 854 
HUF_DGEN(HUF_decompress1X2_usingDTable_internal) 855 HUF_DGEN(HUF_decompress4X2_usingDTable_internal) 856 857 size_t HUF_decompress1X2_usingDTable( 858 void* dst, size_t dstSize, 859 const void* cSrc, size_t cSrcSize, 860 const HUF_DTable* DTable) 861 { 862 DTableDesc dtd = HUF_getDTableDesc(DTable); 863 if (dtd.tableType != 1) return ERROR(GENERIC); 864 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 865 } 866 867 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 868 const void* cSrc, size_t cSrcSize, 869 void* workSpace, size_t wkspSize) 870 { 871 const BYTE* ip = (const BYTE*) cSrc; 872 873 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, 874 workSpace, wkspSize); 875 if (HUF_isError(hSize)) return hSize; 876 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 877 ip += hSize; cSrcSize -= hSize; 878 879 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 880 } 881 882 883 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, 884 const void* cSrc, size_t cSrcSize) 885 { 886 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 887 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, 888 workSpace, sizeof(workSpace)); 889 } 890 891 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 892 { 893 HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX), HUFD_ALLOC_TAG); 894 size_t ret; 895 896 if (!DTable) 897 return 0; 898 899 RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX)); 900 901 DTable[0] = (U32)(HUF_TABLELOG_MAX) * 0x01000001; 902 903 ret = HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); 904 905 ExFreePool(DTable); 906 907 return ret; 908 } 909 910 size_t HUF_decompress4X2_usingDTable( 911 void* dst, size_t dstSize, 912 const void* cSrc, size_t cSrcSize, 913 
const HUF_DTable* DTable) 914 { 915 DTableDesc dtd = HUF_getDTableDesc(DTable); 916 if (dtd.tableType != 1) return ERROR(GENERIC); 917 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 918 } 919 920 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 921 const void* cSrc, size_t cSrcSize, 922 void* workSpace, size_t wkspSize, int bmi2) 923 { 924 const BYTE* ip = (const BYTE*) cSrc; 925 926 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, 927 workSpace, wkspSize); 928 if (HUF_isError(hSize)) return hSize; 929 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 930 ip += hSize; cSrcSize -= hSize; 931 932 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 933 } 934 935 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 936 const void* cSrc, size_t cSrcSize, 937 void* workSpace, size_t wkspSize) 938 { 939 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); 940 } 941 942 943 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, 944 const void* cSrc, size_t cSrcSize) 945 { 946 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; 947 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, 948 workSpace, sizeof(workSpace)); 949 } 950 951 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) 952 { 953 HUF_DTable* DTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX), HUFD_ALLOC_TAG); 954 size_t ret; 955 956 if (!DTable) 957 return 0; 958 959 RtlZeroMemory(DTable, sizeof(HUF_DTable) * HUF_DTABLE_SIZE(HUF_TABLELOG_MAX)); 960 961 DTable[0] = (U32)(HUF_TABLELOG_MAX) * 0x01000001; 962 963 ret = HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); 964 965 ExFreePool(DTable); 966 967 return ret; 968 } 969 970 971 /* 
 ***********************************/
/* Universal decompression selectors */
/* ***********************************/

/** HUF_decompress1X_usingDTable() :
 *  Single-stream decode; dispatches on the tableType stored in the DTable
 *  header (0 => single-symbol X1 decoder, non-zero => double-symbol X2). */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}

/** HUF_decompress4X_usingDTable() :
 *  4-streams decode; same table-type dispatch as the 1X variant above. */
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}


/* Pre-computed decoder timing metrics, indexed by compression-ratio
 * quantizer Q (see HUF_selectDecoder) :
 * tableTime = fixed cost of building the DTable,
 * decode256Time = marginal cost per 256 decoded bytes. */
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
{
    /* single, double, quad */
    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
};

/** HUF_selectDecoder() :
 *  Tells which decoder is likely to decode faster,
 *  based on a set of pre-computed metrics.
 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
 *  Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
    assert(dstSize > 0);
    assert(dstSize <= 128*1024);
    /* decoder timing evaluation */
    /* Q quantizes the compression ratio into 16 buckets (row of algoTime) */
    {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
        U32 const D256 = (U32)(dstSize >> 8);  /* output size in 256-byte units */
        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
        DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
        return DTime1 < DTime0;
}   }


typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);

/** HUF_decompress() :
 *  Top-level entry point : handles the "not compressed" and RLE special
 *  cases, then dispatches to the 4X1/4X2 decoder picked by
 *  HUF_selectDecoder(). Tables are self-allocated by the callee. */
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };

    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
    }
}

/** HUF_decompress4X_DCtx() :
 *  Same dispatch as HUF_decompress(), decoding into a caller-provided DTable. */
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
    }
}

/** HUF_decompress4X_hufOnly() :
 *  Same as HUF_decompress4X_hufOnly_wksp(), with a stack workspace. */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                         workSpace, sizeof(workSpace));
}


/** HUF_decompress4X_hufOnly_wksp() :
 *  Huffman-only 4-streams decode : unlike HUF_decompress4X_DCtx(),
 *  applies no "not compressed" / RLE shortcuts. */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
    }
}

/** HUF_decompress1X_DCtx_wksp() :
 *  Single-stream decode with shortcut handling, caller-provided workspace. */
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize):
                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                cSrcSize, workSpace, wkspSize);
    }
}

/** HUF_decompress1X_DCtx() :
 *  Same as HUF_decompress1X_DCtx_wksp(), with a stack workspace. */
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
{
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                      workSpace, sizeof(workSpace));
}


/* BMI2 variants below : the bmi2 flag is forwarded to the
 * HUF_DGEN-generated decoder bodies. */

size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
}

/** HUF_decompress1X1_DCtx_wksp_bmi2() :
 *  Build the X1 table from the header at cSrc, then decode the payload. */
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    const BYTE* ip = (const BYTE*) cSrc;

    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    /* header alone must not consume the whole input */
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
    ip += hSize; cSrcSize -= hSize;

    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
}

size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
}

size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    /* validation checks */
    if (dstSize == 0) return ERROR(dstSize_tooSmall);
    if (cSrcSize == 0) return ERROR(corruption_detected);

    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    }
}