1 /* ****************************************************************** 2 Huffman encoder, part of New Generation Entropy library 3 Copyright (C) 2013-2016, Yann Collet. 4 5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are 9 met: 10 11 * Redistributions of source code must retain the above copyright 12 notice, this list of conditions and the following disclaimer. 13 * Redistributions in binary form must reproduce the above 14 copyright notice, this list of conditions and the following disclaimer 15 in the documentation and/or other materials provided with the 16 distribution. 17 18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 30 You can contact the author at : 31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy 32 - Public forum : https://groups.google.com/forum/#!forum/lz4c 33 ****************************************************************** */ 34 35 /* ************************************************************** 36 * Compiler specifics 37 ****************************************************************/ 38 #ifdef _MSC_VER /* Visual Studio */ 39 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ 40 #endif 41 42 43 /* ************************************************************** 44 * Includes 45 ****************************************************************/ 46 #include <string.h> /* memcpy, memset */ 47 #include <stdio.h> /* printf (debug) */ 48 #include "compiler.h" 49 #include "bitstream.h" 50 #include "hist.h" 51 #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ 52 #include "fse.h" /* header compression */ 53 #define HUF_STATIC_LINKING_ONLY 54 #include "huf.h" 55 #include "error_private.h" 56 #include <ntifs.h> 57 #include <ntddk.h> 58 59 #define HUFC_ALLOC_TAG 0x63465548 // "HUFc" 60 61 62 /* ************************************************************** 63 * Error Management 64 ****************************************************************/ 65 #define HUF_isError ERR_isError 66 #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ 67 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e 68 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } 69 70 71 /* ************************************************************** 72 * Utils 73 ****************************************************************/ 74 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) 75 { 76 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); 77 } 78 79 80 /* 
******************************************************* 81 * HUF : Huffman block compression 82 *********************************************************/ 83 /* HUF_compressWeights() : 84 * Same as FSE_compress(), but dedicated to huff0's weights compression. 85 * The use case needs much less stack memory. 86 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 87 */ 88 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 89 static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) 90 { 91 BYTE* const ostart = (BYTE*) dst; 92 BYTE* op = ostart; 93 BYTE* const oend = ostart + dstSize; 94 95 U32 maxSymbolValue = HUF_TABLELOG_MAX; 96 U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; 97 98 FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; 99 BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; 100 101 U32 count[HUF_TABLELOG_MAX+1]; 102 S16 norm[HUF_TABLELOG_MAX+1]; 103 104 /* init conditions */ 105 if (wtSize <= 1) return 0; /* Not compressible */ 106 107 /* Scan input and build symbol stats */ 108 { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ 109 if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ 110 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ 111 } 112 113 tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); 114 CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); 115 116 /* Write table description header */ 117 { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); 118 op += hSize; 119 } 120 121 /* Compress */ 122 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); 123 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); 124 if (cSize == 0) return 0; /* not enough space 
for compressed data */ 125 op += cSize; 126 } 127 128 return op-ostart; 129 } 130 131 132 struct HUF_CElt_s { 133 U16 val; 134 BYTE nbBits; 135 }; /* typedef'd to HUF_CElt within "huf.h" */ 136 137 /*! HUF_writeCTable() : 138 `CTable` : Huffman tree to save, using huf representation. 139 @return : size of saved CTable */ 140 size_t HUF_writeCTable (void* dst, size_t maxDstSize, 141 const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) 142 { 143 BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ 144 BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; 145 BYTE* op = (BYTE*)dst; 146 U32 n; 147 148 /* check conditions */ 149 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); 150 151 /* convert to weight */ 152 bitsToWeight[0] = 0; 153 for (n=1; n<huffLog+1; n++) 154 bitsToWeight[n] = (BYTE)(huffLog + 1 - n); 155 for (n=0; n<maxSymbolValue; n++) 156 huffWeight[n] = bitsToWeight[CTable[n].nbBits]; 157 158 /* attempt weights compression by FSE */ 159 { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); 160 if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ 161 op[0] = (BYTE)hSize; 162 return hSize+1; 163 } } 164 165 /* write raw values as 4-bits (max : 15) */ 166 if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ 167 if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ 168 op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); 169 huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ 170 for (n=0; n<maxSymbolValue; n+=2) 171 op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); 172 return ((maxSymbolValue+1)/2) + 1; 173 } 174 175 176 size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize) 177 { 178 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init 
not required, even though some static analyzer may complain */ 179 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ 180 U32 tableLog = 0; 181 U32 nbSymbols = 0; 182 183 /* get symbol weights */ 184 CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); 185 186 /* check result */ 187 if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 188 if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); 189 190 /* Prepare base value per rank */ 191 { U32 n, nextRankStart = 0; 192 for (n=1; n<=tableLog; n++) { 193 U32 current = nextRankStart; 194 nextRankStart += (rankVal[n] << (n-1)); 195 rankVal[n] = current; 196 } } 197 198 /* fill nbBits */ 199 { U32 n; for (n=0; n<nbSymbols; n++) { 200 const U32 w = huffWeight[n]; 201 CTable[n].nbBits = (BYTE)(tableLog + 1 - w); 202 } } 203 204 /* fill val */ 205 { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ 206 U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; 207 { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } 208 /* determine stating value per rank */ 209 valPerRank[tableLog+1] = 0; /* for w==0 */ 210 { U16 min = 0; 211 U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ 212 valPerRank[n] = min; /* get starting value within each rank */ 213 min += nbPerRank[n]; 214 min >>= 1; 215 } } 216 /* assign value within rank, symbol order */ 217 { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } 218 } 219 220 *maxSymbolValuePtr = nbSymbols - 1; 221 return readSize; 222 } 223 224 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) 225 { 226 const HUF_CElt* table = (const HUF_CElt*)symbolTable; 227 assert(symbolValue <= HUF_SYMBOLVALUE_MAX); 228 return table[symbolValue].nbBits; 229 } 230 231 232 typedef struct nodeElt_s { 233 U32 count; 234 U16 parent; 235 BYTE byte; 236 BYTE nbBits; 237 } nodeElt; 238 239 
/** HUF_setMaxHeight() :
 *  Enforce `maxNbBits` as the maximum code length, redistributing the cost of
 *  truncated codes onto shorter-coded symbols.
 *  `huffNode` must be sorted by decreasing count ; `lastNonNull` is the index
 *  of the smallest symbol with a non-zero count.
 * @return : effective maximum number of bits after correction (<= maxNbBits) */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        U32 n = lastNonNull;

        /* truncate over-long codes to maxNbBits, accumulating the "debt" created */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n --;
        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */

        /* renorm totalCost */
        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */

        /* repay normalized cost by lengthening codes of less-frequent symbols */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];
            int pos;

            /* Get pos of last (smallest) symbol per rank */
            memset(rankLast, 0xF0, sizeof(rankLast));   /* 0xF0F0F0F0 == noSymbol in every slot */
            {   U32 currentNbBits = maxNbBits;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = pos;
            }   }

            while (totalCost > 0) {
                /* candidate rank : repay the largest power of 2 that fits in totalCost */
                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
                /* prefer a lower rank if lengthening its symbol is cheaper overall */
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 highPos = rankLast[nBitsToDecrease];
                    U32 lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease ++;
                totalCost -= 1 << (nBitsToDecrease-1);
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
            }   }   /* while (totalCost > 0) */

            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    rankLast[1] = n+1;
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost ++;
    }   }   }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}


typedef struct {
    U32 base;      /* start index of this rank bucket after the suffix-sum pass */
    U32 current;   /* next insertion position within this rank bucket */
} rankPos;

/** HUF_sort() :
 *  Fill `huffNode` with the symbols of `count`, sorted by decreasing count.
 *  Uses a bucket pass per "rank" (log2 of count) followed by a bounded
 *  insertion sort inside each bucket. */
static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
{
    rankPos rank[32];
    U32 n;

    memset(rank, 0, sizeof(rank));
    /* histogram symbols per rank, where rank = highbit of (count+1) */
    for (n=0; n<=maxSymbolValue; n++) {
        U32 r = BIT_highbit32(count[n] + 1);
        rank[r].base ++;
    }
    /* suffix sums : higher-count ranks end up at lower (earlier) positions */
    for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
    for (n=0; n<32; n++) rank[n].current = rank[n].base;
    for (n=0; n<=maxSymbolValue; n++) {
        U32 const c = count[n];
        U32 const r = BIT_highbit32(c+1) + 1;
        U32 pos = rank[r].current++;
        /* insertion step : keep decreasing order within the bucket */
        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
            huffNode[pos] = huffNode[pos-1];
            pos--;
        }
        huffNode[pos].count = c;
        huffNode[pos].byte = (BYTE)n;
    }
}


/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
 * @return : effective maxNbBits of the generated table, or an error code */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    nodeElt* const huffNode0 = (nodeElt*)workSpace;
    nodeElt* const huffNode = huffNode0+1;   /* huffNode0[0] is reserved for the sentinel entry set below */
    U32 n, nonNullRank;
    int lowS, lowN;
    U16 nodeNb = STARTNODE;
    U32 nodeRoot;

    /* safety checks */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue);

    /* init for parents : merge the two rarest symbols into the first internal node */
    nonNullRank = maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */

    /* create parents : repeatedly combine the two lowest-count remaining nodes */
    while (nodeNb <= nodeRoot) {
        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);

    /* fill result into tree (val, nbBits) */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
        if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
        for (n=0; n<=nonNullRank; n++)
            nbPerRank[huffNode[n].nbBits]++;
        /* determine starting value per rank */
        {   U16 min = 0;
            for (n=maxNbBits; n>0; n--) {
                valPerRank[n] = min;      /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        for (n=0; n<=maxSymbolValue; n++)
            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
        for (n=0; n<=maxSymbolValue; n++)
            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
    }

    return maxNbBits;
}

/** HUF_buildCTable() :
 * @return : maxNbBits
 *  Note : count is used before tree is written, so they can safely overlap
 *  Kernel port : workspace is allocated from NonPagedPool.
 *  NOTE(review): returns 0 on allocation failure — 0 is not a valid maxNbBits,
 *  callers should treat it as failure ; confirm against call sites. */
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
{
    huffNodeTable* nodeTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(huffNodeTable), HUFC_ALLOC_TAG);
    size_t ret;

    if (!nodeTable)
        return 0;

    ret = HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(huffNodeTable));

    ExFreePool(nodeTable);

    return ret;
}

/* HUF_estimateCompressedSize() :
 * @return : estimated payload size, in bytes, of `count` encoded with `CTable`
 *           (header not included) */
static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += CTable[s].nbBits * count[s];
    }
    return nbBits >> 3;   /* bits -> bytes, rounded down */
}

/* HUF_validateCTable() :
 * @return : 1 if every symbol with a non-zero count has a code in CTable, 0 otherwise */
static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}

#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

/* flush only when the bit container might not hold 2 (resp. 4) more max-length codes */
#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)

/* HUF_compress1X_usingCTable_internal_body() :
 * encode src as a single bitstream. The tail (srcSize mod 4) is handled first,
 * then symbols are consumed back-to-front, 4 per iteration.
 * @return : compressed size, or 0 if dst is too small */
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    BIT_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
      if (HUF_isError(initErr)) return 0; }

    n = srcSize & ~3;  /* join to mod 4 */
    switch (srcSize & 3)
    {
        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                 HUF_FLUSHBITS_2(&bitC);
                 /* fall-through */
        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                 HUF_FLUSHBITS_1(&bitC);
                 /* fall-through */
        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                 HUF_FLUSHBITS(&bitC);
                 /* fall-through */
        case 0 : /* fall-through */
        default: break;
    }

    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
        HUF_FLUSHBITS_2(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
        HUF_FLUSHBITS(&bitC);
    }

    return BIT_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

/* BMI2-targeted instantiation, selected at runtime via the `bmi2` flag */
static TARGET_ATTRIBUTE("bmi2") size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

/* without DYNAMIC_BMI2, there is a single instantiation ; `bmi2` is ignored */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}


/* HUF_compress4X_usingCTable_internal() :
 * split src into 4 (almost) equal segments, compress each as an independent
 * stream, and record the first 3 compressed sizes in a 6-byte jump table
 * at the start of dst. */
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);   /* must fit the 16-bit jump-table entry */
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    /* last segment takes the remainder ; its size is implied, not stored */
    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
        if (cSize==0) return 0;
        op += cSize;
    }

    return op-ostart;
}

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}


/* HUF_compressCTable_internal() :
 * dispatch to 1-stream or 4-stream encoding, then apply the compressibility check.
 * @return : total compressed size from ostart, 0 if uncompressible, or an error code */
static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
{
    size_t const cSize = singleStream ?
                         HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
                         HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return op-ostart;
}

/* scratch layout expected inside `workSpace` of HUF_compress_internal() */
typedef struct {
    U32 count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
    huffNodeTable nodeTable;
} HUF_compress_tables_t;

/* HUF_compress_internal() :
 * `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned.
 * `repeat`/`oldHufTable` optionally enable re-use of a previous Huffman table.
 * @return : compressed size (incl. header), 1 for rle, 0 if uncompressible,
 *           or an error code */
static size_t HUF_compress_internal (
                void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog,
                unsigned singleStream,
                void* workSpace, size_t wkspSize,
                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                const int bmi2)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    /* checks & inits */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);  /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           singleStream, oldHufTable, bmi2);
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           singleStream, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            table->nodeTable, sizeof(table->nodeTable)) );
        huffLog = (U32)maxBits;
        /* Zero unused symbols in CTable, so we can check it for validity */
        memset(table->CTable + (maxSymbolValue + 1), 0,
               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   singleStream, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }   /* header alone eats the potential gain */
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       singleStream, table->CTable, bmi2);
}


/* HUF_compress1X_wksp() :
 * single-stream compression, using caller-provided workspace */
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 1 /*single stream*/,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}

/* HUF_compress1X_repeat() :
 * single-stream compression, with optional re-use of an existing huffman table */
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, 1 /*single stream*/,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2);
}

/* HUF_compress1X() :
 * single-stream compression ; workspace comes from NonPagedPool (kernel port).
 * Returns 0 ("not compressed") when the allocation fails. */
size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned* workSpace = ExAllocatePoolWithTag(NonPagedPool, sizeof(unsigned) * HUF_WORKSPACE_SIZE_U32, HUFC_ALLOC_TAG);
    size_t ret;

    if (!workSpace)
        return 0;

    ret = HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(unsigned) * HUF_WORKSPACE_SIZE_U32);

    ExFreePool(workSpace);

    return ret;
}

/* HUF_compress4X_repeat(): 779 * compress input using 4 streams. 780 * provide workspace to generate compression tables */ 781 size_t HUF_compress4X_wksp (void* dst, size_t dstSize, 782 const void* src, size_t srcSize, 783 unsigned maxSymbolValue, unsigned huffLog, 784 void* workSpace, size_t wkspSize) 785 { 786 return HUF_compress_internal(dst, dstSize, src, srcSize, 787 maxSymbolValue, huffLog, 0 /*4 streams*/, 788 workSpace, wkspSize, 789 NULL, NULL, 0, 0 /*bmi2*/); 790 } 791 792 /* HUF_compress4X_repeat(): 793 * compress input using 4 streams. 794 * re-use an existing huffman compression table */ 795 size_t HUF_compress4X_repeat (void* dst, size_t dstSize, 796 const void* src, size_t srcSize, 797 unsigned maxSymbolValue, unsigned huffLog, 798 void* workSpace, size_t wkspSize, 799 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) 800 { 801 return HUF_compress_internal(dst, dstSize, src, srcSize, 802 maxSymbolValue, huffLog, 0 /* 4 streams */, 803 workSpace, wkspSize, 804 hufTable, repeat, preferRepeat, bmi2); 805 } 806 807 size_t HUF_compress2 (void* dst, size_t dstSize, 808 const void* src, size_t srcSize, 809 unsigned maxSymbolValue, unsigned huffLog) 810 { 811 unsigned* workSpace = ExAllocatePoolWithTag(NonPagedPool, sizeof(unsigned) * HUF_WORKSPACE_SIZE_U32, HUFC_ALLOC_TAG); 812 size_t ret; 813 814 if (!workSpace) 815 return 0; 816 817 ret = HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(unsigned) * HUF_WORKSPACE_SIZE_U32); 818 819 ExFreePool(workSpace); 820 821 return ret; 822 } 823 824 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) 825 { 826 return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); 827 } 828