1 /* ******************************************************************
2    Huffman encoder, part of New Generation Entropy library
3    Copyright (C) 2013-2016, Yann Collet.
4 
5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11        * Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13        * Redistributions in binary form must reproduce the above
14    copyright notice, this list of conditions and the following disclaimer
15    in the documentation and/or other materials provided with the
16    distribution.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30     You can contact the author at :
31     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32     - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Compiler specifics
37 ****************************************************************/
38 #ifdef _MSC_VER    /* Visual Studio */
39 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
40 #endif
41 
42 
43 /* **************************************************************
44 *  Includes
45 ****************************************************************/
46 #include <string.h>     /* memcpy, memset */
47 #include <stdio.h>      /* printf (debug) */
48 #include "compiler.h"
49 #include "bitstream.h"
50 #include "hist.h"
51 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
52 #include "fse.h"        /* header compression */
53 #define HUF_STATIC_LINKING_ONLY
54 #include "huf.h"
55 #include "error_private.h"
56 #include <ntifs.h>
57 #include <ntddk.h>
58 
/* Pool tag identifying this module's kernel allocations; bytes spell "HUFc" (stored little-endian). */
#define HUFC_ALLOC_TAG 0x63465548 // "HUFc"
60 
61 
62 /* **************************************************************
63 *  Error Management
64 ****************************************************************/
/* Error codes are encoded as large size_t values; ERR_isError() detects them (see error_private.h) */
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
/* CHECK_V_F : evaluate f once into `size_t const e`; early-return e to the caller if it is an error code */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
/* CHECK_F : same check, with a throwaway variable name (result value not needed) */
#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
69 
70 
71 /* **************************************************************
72 *  Utils
73 ****************************************************************/
74 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
75 {
76     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
77 }
78 
79 
80 /* *******************************************************
81 *  HUF : Huffman block compression
82 *********************************************************/
/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 * @return : nb of bytes written into dst;
 *           special values : 0 => not compressible, 1 => single repeated symbol (RLE);
 *           or an error code, testable with HUF_isError()
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    U32 maxSymbolValue = HUF_TABLELOG_MAX;   /* weights are bounded by HUF_TABLELOG_MAX (contract above) */
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;

    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];

    U32 count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];

    /* init conditions */
    if (wtSize <= 1) return 0;  /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return op-ostart;
}
130 
131 
/* One compression-table entry per symbol. */
struct HUF_CElt_s {
  U16  val;      /* Huffman code assigned to the symbol */
  BYTE nbBits;   /* code length in bits; 0 => symbol unused (see HUF_validateCTable) */
};   /* typedef'd to HUF_CElt within "huf.h" */
136 
137 /*! HUF_writeCTable() :
138     `CTable` : Huffman tree to save, using huf representation.
139     @return : size of saved CTable */
/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    Serialized format : 1 header byte, then either FSE-compressed weights
    (header < 128 == compressed size) or raw 4-bit weights (header >= 128,
    nb of weights == header - 127).
    NOTE(review): assumes maxDstSize >= 1 — `maxDstSize-1` below would wrap
    for 0; confirm all callers guarantee a non-empty dst.
    @return : size of saved CTable, or an error code */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
{
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
    BYTE* op = (BYTE*)dst;
    U32 n;

     /* check conditions */
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight : weight = huffLog + 1 - nbBits ; nbBits==0 maps to weight 0 (unused symbol) */
    bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        huffWeight[n] = bitsToWeight[CTable[n].nbBits];

    /* attempt weights compression by FSE */
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination ; in-bounds, since maxSymbolValue <= 128 here (checked above) */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);   /* two weights per output byte */
    return ((maxSymbolValue+1)/2) + 1;
}
174 
175 
/*! HUF_readCTable() :
 *  Rebuild a compression table (nbBits + val per symbol) from a header
 *  serialized by HUF_writeCTable().
 *  On success, *maxSymbolValuePtr is updated to the largest symbol present.
 * @return : nb of bytes read from src, or an error code */
size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));   /* each weight-n symbol occupies 2^(n-1) slots */
            rankVal[n] = current;
    }   }

    /* fill nbBits : nbBits = tableLog + 1 - weight ; weight 0 yields the pseudo-rank tableLog+1 handled below */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;     /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}
223 
224 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
225 {
226     const HUF_CElt* table = (const HUF_CElt*)symbolTable;
227     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
228     return table[symbolValue].nbBits;
229 }
230 
231 
/* One node of the Huffman tree under construction (leaves and internal nodes share this layout). */
typedef struct nodeElt_s {
    U32 count;    /* occurrence count (leaf) or combined count (internal node) */
    U16 parent;   /* index of parent node within the huffNode array */
    BYTE byte;    /* symbol value (meaningful for leaves only) */
    BYTE nbBits;  /* resulting code length, filled during weight distribution */
} nodeElt;
238 
/** HUF_setMaxHeight() :
 *  Enforce `maxNbBits` on the node list huffNode[0..lastNonNull]
 *  (huffNode[lastNonNull] carries the longest code). Over-long codes are
 *  truncated to maxNbBits, and the resulting Kraft-sum "debt" (totalCost)
 *  is repaid by lengthening selected shorter codes so the code stays complete.
 * @return : resulting largest code length (== maxNbBits unless the tree
 *           already fit, in which case it is returned unchanged). */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        U32 n = lastNonNull;

        /* truncate every over-long code; accumulate the cost in units of 2^-largestBits */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n --;
        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */

        /* renorm totalCost */
        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;   /* marker : all bytes 0xF0, matches the memset below */
            U32 rankLast[HUF_TABLELOG_MAX+2];
            int pos;

            /* Get pos of last (smallest) symbol per rank */
            memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = pos;   /* indexed by distance below maxNbBits */
            }   }

            while (totalCost > 0) {
                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
                /* prefer demoting a rank whose entropy loss is smallest */
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 highPos = rankLast[nBitsToDecrease];
                    U32 lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease ++;
                totalCost -= 1 << (nBitsToDecrease-1);
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
                huffNode[rankLast[nBitsToDecrease]].nbBits ++;   /* lengthen the chosen symbol's code by 1 */
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
            }   }   /* while (totalCost > 0) */

            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    rankLast[1] = n+1;
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost ++;
    }   }   }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}
315 
316 
/* Bucket bookkeeping for HUF_sort() : one entry per rank (log2 of count+1). */
typedef struct {
    U32 base;     /* after the prefix pass : first position of this rank's bucket */
    U32 current;  /* next free position within the bucket during insertion */
} rankPos;
321 
/* HUF_sort() :
 *  Fill huffNode[0..maxSymbolValue] with (count, byte) pairs sorted by
 *  decreasing count (largest count at index 0). Symbols are first bucketed
 *  by rank = log2(count+1), then insertion-sorted within their bucket,
 *  which keeps the sort cheap for typical skewed histograms. */
static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
{
    rankPos rank[32];
    U32 n;

    /* pass 1 : histogram of ranks */
    memset(rank, 0, sizeof(rank));
    for (n=0; n<=maxSymbolValue; n++) {
        U32 r = BIT_highbit32(count[n] + 1);
        rank[r].base ++;
    }
    /* pass 2 : suffix sums (high rank to low) => rank[r].base = nb of symbols with rank > r-1 */
    for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
    for (n=0; n<32; n++) rank[n].current = rank[n].base;
    /* pass 3 : place each symbol in its bucket, shifting smaller counts up to keep order */
    for (n=0; n<=maxSymbolValue; n++) {
        U32 const c = count[n];
        U32 const r = BIT_highbit32(c+1) + 1;   /* +1 : bucket starts where strictly-larger ranks end */
        U32 pos = rank[r].current++;
        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
            huffNode[pos] = huffNode[pos-1];
            pos--;
        }
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }
}
346 
347 
/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
 * @return : max nb of bits used by the resulting tree (<= maxNbBits),
 *           or an error code, testable with HUF_isError() */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    nodeElt* const huffNode0 = (nodeElt*)workSpace;
    nodeElt* const huffNode = huffNode0+1;   /* slot 0 is reserved for a sentinel (set below) */
    U32 n, nonNullRank;
    int lowS, lowN;
    U16 nodeNb = STARTNODE;   /* internal nodes are stored after the leaf slots */
    U32 nodeRoot;

    /* safety checks */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue);

    /* init for parents : merge the two rarest symbols into the first internal node */
    nonNullRank = maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);   /* "infinite" placeholder counts */
    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */

    /* create parents : repeatedly merge the two cheapest remaining nodes (leaf or internal) */
    while (nodeNb <= nodeRoot) {
        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);

    /* fill result into tree (val, nbBits) */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
        if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
        for (n=0; n<=nonNullRank; n++)
            nbPerRank[huffNode[n].nbBits]++;
        /* determine starting value per rank */
        {   U16 min = 0;
            for (n=maxNbBits; n>0; n--) {
                valPerRank[n] = min;      /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        for (n=0; n<=maxSymbolValue; n++)
            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
        for (n=0; n<=maxSymbolValue; n++)
            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
    }

    return maxNbBits;
}
423 
424 /** HUF_buildCTable() :
425  * @return : maxNbBits
426  *  Note : count is used before tree is written, so they can safely overlap
427  */
428 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
429 {
430     huffNodeTable* nodeTable = ExAllocatePoolWithTag(NonPagedPool, sizeof(huffNodeTable), HUFC_ALLOC_TAG);
431     size_t ret;
432 
433     if (!nodeTable)
434         return 0;
435 
436     ret = HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(huffNodeTable));
437 
438     ExFreePool(nodeTable);
439 
440     return ret;
441 }
442 
443 static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
444 {
445     size_t nbBits = 0;
446     int s;
447     for (s = 0; s <= (int)maxSymbolValue; ++s) {
448         nbBits += CTable[s].nbBits * count[s];
449     }
450     return nbBits >> 3;
451 }
452 
453 static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
454   int bad = 0;
455   int s;
456   for (s = 0; s <= (int)maxSymbolValue; ++s) {
457     bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
458   }
459   return !bad;
460 }
461 
/* HUF_compressBound() : worst-case compressed size for a `size`-byte input (macro defined in huf.h) */
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
463 
/* HUF_encodeSymbol() :
 * Append one symbol's code to the bitstream. Uses the "fast" add, which
 * assumes the caller flushes often enough that the container cannot overflow. */
FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}
469 
/* unconditional flush of the bit container */
#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

/* conditional flushes : only emitted when the bit container is too narrow to
 * accumulate 2 (resp. 4) max-length codes between flushes; on wide containers
 * the constant condition is false and the flush compiles away */
#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
477 
/* HUF_compress1X_usingCTable_internal_body() :
 * Encode `src` as a single Huffman bitstream. The input is walked backwards,
 * 4 symbols per iteration (the matching decoder reads the stream in reverse);
 * the srcSize%4 tail symbols are emitted first.
 * @return : compressed size written into dst, or 0 if dst is too small */
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    BIT_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
      if (HUF_isError(initErr)) return 0; }   /* report as incompressible rather than error */

    n = srcSize & ~3;  /* join to mod 4 */
    switch (srcSize & 3)   /* encode the tail symbols first */
    {
        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                 HUF_FLUSHBITS_2(&bitC);
		 /* fall-through */
        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                 HUF_FLUSHBITS_1(&bitC);
		 /* fall-through */
        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                 HUF_FLUSHBITS(&bitC);
		 /* fall-through */
        case 0 : /* fall-through */
        default: break;
    }

    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
        HUF_FLUSHBITS_2(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
        HUF_FLUSHBITS(&bitC);
    }

    return BIT_closeCStream(&bitC);   /* 0 if the stream overflowed dst */
}
524 
#if DYNAMIC_BMI2

/* Dynamic BMI2 dispatch : instantiate the shared body twice — once compiled
 * with the BMI2 target attribute — and select the variant at runtime. */
static TARGET_ATTRIBUTE("bmi2") size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

/* runtime selector : `bmi2` non-zero means the CPU supports BMI2 */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

/* No dynamic dispatch : single instantiation; the `bmi2` parameter is ignored. */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif
566 
/* HUF_compress1X_usingCTable() :
 * public single-stream entry point; always takes the portable (non-BMI2) path */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
571 
572 
/* HUF_compress4X_usingCTable_internal() :
 * Compress `src` as 4 independent streams sharing one CTable.
 * Output layout : 6-byte jump table (3 little-endian U16 = compressed sizes
 * of streams 1-3; the 4th size is implied), then the 4 streams back-to-back.
 * @return : total compressed size, or 0 if incompressible / dst too small */
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    /* stream 1 */
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);   /* must fit the U16 jump-table slot */
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    /* stream 2 */
    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    /* stream 3 */
    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
        if (cSize==0) return 0;
        assert(cSize <= 65535);
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    /* stream 4 : remainder of the input; its size is not recorded */
    ip += segmentSize;
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
        if (cSize==0) return 0;
        op += cSize;
    }

    return op-ostart;
}
620 
/* HUF_compress4X_usingCTable() :
 * public 4-stream entry point; always takes the portable (non-BMI2) path */
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
625 
626 
627 static size_t HUF_compressCTable_internal(
628                 BYTE* const ostart, BYTE* op, BYTE* const oend,
629                 const void* src, size_t srcSize,
630                 unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
631 {
632     size_t const cSize = singleStream ?
633                          HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
634                          HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
635     if (HUF_isError(cSize)) { return cSize; }
636     if (cSize==0) { return 0; }   /* uncompressible */
637     op += cSize;
638     /* check compressibility */
639     if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
640     return op-ostart;
641 }
642 
/* Working storage for HUF_compress_internal(), carved out of the caller-supplied workSpace. */
typedef struct {
    U32 count[HUF_SYMBOLVALUE_MAX + 1];        /* input histogram */
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];  /* candidate Huffman table */
    huffNodeTable nodeTable;                   /* scratch for HUF_buildCTable_wksp() */
} HUF_compress_tables_t;
648 
/* HUF_compress_internal() :
 * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned,
 * aligned on 4-bytes boundaries.
 * Shared implementation behind all HUF_compress*() variants : histogram,
 * optional reuse of a previous table (`oldHufTable`/`repeat`), tree build,
 * header write, then 1- or 4-stream encoding.
 * @return : compressed size (0 => not compressible / no gain), 1 => RLE,
 *           or an error code testable with HUF_isError() */
static size_t HUF_compress_internal (
                void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog,
                unsigned singleStream,
                void* workSpace, size_t wkspSize,
                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                const int bmi2)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    /* checks & inits */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           singleStream, oldHufTable, bmi2);
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;   /* old table cannot encode this input */
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           singleStream, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
                                                maxSymbolValue, huffLog,
                                                table->nodeTable, sizeof(table->nodeTable)) );
        huffLog = (U32)maxBits;
        /* Zero unused symbols in CTable, so we can check it for validity */
        memset(table->CTable + (maxSymbolValue + 1), 0,
               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {   /* old table avoids re-sending a header */
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   singleStream, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }   /* header alone would eat the potential gain */
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       singleStream, table->CTable, bmi2);
}
736 
737 
738 size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
739                       const void* src, size_t srcSize,
740                       unsigned maxSymbolValue, unsigned huffLog,
741                       void* workSpace, size_t wkspSize)
742 {
743     return HUF_compress_internal(dst, dstSize, src, srcSize,
744                                  maxSymbolValue, huffLog, 1 /*single stream*/,
745                                  workSpace, wkspSize,
746                                  NULL, NULL, 0, 0 /*bmi2*/);
747 }
748 
749 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
750                       const void* src, size_t srcSize,
751                       unsigned maxSymbolValue, unsigned huffLog,
752                       void* workSpace, size_t wkspSize,
753                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
754 {
755     return HUF_compress_internal(dst, dstSize, src, srcSize,
756                                  maxSymbolValue, huffLog, 1 /*single stream*/,
757                                  workSpace, wkspSize, hufTable,
758                                  repeat, preferRepeat, bmi2);
759 }
760 
761 size_t HUF_compress1X (void* dst, size_t dstSize,
762                  const void* src, size_t srcSize,
763                  unsigned maxSymbolValue, unsigned huffLog)
764 {
765     unsigned* workSpace = ExAllocatePoolWithTag(NonPagedPool, sizeof(unsigned) * HUF_WORKSPACE_SIZE_U32, HUFC_ALLOC_TAG);
766     size_t ret;
767 
768     if (!workSpace)
769         return 0;
770 
771     ret = HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(unsigned) * HUF_WORKSPACE_SIZE_U32);
772 
773     ExFreePool(workSpace);
774 
775     return ret;
776 }
777 
/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables */
781 size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
782                       const void* src, size_t srcSize,
783                       unsigned maxSymbolValue, unsigned huffLog,
784                       void* workSpace, size_t wkspSize)
785 {
786     return HUF_compress_internal(dst, dstSize, src, srcSize,
787                                  maxSymbolValue, huffLog, 0 /*4 streams*/,
788                                  workSpace, wkspSize,
789                                  NULL, NULL, 0, 0 /*bmi2*/);
790 }
791 
792 /* HUF_compress4X_repeat():
793  * compress input using 4 streams.
794  * re-use an existing huffman compression table */
795 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
796                       const void* src, size_t srcSize,
797                       unsigned maxSymbolValue, unsigned huffLog,
798                       void* workSpace, size_t wkspSize,
799                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
800 {
801     return HUF_compress_internal(dst, dstSize, src, srcSize,
802                                  maxSymbolValue, huffLog, 0 /* 4 streams */,
803                                  workSpace, wkspSize,
804                                  hufTable, repeat, preferRepeat, bmi2);
805 }
806 
807 size_t HUF_compress2 (void* dst, size_t dstSize,
808                 const void* src, size_t srcSize,
809                 unsigned maxSymbolValue, unsigned huffLog)
810 {
811     unsigned* workSpace = ExAllocatePoolWithTag(NonPagedPool, sizeof(unsigned) * HUF_WORKSPACE_SIZE_U32, HUFC_ALLOC_TAG);
812     size_t ret;
813 
814     if (!workSpace)
815         return 0;
816 
817     ret = HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(unsigned) * HUF_WORKSPACE_SIZE_U32);
818 
819     ExFreePool(workSpace);
820 
821     return ret;
822 }
823 
824 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
825 {
826     return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
827 }
828